1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5736b9155Smarks * Common Development and Distribution License (the "License"). 6736b9155Smarks * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 22d39ee142SMark Shellenbaum * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23*0fbc0cd0SMatthew Ahrens * Copyright (c) 2012, 2014 by Delphix. All rights reserved. 242144b121SMarcel Telka * Copyright 2014 Nexenta Systems, Inc. All rights reserved. 
25fa9e4066Sahrens */ 26fa9e4066Sahrens 2775c76197Speteh /* Portions Copyright 2007 Jeremy Teo */ 2855da60b9SMark J Musante /* Portions Copyright 2010 Robert Milkowski */ 2975c76197Speteh 30fa9e4066Sahrens #include <sys/types.h> 31fa9e4066Sahrens #include <sys/param.h> 32fa9e4066Sahrens #include <sys/time.h> 33fa9e4066Sahrens #include <sys/systm.h> 34fa9e4066Sahrens #include <sys/sysmacros.h> 35fa9e4066Sahrens #include <sys/resource.h> 36fa9e4066Sahrens #include <sys/vfs.h> 37aa59c4cbSrsb #include <sys/vfs_opreg.h> 38fa9e4066Sahrens #include <sys/vnode.h> 39fa9e4066Sahrens #include <sys/file.h> 40fa9e4066Sahrens #include <sys/stat.h> 41fa9e4066Sahrens #include <sys/kmem.h> 42fa9e4066Sahrens #include <sys/taskq.h> 43fa9e4066Sahrens #include <sys/uio.h> 44fa9e4066Sahrens #include <sys/vmsystm.h> 45fa9e4066Sahrens #include <sys/atomic.h> 4644eda4d7Smaybee #include <sys/vm.h> 47fa9e4066Sahrens #include <vm/seg_vn.h> 48fa9e4066Sahrens #include <vm/pvn.h> 49fa9e4066Sahrens #include <vm/as.h> 500fab61baSJonathan W Adams #include <vm/kpm.h> 510fab61baSJonathan W Adams #include <vm/seg_kpm.h> 52fa9e4066Sahrens #include <sys/mman.h> 53fa9e4066Sahrens #include <sys/pathname.h> 54fa9e4066Sahrens #include <sys/cmn_err.h> 55fa9e4066Sahrens #include <sys/errno.h> 56fa9e4066Sahrens #include <sys/unistd.h> 57fa9e4066Sahrens #include <sys/zfs_dir.h> 58fa9e4066Sahrens #include <sys/zfs_acl.h> 59fa9e4066Sahrens #include <sys/zfs_ioctl.h> 60fa9e4066Sahrens #include <sys/fs/zfs.h> 61fa9e4066Sahrens #include <sys/dmu.h> 6255da60b9SMark J Musante #include <sys/dmu_objset.h> 63fa9e4066Sahrens #include <sys/spa.h> 64fa9e4066Sahrens #include <sys/txg.h> 65fa9e4066Sahrens #include <sys/dbuf.h> 66fa9e4066Sahrens #include <sys/zap.h> 670a586ceaSMark Shellenbaum #include <sys/sa.h> 68fa9e4066Sahrens #include <sys/dirent.h> 69fa9e4066Sahrens #include <sys/policy.h> 70fa9e4066Sahrens #include <sys/sunddi.h> 71fa9e4066Sahrens #include <sys/filio.h> 72c1ce5987SMark Shellenbaum #include <sys/sid.h> 
#include "fs/fs_subr.h"
#include <sys/zfs_ctldir.h>
#include <sys/zfs_fuid.h>
#include <sys/zfs_sa.h>
#include <sys/dnlc.h>
#include <sys/zfs_rlock.h>
#include <sys/extdirent.h>
#include <sys/kidmap.h>
#include <sys/cred.h>
#include <sys/attr.h>

/*
 * Programming rules.
 *
 * Each vnode op performs some logical unit of work.  To do this, the ZPL must
 * properly lock its in-core state, create a DMU transaction, do the work,
 * record this work in the intent log (ZIL), commit the DMU transaction,
 * and wait for the intent log to commit if it is a synchronous operation.
 * Moreover, the vnode ops must work in both normal and log replay context.
 * The ordering of events is important to avoid deadlocks and references
 * to freed memory.  The example below illustrates the following Big Rules:
 *
 *  (1)	A check must be made in each zfs thread for a mounted file system.
 *	This is done, avoiding races, with ZFS_ENTER(zfsvfs).
 *	A ZFS_EXIT(zfsvfs) is needed before all returns.  Any znodes
 *	must be checked with ZFS_VERIFY_ZP(zp).  Both of these macros
 *	can return EIO from the calling function.
 *
 *  (2)	VN_RELE() should always be the last thing except for zil_commit()
 *	(if necessary) and ZFS_EXIT().  This is for 3 reasons:
 *	First, if it's the last reference, the vnode/znode
 *	can be freed, so the zp may point to freed memory.  Second, the last
 *	reference will call zfs_zinactive(), which may induce a lot of work --
 *	pushing cached pages (which acquires range locks) and syncing out
 *	cached atime changes.  Third, zfs_zinactive() may require a new tx,
 *	which could deadlock the system if you were already holding one.
 *	If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
 *
 *  (3)	All range locks must be grabbed before calling dmu_tx_assign(),
 *	as they can span dmu_tx_assign() calls.
 *
 *  (4)	If ZPL locks are held, pass TXG_NOWAIT as the second argument to
 *	dmu_tx_assign().  This is critical because we don't want to block
 *	while holding locks.
 *
 *	If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT.  This
 *	reduces lock contention and CPU usage when we must wait (note that if
 *	throughput is constrained by the storage, nearly every transaction
 *	must wait).
 *
 *	Note, in particular, that if a lock is sometimes acquired before
 *	the tx assigns, and sometimes after (e.g. z_lock), then failing
 *	to use a non-blocking assign can deadlock the system.  The scenario:
 *
 *	Thread A has grabbed a lock before calling dmu_tx_assign().
 *	Thread B is in an already-assigned tx, and blocks for this lock.
 *	Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
 *	forever, because the previous txg can't quiesce until B's tx commits.
 *
 *	If dmu_tx_assign(TXG_NOWAIT) returns ERESTART,
 *	then drop all locks, call dmu_tx_wait(), and try again.  On subsequent
 *	calls to dmu_tx_assign(), pass TXG_WAITED rather than TXG_NOWAIT,
 *	to indicate that this operation has already called dmu_tx_wait().
 *	This will ensure that we don't retry forever, waiting a short bit
 *	each time.
 *
 *  (5)	If the operation succeeded, generate the intent log entry for it
 *	before dropping locks.  This ensures that the ordering of events
 *	in the intent log matches the order in which they actually occurred.
 *	During ZIL replay the zfs_log_* functions will update the sequence
 *	number to indicate the zil transaction has replayed.
 *
 *  (6)	At the end of each vnode op, the DMU tx must always commit,
 *	regardless of whether there were any errors.
 *
 *  (7)	After dropping all locks, invoke zil_commit(zilog, foid)
 *	to ensure that synchronous semantics are provided when necessary.
 *
 * In general, this is how things should be ordered in each vnode op:
 *
 *	ZFS_ENTER(zfsvfs);		// exit if unmounted
 * top:
 *	zfs_dirent_lock(&dl, ...)	// lock directory entry (may VN_HOLD())
 *	rw_enter(...);			// grab any other locks you need
 *	tx = dmu_tx_create(...);	// get DMU tx
 *	dmu_tx_hold_*();		// hold each object you might modify
 *	error = dmu_tx_assign(tx, waited ?
TXG_WAITED : TXG_NOWAIT); 160fa9e4066Sahrens * if (error) { 161fa9e4066Sahrens * rw_exit(...); // drop locks 162fa9e4066Sahrens * zfs_dirent_unlock(dl); // unlock directory entry 163fa9e4066Sahrens * VN_RELE(...); // release held vnodes 1641209a471SNeil Perrin * if (error == ERESTART) { 16569962b56SMatthew Ahrens * waited = B_TRUE; 1668a2f1b91Sahrens * dmu_tx_wait(tx); 1678a2f1b91Sahrens * dmu_tx_abort(tx); 168fa9e4066Sahrens * goto top; 169fa9e4066Sahrens * } 1708a2f1b91Sahrens * dmu_tx_abort(tx); // abort DMU tx 171fa9e4066Sahrens * ZFS_EXIT(zfsvfs); // finished in zfs 172fa9e4066Sahrens * return (error); // really out of space 173fa9e4066Sahrens * } 174fa9e4066Sahrens * error = do_real_work(); // do whatever this VOP does 175fa9e4066Sahrens * if (error == 0) 176b19a79ecSperrin * zfs_log_*(...); // on success, make ZIL entry 177fa9e4066Sahrens * dmu_tx_commit(tx); // commit DMU tx -- error or not 178fa9e4066Sahrens * rw_exit(...); // drop locks 179fa9e4066Sahrens * zfs_dirent_unlock(dl); // unlock directory entry 180fa9e4066Sahrens * VN_RELE(...); // release held vnodes 1815002558fSNeil Perrin * zil_commit(zilog, foid); // synchronous when necessary 182fa9e4066Sahrens * ZFS_EXIT(zfsvfs); // finished in zfs 183fa9e4066Sahrens * return (error); // done, report error 184fa9e4066Sahrens */ 1853cb34c60Sahrens 186fa9e4066Sahrens /* ARGSUSED */ 187fa9e4066Sahrens static int 188da6c28aaSamw zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 189fa9e4066Sahrens { 19067bd71c6Sperrin znode_t *zp = VTOZ(*vpp); 191b614fdaaSMark Shellenbaum zfsvfs_t *zfsvfs = zp->z_zfsvfs; 192b614fdaaSMark Shellenbaum 193b614fdaaSMark Shellenbaum ZFS_ENTER(zfsvfs); 194b614fdaaSMark Shellenbaum ZFS_VERIFY_ZP(zp); 19567bd71c6Sperrin 1960a586ceaSMark Shellenbaum if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && 197da6c28aaSamw ((flag & FAPPEND) == 0)) { 198b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 199be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 200da6c28aaSamw } 
201da6c28aaSamw 202da6c28aaSamw if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 203da6c28aaSamw ZTOV(zp)->v_type == VREG && 2040a586ceaSMark Shellenbaum !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { 205b614fdaaSMark Shellenbaum if (fs_vscan(*vpp, cr, 0) != 0) { 206b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 207be6fd75aSMatthew Ahrens return (SET_ERROR(EACCES)); 208b614fdaaSMark Shellenbaum } 209b614fdaaSMark Shellenbaum } 210da6c28aaSamw 21167bd71c6Sperrin /* Keep a count of the synchronous opens in the znode */ 21267bd71c6Sperrin if (flag & (FSYNC | FDSYNC)) 21367bd71c6Sperrin atomic_inc_32(&zp->z_sync_cnt); 214da6c28aaSamw 215b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 216fa9e4066Sahrens return (0); 217fa9e4066Sahrens } 218fa9e4066Sahrens 219fa9e4066Sahrens /* ARGSUSED */ 220fa9e4066Sahrens static int 221da6c28aaSamw zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 222da6c28aaSamw caller_context_t *ct) 223fa9e4066Sahrens { 22467bd71c6Sperrin znode_t *zp = VTOZ(vp); 225b614fdaaSMark Shellenbaum zfsvfs_t *zfsvfs = zp->z_zfsvfs; 226b614fdaaSMark Shellenbaum 227ee8143cbSChris Kirby /* 228ee8143cbSChris Kirby * Clean up any locks held by this process on the vp. 
229ee8143cbSChris Kirby */ 230ee8143cbSChris Kirby cleanlocks(vp, ddi_get_pid(), 0); 231ee8143cbSChris Kirby cleanshares(vp, ddi_get_pid()); 232ee8143cbSChris Kirby 233b614fdaaSMark Shellenbaum ZFS_ENTER(zfsvfs); 234b614fdaaSMark Shellenbaum ZFS_VERIFY_ZP(zp); 23567bd71c6Sperrin 23667bd71c6Sperrin /* Decrement the synchronous opens in the znode */ 237ecb72030Sperrin if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 23867bd71c6Sperrin atomic_dec_32(&zp->z_sync_cnt); 23967bd71c6Sperrin 240da6c28aaSamw if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 241da6c28aaSamw ZTOV(zp)->v_type == VREG && 2420a586ceaSMark Shellenbaum !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) 243da6c28aaSamw VERIFY(fs_vscan(vp, cr, 1) == 0); 244da6c28aaSamw 245b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 246fa9e4066Sahrens return (0); 247fa9e4066Sahrens } 248fa9e4066Sahrens 249fa9e4066Sahrens /* 250fa9e4066Sahrens * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 251fa9e4066Sahrens * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 
252fa9e4066Sahrens */ 253fa9e4066Sahrens static int 254fa9e4066Sahrens zfs_holey(vnode_t *vp, int cmd, offset_t *off) 255fa9e4066Sahrens { 256fa9e4066Sahrens znode_t *zp = VTOZ(vp); 257fa9e4066Sahrens uint64_t noff = (uint64_t)*off; /* new offset */ 258fa9e4066Sahrens uint64_t file_sz; 259fa9e4066Sahrens int error; 260fa9e4066Sahrens boolean_t hole; 261fa9e4066Sahrens 2620a586ceaSMark Shellenbaum file_sz = zp->z_size; 263fa9e4066Sahrens if (noff >= file_sz) { 264be6fd75aSMatthew Ahrens return (SET_ERROR(ENXIO)); 265fa9e4066Sahrens } 266fa9e4066Sahrens 267fa9e4066Sahrens if (cmd == _FIO_SEEK_HOLE) 268fa9e4066Sahrens hole = B_TRUE; 269fa9e4066Sahrens else 270fa9e4066Sahrens hole = B_FALSE; 271fa9e4066Sahrens 272fa9e4066Sahrens error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 273fa9e4066Sahrens 274*0fbc0cd0SMatthew Ahrens if (error == ESRCH) 275be6fd75aSMatthew Ahrens return (SET_ERROR(ENXIO)); 276*0fbc0cd0SMatthew Ahrens 277*0fbc0cd0SMatthew Ahrens /* 278*0fbc0cd0SMatthew Ahrens * We could find a hole that begins after the logical end-of-file, 279*0fbc0cd0SMatthew Ahrens * because dmu_offset_next() only works on whole blocks. If the 280*0fbc0cd0SMatthew Ahrens * EOF falls mid-block, then indicate that the "virtual hole" 281*0fbc0cd0SMatthew Ahrens * at the end of the file begins at the logical EOF, rather than 282*0fbc0cd0SMatthew Ahrens * at the end of the last block. 
283*0fbc0cd0SMatthew Ahrens */ 284*0fbc0cd0SMatthew Ahrens if (noff > file_sz) { 285*0fbc0cd0SMatthew Ahrens ASSERT(hole); 286*0fbc0cd0SMatthew Ahrens noff = file_sz; 287fa9e4066Sahrens } 288fa9e4066Sahrens 289fa9e4066Sahrens if (noff < *off) 290fa9e4066Sahrens return (error); 291fa9e4066Sahrens *off = noff; 292fa9e4066Sahrens return (error); 293fa9e4066Sahrens } 294fa9e4066Sahrens 295fa9e4066Sahrens /* ARGSUSED */ 296fa9e4066Sahrens static int 297fa9e4066Sahrens zfs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred, 298da6c28aaSamw int *rvalp, caller_context_t *ct) 299fa9e4066Sahrens { 300fa9e4066Sahrens offset_t off; 301fa9e4066Sahrens int error; 302fa9e4066Sahrens zfsvfs_t *zfsvfs; 303f18faf3fSek znode_t *zp; 304fa9e4066Sahrens 305fa9e4066Sahrens switch (com) { 306ecb72030Sperrin case _FIOFFS: 307fa9e4066Sahrens return (zfs_sync(vp->v_vfsp, 0, cred)); 308fa9e4066Sahrens 309ea8dc4b6Seschrock /* 310ea8dc4b6Seschrock * The following two ioctls are used by bfu. Faking out, 311ea8dc4b6Seschrock * necessary to avoid bfu errors. 
312ea8dc4b6Seschrock */ 313ecb72030Sperrin case _FIOGDIO: 314ecb72030Sperrin case _FIOSDIO: 315ea8dc4b6Seschrock return (0); 316ea8dc4b6Seschrock 317ecb72030Sperrin case _FIO_SEEK_DATA: 318ecb72030Sperrin case _FIO_SEEK_HOLE: 319fa9e4066Sahrens if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 320be6fd75aSMatthew Ahrens return (SET_ERROR(EFAULT)); 321fa9e4066Sahrens 322f18faf3fSek zp = VTOZ(vp); 323f18faf3fSek zfsvfs = zp->z_zfsvfs; 3243cb34c60Sahrens ZFS_ENTER(zfsvfs); 3253cb34c60Sahrens ZFS_VERIFY_ZP(zp); 326fa9e4066Sahrens 327fa9e4066Sahrens /* offset parameter is in/out */ 328fa9e4066Sahrens error = zfs_holey(vp, com, &off); 329fa9e4066Sahrens ZFS_EXIT(zfsvfs); 330fa9e4066Sahrens if (error) 331fa9e4066Sahrens return (error); 332fa9e4066Sahrens if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 333be6fd75aSMatthew Ahrens return (SET_ERROR(EFAULT)); 334fa9e4066Sahrens return (0); 335fa9e4066Sahrens } 336be6fd75aSMatthew Ahrens return (SET_ERROR(ENOTTY)); 337fa9e4066Sahrens } 338fa9e4066Sahrens 3390fab61baSJonathan W Adams /* 3400fab61baSJonathan W Adams * Utility functions to map and unmap a single physical page. These 3410fab61baSJonathan W Adams * are used to manage the mappable copies of ZFS file data, and therefore 3420fab61baSJonathan W Adams * do not update ref/mod bits. 3430fab61baSJonathan W Adams */ 3440fab61baSJonathan W Adams caddr_t 3450fab61baSJonathan W Adams zfs_map_page(page_t *pp, enum seg_rw rw) 3460fab61baSJonathan W Adams { 3470fab61baSJonathan W Adams if (kpm_enable) 3480fab61baSJonathan W Adams return (hat_kpm_mapin(pp, 0)); 3490fab61baSJonathan W Adams ASSERT(rw == S_READ || rw == S_WRITE); 3500fab61baSJonathan W Adams return (ppmapin(pp, PROT_READ | ((rw == S_WRITE) ? 
PROT_WRITE : 0), 3510fab61baSJonathan W Adams (caddr_t)-1)); 3520fab61baSJonathan W Adams } 3530fab61baSJonathan W Adams 3540fab61baSJonathan W Adams void 3550fab61baSJonathan W Adams zfs_unmap_page(page_t *pp, caddr_t addr) 3560fab61baSJonathan W Adams { 3570fab61baSJonathan W Adams if (kpm_enable) { 3580fab61baSJonathan W Adams hat_kpm_mapout(pp, 0, addr); 3590fab61baSJonathan W Adams } else { 3600fab61baSJonathan W Adams ppmapout(addr); 3610fab61baSJonathan W Adams } 3620fab61baSJonathan W Adams } 3630fab61baSJonathan W Adams 364fa9e4066Sahrens /* 365fa9e4066Sahrens * When a file is memory mapped, we must keep the IO data synchronized 366fa9e4066Sahrens * between the DMU cache and the memory mapped pages. What this means: 367fa9e4066Sahrens * 368fa9e4066Sahrens * On Write: If we find a memory mapped page, we write to *both* 369fa9e4066Sahrens * the page and the dmu buffer. 370fa9e4066Sahrens */ 371ac05c741SMark Maybee static void 372ac05c741SMark Maybee update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid) 373fa9e4066Sahrens { 374ac05c741SMark Maybee int64_t off; 375fa9e4066Sahrens 376fa9e4066Sahrens off = start & PAGEOFFSET; 377fa9e4066Sahrens for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 378fa9e4066Sahrens page_t *pp; 379ac05c741SMark Maybee uint64_t nbytes = MIN(PAGESIZE - off, len); 380fa9e4066Sahrens 381fa9e4066Sahrens if (pp = page_lookup(vp, start, SE_SHARED)) { 382fa9e4066Sahrens caddr_t va; 383fa9e4066Sahrens 3840fab61baSJonathan W Adams va = zfs_map_page(pp, S_WRITE); 3857bfdf011SNeil Perrin (void) dmu_read(os, oid, start+off, nbytes, va+off, 3867bfdf011SNeil Perrin DMU_READ_PREFETCH); 3870fab61baSJonathan W Adams zfs_unmap_page(pp, va); 388fa9e4066Sahrens page_unlock(pp); 389fa9e4066Sahrens } 390ac05c741SMark Maybee len -= nbytes; 391fa9e4066Sahrens off = 0; 392fa9e4066Sahrens } 393fa9e4066Sahrens } 394fa9e4066Sahrens 395fa9e4066Sahrens /* 396fa9e4066Sahrens * When a file is memory mapped, we must keep the IO data 
synchronized 397fa9e4066Sahrens * between the DMU cache and the memory mapped pages. What this means: 398fa9e4066Sahrens * 399fa9e4066Sahrens * On Read: We "read" preferentially from memory mapped pages, 400fa9e4066Sahrens * else we default from the dmu buffer. 401fa9e4066Sahrens * 402fa9e4066Sahrens * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 403f7170741SWill Andrews * the file is memory mapped. 404fa9e4066Sahrens */ 405fa9e4066Sahrens static int 406feb08c6bSbillm mappedread(vnode_t *vp, int nbytes, uio_t *uio) 407fa9e4066Sahrens { 408feb08c6bSbillm znode_t *zp = VTOZ(vp); 409feb08c6bSbillm objset_t *os = zp->z_zfsvfs->z_os; 410feb08c6bSbillm int64_t start, off; 411fa9e4066Sahrens int len = nbytes; 412fa9e4066Sahrens int error = 0; 413fa9e4066Sahrens 414fa9e4066Sahrens start = uio->uio_loffset; 415fa9e4066Sahrens off = start & PAGEOFFSET; 416fa9e4066Sahrens for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 417fa9e4066Sahrens page_t *pp; 418feb08c6bSbillm uint64_t bytes = MIN(PAGESIZE - off, len); 419fa9e4066Sahrens 420fa9e4066Sahrens if (pp = page_lookup(vp, start, SE_SHARED)) { 421fa9e4066Sahrens caddr_t va; 422fa9e4066Sahrens 4230fab61baSJonathan W Adams va = zfs_map_page(pp, S_READ); 424fa9e4066Sahrens error = uiomove(va + off, bytes, UIO_READ, uio); 4250fab61baSJonathan W Adams zfs_unmap_page(pp, va); 426fa9e4066Sahrens page_unlock(pp); 427fa9e4066Sahrens } else { 428feb08c6bSbillm error = dmu_read_uio(os, zp->z_id, uio, bytes); 429fa9e4066Sahrens } 430fa9e4066Sahrens len -= bytes; 431fa9e4066Sahrens off = 0; 432fa9e4066Sahrens if (error) 433fa9e4066Sahrens break; 434fa9e4066Sahrens } 435fa9e4066Sahrens return (error); 436fa9e4066Sahrens } 437fa9e4066Sahrens 438feb08c6bSbillm offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 439fa9e4066Sahrens 440fa9e4066Sahrens /* 441fa9e4066Sahrens * Read bytes from specified file into supplied buffer. 442fa9e4066Sahrens * 443fa9e4066Sahrens * IN: vp - vnode of file to be read from. 
444fa9e4066Sahrens * uio - structure supplying read location, range info, 445fa9e4066Sahrens * and return buffer. 446fa9e4066Sahrens * ioflag - SYNC flags; used to provide FRSYNC semantics. 447fa9e4066Sahrens * cr - credentials of caller. 448da6c28aaSamw * ct - caller context 449fa9e4066Sahrens * 450fa9e4066Sahrens * OUT: uio - updated offset and range, buffer filled. 451fa9e4066Sahrens * 452f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 453fa9e4066Sahrens * 454fa9e4066Sahrens * Side Effects: 455fa9e4066Sahrens * vp - atime updated if byte count > 0 456fa9e4066Sahrens */ 457fa9e4066Sahrens /* ARGSUSED */ 458fa9e4066Sahrens static int 459fa9e4066Sahrens zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 460fa9e4066Sahrens { 461fa9e4066Sahrens znode_t *zp = VTOZ(vp); 462fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 463f18faf3fSek objset_t *os; 464feb08c6bSbillm ssize_t n, nbytes; 465d5285caeSGeorge Wilson int error = 0; 466104e2ed7Sperrin rl_t *rl; 467c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio_t *xuio = NULL; 468fa9e4066Sahrens 4693cb34c60Sahrens ZFS_ENTER(zfsvfs); 4703cb34c60Sahrens ZFS_VERIFY_ZP(zp); 471f18faf3fSek os = zfsvfs->z_os; 472fa9e4066Sahrens 4730a586ceaSMark Shellenbaum if (zp->z_pflags & ZFS_AV_QUARANTINED) { 4740616c50eSmarks ZFS_EXIT(zfsvfs); 475be6fd75aSMatthew Ahrens return (SET_ERROR(EACCES)); 4760616c50eSmarks } 4770616c50eSmarks 478fa9e4066Sahrens /* 479fa9e4066Sahrens * Validate file offset 480fa9e4066Sahrens */ 481fa9e4066Sahrens if (uio->uio_loffset < (offset_t)0) { 482fa9e4066Sahrens ZFS_EXIT(zfsvfs); 483be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 484fa9e4066Sahrens } 485fa9e4066Sahrens 486fa9e4066Sahrens /* 487fa9e4066Sahrens * Fasttrack empty reads 488fa9e4066Sahrens */ 489fa9e4066Sahrens if (uio->uio_resid == 0) { 490fa9e4066Sahrens ZFS_EXIT(zfsvfs); 491fa9e4066Sahrens return (0); 492fa9e4066Sahrens } 493fa9e4066Sahrens 494fa9e4066Sahrens /* 
495104e2ed7Sperrin * Check for mandatory locks 496fa9e4066Sahrens */ 4970a586ceaSMark Shellenbaum if (MANDMODE(zp->z_mode)) { 498fa9e4066Sahrens if (error = chklock(vp, FREAD, 499fa9e4066Sahrens uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 500fa9e4066Sahrens ZFS_EXIT(zfsvfs); 501fa9e4066Sahrens return (error); 502fa9e4066Sahrens } 503fa9e4066Sahrens } 504fa9e4066Sahrens 505fa9e4066Sahrens /* 506fa9e4066Sahrens * If we're in FRSYNC mode, sync out this znode before reading it. 507fa9e4066Sahrens */ 50855da60b9SMark J Musante if (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 5095002558fSNeil Perrin zil_commit(zfsvfs->z_log, zp->z_id); 510fa9e4066Sahrens 511fa9e4066Sahrens /* 512104e2ed7Sperrin * Lock the range against changes. 513fa9e4066Sahrens */ 514104e2ed7Sperrin rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 515104e2ed7Sperrin 516fa9e4066Sahrens /* 517fa9e4066Sahrens * If we are reading past end-of-file we can skip 518fa9e4066Sahrens * to the end; but we might still need to set atime. 
519fa9e4066Sahrens */ 5200a586ceaSMark Shellenbaum if (uio->uio_loffset >= zp->z_size) { 521fa9e4066Sahrens error = 0; 522fa9e4066Sahrens goto out; 523fa9e4066Sahrens } 524fa9e4066Sahrens 5250a586ceaSMark Shellenbaum ASSERT(uio->uio_loffset < zp->z_size); 5260a586ceaSMark Shellenbaum n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); 527feb08c6bSbillm 528c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if ((uio->uio_extflg == UIO_XUIO) && 529c242f9a0Schunli zhang - Sun Microsystems - Irvine United States (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { 530c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int nblk; 531c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int blksz = zp->z_blksz; 532c242f9a0Schunli zhang - Sun Microsystems - Irvine United States uint64_t offset = uio->uio_loffset; 533c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 534c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio = (xuio_t *)uio; 535c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if ((ISP2(blksz))) { 536c242f9a0Schunli zhang - Sun Microsystems - Irvine United States nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, 537c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz)) / blksz; 538c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } else { 539c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(offset + n <= blksz); 540c242f9a0Schunli zhang - Sun Microsystems - Irvine United States nblk = 1; 541c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 542570de38fSSurya Prakki (void) dmu_xuio_init(xuio, nblk); 543c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 544c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (vn_has_cached_data(vp)) { 545c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 546c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * For simplicity, 
we always allocate a full buffer 547c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * even if we only expect to read a portion of a block. 548c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 549c242f9a0Schunli zhang - Sun Microsystems - Irvine United States while (--nblk >= 0) { 550570de38fSSurya Prakki (void) dmu_xuio_add(xuio, 5510a586ceaSMark Shellenbaum dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 5520a586ceaSMark Shellenbaum blksz), 0, blksz); 553c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 554c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 555c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 556c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 557feb08c6bSbillm while (n > 0) { 558feb08c6bSbillm nbytes = MIN(n, zfs_read_chunk_size - 559feb08c6bSbillm P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 560fa9e4066Sahrens 561feb08c6bSbillm if (vn_has_cached_data(vp)) 562feb08c6bSbillm error = mappedread(vp, nbytes, uio); 563feb08c6bSbillm else 564feb08c6bSbillm error = dmu_read_uio(os, zp->z_id, uio, nbytes); 565b87f3af3Sperrin if (error) { 566b87f3af3Sperrin /* convert checksum errors into IO errors */ 567b87f3af3Sperrin if (error == ECKSUM) 568be6fd75aSMatthew Ahrens error = SET_ERROR(EIO); 569feb08c6bSbillm break; 570b87f3af3Sperrin } 571fa9e4066Sahrens 572feb08c6bSbillm n -= nbytes; 573fa9e4066Sahrens } 574fa9e4066Sahrens out: 575c5c6ffa0Smaybee zfs_range_unlock(rl); 576fa9e4066Sahrens 577fa9e4066Sahrens ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 578fa9e4066Sahrens ZFS_EXIT(zfsvfs); 579fa9e4066Sahrens return (error); 580fa9e4066Sahrens } 581fa9e4066Sahrens 582fa9e4066Sahrens /* 583fa9e4066Sahrens * Write the bytes to a file. 584fa9e4066Sahrens * 585fa9e4066Sahrens * IN: vp - vnode of file to be written to. 586fa9e4066Sahrens * uio - structure supplying write location, range info, 587fa9e4066Sahrens * and data buffer. 
588f7170741SWill Andrews * ioflag - FAPPEND, FSYNC, and/or FDSYNC. FAPPEND is 589f7170741SWill Andrews * set if in append mode. 590fa9e4066Sahrens * cr - credentials of caller. 591da6c28aaSamw * ct - caller context (NFS/CIFS fem monitor only) 592fa9e4066Sahrens * 593fa9e4066Sahrens * OUT: uio - updated offset and range. 594fa9e4066Sahrens * 595f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 596fa9e4066Sahrens * 597fa9e4066Sahrens * Timestamps: 598fa9e4066Sahrens * vp - ctime|mtime updated if byte count > 0 599fa9e4066Sahrens */ 6000a586ceaSMark Shellenbaum 601fa9e4066Sahrens /* ARGSUSED */ 602fa9e4066Sahrens static int 603fa9e4066Sahrens zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 604fa9e4066Sahrens { 605fa9e4066Sahrens znode_t *zp = VTOZ(vp); 606fa9e4066Sahrens rlim64_t limit = uio->uio_llimit; 607fa9e4066Sahrens ssize_t start_resid = uio->uio_resid; 608fa9e4066Sahrens ssize_t tx_bytes; 609fa9e4066Sahrens uint64_t end_size; 610fa9e4066Sahrens dmu_tx_t *tx; 611fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 612f18faf3fSek zilog_t *zilog; 613fa9e4066Sahrens offset_t woff; 614fa9e4066Sahrens ssize_t n, nbytes; 615104e2ed7Sperrin rl_t *rl; 616fa9e4066Sahrens int max_blksz = zfsvfs->z_max_blksz; 617d5285caeSGeorge Wilson int error = 0; 6182fdbea25SAleksandr Guzovskiy arc_buf_t *abuf; 619d5285caeSGeorge Wilson iovec_t *aiov = NULL; 620c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio_t *xuio = NULL; 621c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int i_iov = 0; 622c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int iovcnt = uio->uio_iovcnt; 623c242f9a0Schunli zhang - Sun Microsystems - Irvine United States iovec_t *iovp = uio->uio_iov; 624c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int write_eof; 6250a586ceaSMark Shellenbaum int count = 0; 6260a586ceaSMark Shellenbaum sa_bulk_attr_t bulk[4]; 6270a586ceaSMark Shellenbaum uint64_t mtime[2], 
ctime[2]; 628fa9e4066Sahrens 629fa9e4066Sahrens /* 630fa9e4066Sahrens * Fasttrack empty write 631fa9e4066Sahrens */ 632104e2ed7Sperrin n = start_resid; 633fa9e4066Sahrens if (n == 0) 634fa9e4066Sahrens return (0); 635fa9e4066Sahrens 636104e2ed7Sperrin if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 637104e2ed7Sperrin limit = MAXOFFSET_T; 638104e2ed7Sperrin 6393cb34c60Sahrens ZFS_ENTER(zfsvfs); 6403cb34c60Sahrens ZFS_VERIFY_ZP(zp); 641c09193bfSmarks 6420a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 6430a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 6440a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 6450a586ceaSMark Shellenbaum &zp->z_size, 8); 6460a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 6470a586ceaSMark Shellenbaum &zp->z_pflags, 8); 6480a586ceaSMark Shellenbaum 6492144b121SMarcel Telka /* 6502144b121SMarcel Telka * In a case vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g. snapshots) our 6512144b121SMarcel Telka * callers might not be able to detect properly that we are read-only, 6522144b121SMarcel Telka * so check it explicitly here. 
6532144b121SMarcel Telka */ 6542144b121SMarcel Telka if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 6552144b121SMarcel Telka ZFS_EXIT(zfsvfs); 6562144b121SMarcel Telka return (SET_ERROR(EROFS)); 6572144b121SMarcel Telka } 6582144b121SMarcel Telka 659c09193bfSmarks /* 660c09193bfSmarks * If immutable or not appending then return EPERM 661c09193bfSmarks */ 6620a586ceaSMark Shellenbaum if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || 6630a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 6640a586ceaSMark Shellenbaum (uio->uio_loffset < zp->z_size))) { 665c09193bfSmarks ZFS_EXIT(zfsvfs); 666be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 667c09193bfSmarks } 668c09193bfSmarks 669f18faf3fSek zilog = zfsvfs->z_log; 670fa9e4066Sahrens 67141865f27SWilliam Gorrell /* 67241865f27SWilliam Gorrell * Validate file offset 67341865f27SWilliam Gorrell */ 6740a586ceaSMark Shellenbaum woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; 67541865f27SWilliam Gorrell if (woff < 0) { 67641865f27SWilliam Gorrell ZFS_EXIT(zfsvfs); 677be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 67841865f27SWilliam Gorrell } 67941865f27SWilliam Gorrell 68041865f27SWilliam Gorrell /* 68141865f27SWilliam Gorrell * Check for mandatory locks before calling zfs_range_lock() 68241865f27SWilliam Gorrell * in order to prevent a deadlock with locks set via fcntl(). 68341865f27SWilliam Gorrell */ 6840a586ceaSMark Shellenbaum if (MANDMODE((mode_t)zp->z_mode) && 68541865f27SWilliam Gorrell (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 68641865f27SWilliam Gorrell ZFS_EXIT(zfsvfs); 68741865f27SWilliam Gorrell return (error); 68841865f27SWilliam Gorrell } 68941865f27SWilliam Gorrell 690fa9e4066Sahrens /* 691c5c6ffa0Smaybee * Pre-fault the pages to ensure slow (eg NFS) pages 692104e2ed7Sperrin * don't hold up txg. 693c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Skip this if uio contains loaned arc_buf. 
694fa9e4066Sahrens */ 695c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if ((uio->uio_extflg == UIO_XUIO) && 696c242f9a0Schunli zhang - Sun Microsystems - Irvine United States (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) 697c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio = (xuio_t *)uio; 698c242f9a0Schunli zhang - Sun Microsystems - Irvine United States else 699ff866947SSanjeev Bagewadi uio_prefaultpages(MIN(n, max_blksz), uio); 700fa9e4066Sahrens 701fa9e4066Sahrens /* 702fa9e4066Sahrens * If in append mode, set the io offset pointer to eof. 703fa9e4066Sahrens */ 704104e2ed7Sperrin if (ioflag & FAPPEND) { 705104e2ed7Sperrin /* 70641865f27SWilliam Gorrell * Obtain an appending range lock to guarantee file append 70741865f27SWilliam Gorrell * semantics. We reset the write offset once we have the lock. 708104e2ed7Sperrin */ 709104e2ed7Sperrin rl = zfs_range_lock(zp, 0, n, RL_APPEND); 71041865f27SWilliam Gorrell woff = rl->r_off; 711104e2ed7Sperrin if (rl->r_len == UINT64_MAX) { 71241865f27SWilliam Gorrell /* 71341865f27SWilliam Gorrell * We overlocked the file because this write will cause 71441865f27SWilliam Gorrell * the file block size to increase. 71541865f27SWilliam Gorrell * Note that zp_size cannot change with this lock held. 71641865f27SWilliam Gorrell */ 7170a586ceaSMark Shellenbaum woff = zp->z_size; 718104e2ed7Sperrin } 71941865f27SWilliam Gorrell uio->uio_loffset = woff; 720fa9e4066Sahrens } else { 721fa9e4066Sahrens /* 72241865f27SWilliam Gorrell * Note that if the file block size will change as a result of 72341865f27SWilliam Gorrell * this write, then this range lock will lock the entire file 72441865f27SWilliam Gorrell * so that we can re-write the block safely. 
725fa9e4066Sahrens */ 726104e2ed7Sperrin rl = zfs_range_lock(zp, woff, n, RL_WRITER); 727fa9e4066Sahrens } 728fa9e4066Sahrens 729fa9e4066Sahrens if (woff >= limit) { 730feb08c6bSbillm zfs_range_unlock(rl); 731feb08c6bSbillm ZFS_EXIT(zfsvfs); 732be6fd75aSMatthew Ahrens return (SET_ERROR(EFBIG)); 733fa9e4066Sahrens } 734fa9e4066Sahrens 735fa9e4066Sahrens if ((woff + n) > limit || woff > (limit - n)) 736fa9e4066Sahrens n = limit - woff; 737fa9e4066Sahrens 738c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* Will this write extend the file length? */ 7390a586ceaSMark Shellenbaum write_eof = (woff + n > zp->z_size); 740c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 7410a586ceaSMark Shellenbaum end_size = MAX(zp->z_size, woff + n); 742fa9e4066Sahrens 743104e2ed7Sperrin /* 744feb08c6bSbillm * Write the file in reasonable size chunks. Each chunk is written 745feb08c6bSbillm * in a separate transaction; this keeps the intent log records small 746feb08c6bSbillm * and allows us to do more fine-grained space accounting. 
747104e2ed7Sperrin */ 748feb08c6bSbillm while (n > 0) { 7492fdbea25SAleksandr Guzovskiy abuf = NULL; 7502fdbea25SAleksandr Guzovskiy woff = uio->uio_loffset; 7510a586ceaSMark Shellenbaum if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 7520a586ceaSMark Shellenbaum zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 7532fdbea25SAleksandr Guzovskiy if (abuf != NULL) 7542fdbea25SAleksandr Guzovskiy dmu_return_arcbuf(abuf); 755be6fd75aSMatthew Ahrens error = SET_ERROR(EDQUOT); 75614843421SMatthew Ahrens break; 75714843421SMatthew Ahrens } 7582fdbea25SAleksandr Guzovskiy 759c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (xuio && abuf == NULL) { 760c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(i_iov < iovcnt); 761c242f9a0Schunli zhang - Sun Microsystems - Irvine United States aiov = &iovp[i_iov]; 762c242f9a0Schunli zhang - Sun Microsystems - Irvine United States abuf = dmu_xuio_arcbuf(xuio, i_iov); 763c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_xuio_clear(xuio, i_iov); 764c242f9a0Schunli zhang - Sun Microsystems - Irvine United States DTRACE_PROBE3(zfs_cp_write, int, i_iov, 765c242f9a0Schunli zhang - Sun Microsystems - Irvine United States iovec_t *, aiov, arc_buf_t *, abuf); 766c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT((aiov->iov_base == abuf->b_data) || 767c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ((char *)aiov->iov_base - (char *)abuf->b_data + 768c242f9a0Schunli zhang - Sun Microsystems - Irvine United States aiov->iov_len == arc_buf_size(abuf))); 769c242f9a0Schunli zhang - Sun Microsystems - Irvine United States i_iov++; 770c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } else if (abuf == NULL && n >= max_blksz && 7710a586ceaSMark Shellenbaum woff >= zp->z_size && 7722fdbea25SAleksandr Guzovskiy P2PHASE(woff, max_blksz) == 0 && 7732fdbea25SAleksandr Guzovskiy zp->z_blksz == max_blksz) { 774c242f9a0Schunli zhang - Sun Microsystems - Irvine 
United States /* 775c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * This write covers a full block. "Borrow" a buffer 776c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * from the dmu so that we can fill it before we enter 777c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * a transaction. This avoids the possibility of 778c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * holding up the transaction if the data copy hangs 779c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * up on a pagefault (e.g., from an NFS server mapping). 780c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 7812fdbea25SAleksandr Guzovskiy size_t cbytes; 7822fdbea25SAleksandr Guzovskiy 7830a586ceaSMark Shellenbaum abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 7840a586ceaSMark Shellenbaum max_blksz); 7852fdbea25SAleksandr Guzovskiy ASSERT(abuf != NULL); 7862fdbea25SAleksandr Guzovskiy ASSERT(arc_buf_size(abuf) == max_blksz); 7872fdbea25SAleksandr Guzovskiy if (error = uiocopy(abuf->b_data, max_blksz, 7882fdbea25SAleksandr Guzovskiy UIO_WRITE, uio, &cbytes)) { 7892fdbea25SAleksandr Guzovskiy dmu_return_arcbuf(abuf); 7902fdbea25SAleksandr Guzovskiy break; 7912fdbea25SAleksandr Guzovskiy } 7922fdbea25SAleksandr Guzovskiy ASSERT(cbytes == max_blksz); 7932fdbea25SAleksandr Guzovskiy } 7942fdbea25SAleksandr Guzovskiy 7952fdbea25SAleksandr Guzovskiy /* 7962fdbea25SAleksandr Guzovskiy * Start a transaction. 
7972fdbea25SAleksandr Guzovskiy */ 798feb08c6bSbillm tx = dmu_tx_create(zfsvfs->z_os); 7990a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 800feb08c6bSbillm dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 8010a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 802e722410cSMatthew Ahrens error = dmu_tx_assign(tx, TXG_WAIT); 803feb08c6bSbillm if (error) { 804feb08c6bSbillm dmu_tx_abort(tx); 8052fdbea25SAleksandr Guzovskiy if (abuf != NULL) 8062fdbea25SAleksandr Guzovskiy dmu_return_arcbuf(abuf); 807feb08c6bSbillm break; 808feb08c6bSbillm } 809104e2ed7Sperrin 810feb08c6bSbillm /* 811feb08c6bSbillm * If zfs_range_lock() over-locked we grow the blocksize 812feb08c6bSbillm * and then reduce the lock range. This will only happen 813feb08c6bSbillm * on the first iteration since zfs_range_reduce() will 814feb08c6bSbillm * shrink down r_len to the appropriate size. 815feb08c6bSbillm */ 816feb08c6bSbillm if (rl->r_len == UINT64_MAX) { 817feb08c6bSbillm uint64_t new_blksz; 818feb08c6bSbillm 819feb08c6bSbillm if (zp->z_blksz > max_blksz) { 820feb08c6bSbillm ASSERT(!ISP2(zp->z_blksz)); 821feb08c6bSbillm new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE); 822feb08c6bSbillm } else { 823feb08c6bSbillm new_blksz = MIN(end_size, max_blksz); 824feb08c6bSbillm } 825feb08c6bSbillm zfs_grow_blocksize(zp, new_blksz, tx); 826feb08c6bSbillm zfs_range_reduce(rl, woff, n); 827fa9e4066Sahrens } 828fa9e4066Sahrens 829fa9e4066Sahrens /* 830fa9e4066Sahrens * XXX - should we really limit each write to z_max_blksz? 831fa9e4066Sahrens * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 
832fa9e4066Sahrens */ 833fa9e4066Sahrens nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 834fa9e4066Sahrens 8352fdbea25SAleksandr Guzovskiy if (abuf == NULL) { 8362fdbea25SAleksandr Guzovskiy tx_bytes = uio->uio_resid; 83794d1a210STim Haley error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), 83894d1a210STim Haley uio, nbytes, tx); 8392fdbea25SAleksandr Guzovskiy tx_bytes -= uio->uio_resid; 8402fdbea25SAleksandr Guzovskiy } else { 8412fdbea25SAleksandr Guzovskiy tx_bytes = nbytes; 842c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); 843c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 844c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * If this is not a full block write, but we are 845c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * extending the file past EOF and this data starts 846c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * block-aligned, use assign_arcbuf(). Otherwise, 847c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * write via dmu_write(). 
848c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 849c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (tx_bytes < max_blksz && (!write_eof || 850c242f9a0Schunli zhang - Sun Microsystems - Irvine United States aiov->iov_base != abuf->b_data)) { 851c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(xuio); 852c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_write(zfsvfs->z_os, zp->z_id, woff, 853c242f9a0Schunli zhang - Sun Microsystems - Irvine United States aiov->iov_len, aiov->iov_base, tx); 854c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_return_arcbuf(abuf); 855c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio_stat_wbuf_copied(); 856c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } else { 857c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(xuio || tx_bytes == max_blksz); 8580a586ceaSMark Shellenbaum dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), 8590a586ceaSMark Shellenbaum woff, abuf, tx); 860c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 8612fdbea25SAleksandr Guzovskiy ASSERT(tx_bytes <= uio->uio_resid); 8622fdbea25SAleksandr Guzovskiy uioskip(uio, tx_bytes); 8632fdbea25SAleksandr Guzovskiy } 8642fdbea25SAleksandr Guzovskiy if (tx_bytes && vn_has_cached_data(vp)) { 865ac05c741SMark Maybee update_pages(vp, woff, 866ac05c741SMark Maybee tx_bytes, zfsvfs->z_os, zp->z_id); 8672fdbea25SAleksandr Guzovskiy } 868fa9e4066Sahrens 869feb08c6bSbillm /* 870feb08c6bSbillm * If we made no progress, we're done. If we made even 871feb08c6bSbillm * partial progress, update the znode and ZIL accordingly. 
872feb08c6bSbillm */ 873feb08c6bSbillm if (tx_bytes == 0) { 8740a586ceaSMark Shellenbaum (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 8750a586ceaSMark Shellenbaum (void *)&zp->z_size, sizeof (uint64_t), tx); 876af2c4821Smaybee dmu_tx_commit(tx); 877feb08c6bSbillm ASSERT(error != 0); 878fa9e4066Sahrens break; 879fa9e4066Sahrens } 880fa9e4066Sahrens 881169cdae2Smarks /* 882169cdae2Smarks * Clear Set-UID/Set-GID bits on successful write if not 883169cdae2Smarks * privileged and at least one of the excute bits is set. 884169cdae2Smarks * 885169cdae2Smarks * It would be nice to to this after all writes have 886169cdae2Smarks * been done, but that would still expose the ISUID/ISGID 887169cdae2Smarks * to another app after the partial write is committed. 888da6c28aaSamw * 889f1696b23SMark Shellenbaum * Note: we don't call zfs_fuid_map_id() here because 890f1696b23SMark Shellenbaum * user 0 is not an ephemeral uid. 891169cdae2Smarks */ 892169cdae2Smarks mutex_enter(&zp->z_acl_lock); 8930a586ceaSMark Shellenbaum if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | 894169cdae2Smarks (S_IXUSR >> 6))) != 0 && 8950a586ceaSMark Shellenbaum (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && 896169cdae2Smarks secpolicy_vnode_setid_retain(cr, 8970a586ceaSMark Shellenbaum (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) { 8980a586ceaSMark Shellenbaum uint64_t newmode; 8990a586ceaSMark Shellenbaum zp->z_mode &= ~(S_ISUID | S_ISGID); 9000a586ceaSMark Shellenbaum newmode = zp->z_mode; 9010a586ceaSMark Shellenbaum (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), 9020a586ceaSMark Shellenbaum (void *)&newmode, sizeof (uint64_t), tx); 903169cdae2Smarks } 904169cdae2Smarks mutex_exit(&zp->z_acl_lock); 905169cdae2Smarks 9060a586ceaSMark Shellenbaum zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 9070a586ceaSMark Shellenbaum B_TRUE); 908fa9e4066Sahrens 909fa9e4066Sahrens /* 910feb08c6bSbillm * Update the file size (zp_size) if it has changed; 911feb08c6bSbillm * account for 
possible concurrent updates. 912fa9e4066Sahrens */ 9130a586ceaSMark Shellenbaum while ((end_size = zp->z_size) < uio->uio_loffset) { 9140a586ceaSMark Shellenbaum (void) atomic_cas_64(&zp->z_size, end_size, 915fa9e4066Sahrens uio->uio_loffset); 9160a586ceaSMark Shellenbaum ASSERT(error == 0); 9170a586ceaSMark Shellenbaum } 918c0e50c98SNeil Perrin /* 919c0e50c98SNeil Perrin * If we are replaying and eof is non zero then force 920c0e50c98SNeil Perrin * the file size to the specified eof. Note, there's no 921c0e50c98SNeil Perrin * concurrency during replay. 922c0e50c98SNeil Perrin */ 923c0e50c98SNeil Perrin if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) 924c0e50c98SNeil Perrin zp->z_size = zfsvfs->z_replay_eof; 925c0e50c98SNeil Perrin 9260a586ceaSMark Shellenbaum error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 9270a586ceaSMark Shellenbaum 928feb08c6bSbillm zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 929feb08c6bSbillm dmu_tx_commit(tx); 930fa9e4066Sahrens 931feb08c6bSbillm if (error != 0) 932feb08c6bSbillm break; 933feb08c6bSbillm ASSERT(tx_bytes == nbytes); 934feb08c6bSbillm n -= nbytes; 935ff866947SSanjeev Bagewadi 936ff866947SSanjeev Bagewadi if (!xuio && n > 0) 937ff866947SSanjeev Bagewadi uio_prefaultpages(MIN(n, max_blksz), uio); 938feb08c6bSbillm } 939fa9e4066Sahrens 940c5c6ffa0Smaybee zfs_range_unlock(rl); 941fa9e4066Sahrens 942fa9e4066Sahrens /* 943fa9e4066Sahrens * If we're in replay mode, or we made no progress, return error. 944fa9e4066Sahrens * Otherwise, it's at least a partial write, so it's successful. 
945fa9e4066Sahrens */ 9461209a471SNeil Perrin if (zfsvfs->z_replay || uio->uio_resid == start_resid) { 947fa9e4066Sahrens ZFS_EXIT(zfsvfs); 948fa9e4066Sahrens return (error); 949fa9e4066Sahrens } 950fa9e4066Sahrens 95155da60b9SMark J Musante if (ioflag & (FSYNC | FDSYNC) || 95255da60b9SMark J Musante zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 9535002558fSNeil Perrin zil_commit(zilog, zp->z_id); 954fa9e4066Sahrens 955fa9e4066Sahrens ZFS_EXIT(zfsvfs); 956fa9e4066Sahrens return (0); 957fa9e4066Sahrens } 958fa9e4066Sahrens 959c5c6ffa0Smaybee void 960b24ab676SJeff Bonwick zfs_get_done(zgd_t *zgd, int error) 961c5c6ffa0Smaybee { 962b24ab676SJeff Bonwick znode_t *zp = zgd->zgd_private; 963b24ab676SJeff Bonwick objset_t *os = zp->z_zfsvfs->z_os; 964b24ab676SJeff Bonwick 965b24ab676SJeff Bonwick if (zgd->zgd_db) 966b24ab676SJeff Bonwick dmu_buf_rele(zgd->zgd_db, zgd); 967b24ab676SJeff Bonwick 968b24ab676SJeff Bonwick zfs_range_unlock(zgd->zgd_rl); 969c5c6ffa0Smaybee 9709d3574bfSNeil Perrin /* 9719d3574bfSNeil Perrin * Release the vnode asynchronously as we currently have the 9729d3574bfSNeil Perrin * txg stopped from syncing. 9739d3574bfSNeil Perrin */ 974b24ab676SJeff Bonwick VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 975b24ab676SJeff Bonwick 976b24ab676SJeff Bonwick if (error == 0 && zgd->zgd_bp) 977b24ab676SJeff Bonwick zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 978b24ab676SJeff Bonwick 97967bd71c6Sperrin kmem_free(zgd, sizeof (zgd_t)); 980c5c6ffa0Smaybee } 981c5c6ffa0Smaybee 982c87b8fc5SMark J Musante #ifdef DEBUG 983c87b8fc5SMark J Musante static int zil_fault_io = 0; 984c87b8fc5SMark J Musante #endif 985c87b8fc5SMark J Musante 986fa9e4066Sahrens /* 987fa9e4066Sahrens * Get data to generate a TX_WRITE intent log record. 
988fa9e4066Sahrens */ 989fa9e4066Sahrens int 990c5c6ffa0Smaybee zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 991fa9e4066Sahrens { 992fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 993fa9e4066Sahrens objset_t *os = zfsvfs->z_os; 994fa9e4066Sahrens znode_t *zp; 995b24ab676SJeff Bonwick uint64_t object = lr->lr_foid; 996b24ab676SJeff Bonwick uint64_t offset = lr->lr_offset; 997b24ab676SJeff Bonwick uint64_t size = lr->lr_length; 998b24ab676SJeff Bonwick blkptr_t *bp = &lr->lr_blkptr; 999c5c6ffa0Smaybee dmu_buf_t *db; 100067bd71c6Sperrin zgd_t *zgd; 1001fa9e4066Sahrens int error = 0; 1002fa9e4066Sahrens 1003b24ab676SJeff Bonwick ASSERT(zio != NULL); 1004b24ab676SJeff Bonwick ASSERT(size != 0); 1005fa9e4066Sahrens 1006fa9e4066Sahrens /* 1007104e2ed7Sperrin * Nothing to do if the file has been removed 1008fa9e4066Sahrens */ 1009b24ab676SJeff Bonwick if (zfs_zget(zfsvfs, object, &zp) != 0) 1010be6fd75aSMatthew Ahrens return (SET_ERROR(ENOENT)); 1011893a6d32Sahrens if (zp->z_unlinked) { 10129d3574bfSNeil Perrin /* 10139d3574bfSNeil Perrin * Release the vnode asynchronously as we currently have the 10149d3574bfSNeil Perrin * txg stopped from syncing. 10159d3574bfSNeil Perrin */ 10169d3574bfSNeil Perrin VN_RELE_ASYNC(ZTOV(zp), 10179d3574bfSNeil Perrin dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1018be6fd75aSMatthew Ahrens return (SET_ERROR(ENOENT)); 1019fa9e4066Sahrens } 1020fa9e4066Sahrens 1021b24ab676SJeff Bonwick zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 1022b24ab676SJeff Bonwick zgd->zgd_zilog = zfsvfs->z_log; 1023b24ab676SJeff Bonwick zgd->zgd_private = zp; 1024b24ab676SJeff Bonwick 1025fa9e4066Sahrens /* 1026fa9e4066Sahrens * Write records come in two flavors: immediate and indirect. 
1027fa9e4066Sahrens * For small writes it's cheaper to store the data with the 1028fa9e4066Sahrens * log record (immediate); for large writes it's cheaper to 1029fa9e4066Sahrens * sync the data and get a pointer to it (indirect) so that 1030fa9e4066Sahrens * we don't have to write the data twice. 1031fa9e4066Sahrens */ 1032104e2ed7Sperrin if (buf != NULL) { /* immediate write */ 1033b24ab676SJeff Bonwick zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); 1034104e2ed7Sperrin /* test for truncation needs to be done while range locked */ 10350a586ceaSMark Shellenbaum if (offset >= zp->z_size) { 1036be6fd75aSMatthew Ahrens error = SET_ERROR(ENOENT); 1037b24ab676SJeff Bonwick } else { 1038b24ab676SJeff Bonwick error = dmu_read(os, object, offset, size, buf, 1039b24ab676SJeff Bonwick DMU_READ_NO_PREFETCH); 1040104e2ed7Sperrin } 1041b24ab676SJeff Bonwick ASSERT(error == 0 || error == ENOENT); 1042104e2ed7Sperrin } else { /* indirect write */ 1043fa9e4066Sahrens /* 1044104e2ed7Sperrin * Have to lock the whole block to ensure when it's 1045104e2ed7Sperrin * written out and it's checksum is being calculated 1046104e2ed7Sperrin * that no one can change the data. We need to re-check 1047104e2ed7Sperrin * blocksize after we get the lock in case it's changed! 1048fa9e4066Sahrens */ 1049104e2ed7Sperrin for (;;) { 1050b24ab676SJeff Bonwick uint64_t blkoff; 1051b24ab676SJeff Bonwick size = zp->z_blksz; 1052dfe73b3dSJeff Bonwick blkoff = ISP2(size) ? 
P2PHASE(offset, size) : offset; 1053b24ab676SJeff Bonwick offset -= blkoff; 1054b24ab676SJeff Bonwick zgd->zgd_rl = zfs_range_lock(zp, offset, size, 1055b24ab676SJeff Bonwick RL_READER); 1056b24ab676SJeff Bonwick if (zp->z_blksz == size) 1057104e2ed7Sperrin break; 1058b24ab676SJeff Bonwick offset += blkoff; 1059b24ab676SJeff Bonwick zfs_range_unlock(zgd->zgd_rl); 1060104e2ed7Sperrin } 1061104e2ed7Sperrin /* test for truncation needs to be done while range locked */ 10620a586ceaSMark Shellenbaum if (lr->lr_offset >= zp->z_size) 1063be6fd75aSMatthew Ahrens error = SET_ERROR(ENOENT); 1064c87b8fc5SMark J Musante #ifdef DEBUG 1065c87b8fc5SMark J Musante if (zil_fault_io) { 1066be6fd75aSMatthew Ahrens error = SET_ERROR(EIO); 1067c87b8fc5SMark J Musante zil_fault_io = 0; 1068c87b8fc5SMark J Musante } 1069c87b8fc5SMark J Musante #endif 1070b24ab676SJeff Bonwick if (error == 0) 107147cb52daSJeff Bonwick error = dmu_buf_hold(os, object, offset, zgd, &db, 107247cb52daSJeff Bonwick DMU_READ_NO_PREFETCH); 1073c87b8fc5SMark J Musante 1074975c32a0SNeil Perrin if (error == 0) { 107580901aeaSGeorge Wilson blkptr_t *obp = dmu_buf_get_blkptr(db); 107680901aeaSGeorge Wilson if (obp) { 107780901aeaSGeorge Wilson ASSERT(BP_IS_HOLE(bp)); 107880901aeaSGeorge Wilson *bp = *obp; 107980901aeaSGeorge Wilson } 108080901aeaSGeorge Wilson 1081b24ab676SJeff Bonwick zgd->zgd_db = db; 1082b24ab676SJeff Bonwick zgd->zgd_bp = bp; 1083b24ab676SJeff Bonwick 1084b24ab676SJeff Bonwick ASSERT(db->db_offset == offset); 1085b24ab676SJeff Bonwick ASSERT(db->db_size == size); 1086b24ab676SJeff Bonwick 1087b24ab676SJeff Bonwick error = dmu_sync(zio, lr->lr_common.lrc_txg, 1088b24ab676SJeff Bonwick zfs_get_done, zgd); 1089b24ab676SJeff Bonwick ASSERT(error || lr->lr_length <= zp->z_blksz); 1090b24ab676SJeff Bonwick 1091975c32a0SNeil Perrin /* 1092b24ab676SJeff Bonwick * On success, we need to wait for the write I/O 1093b24ab676SJeff Bonwick * initiated by dmu_sync() to complete before we can 1094b24ab676SJeff 
Bonwick * release this dbuf. We will finish everything up 1095b24ab676SJeff Bonwick * in the zfs_get_done() callback. 1096975c32a0SNeil Perrin */ 1097b24ab676SJeff Bonwick if (error == 0) 1098b24ab676SJeff Bonwick return (0); 1099975c32a0SNeil Perrin 1100b24ab676SJeff Bonwick if (error == EALREADY) { 1101b24ab676SJeff Bonwick lr->lr_common.lrc_txtype = TX_WRITE2; 1102b24ab676SJeff Bonwick error = 0; 1103b24ab676SJeff Bonwick } 1104975c32a0SNeil Perrin } 1105fa9e4066Sahrens } 1106b24ab676SJeff Bonwick 1107b24ab676SJeff Bonwick zfs_get_done(zgd, error); 1108b24ab676SJeff Bonwick 1109fa9e4066Sahrens return (error); 1110fa9e4066Sahrens } 1111fa9e4066Sahrens 1112fa9e4066Sahrens /*ARGSUSED*/ 1113fa9e4066Sahrens static int 1114da6c28aaSamw zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, 1115da6c28aaSamw caller_context_t *ct) 1116fa9e4066Sahrens { 1117fa9e4066Sahrens znode_t *zp = VTOZ(vp); 1118fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1119fa9e4066Sahrens int error; 1120fa9e4066Sahrens 11213cb34c60Sahrens ZFS_ENTER(zfsvfs); 11223cb34c60Sahrens ZFS_VERIFY_ZP(zp); 1123da6c28aaSamw 1124da6c28aaSamw if (flag & V_ACE_MASK) 1125da6c28aaSamw error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); 1126da6c28aaSamw else 1127da6c28aaSamw error = zfs_zaccess_rwx(zp, mode, flag, cr); 1128da6c28aaSamw 1129fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1130fa9e4066Sahrens return (error); 1131fa9e4066Sahrens } 1132fa9e4066Sahrens 1133d47621a4STim Haley /* 1134d47621a4STim Haley * If vnode is for a device return a specfs vnode instead. 
1135d47621a4STim Haley */ 1136d47621a4STim Haley static int 1137d47621a4STim Haley specvp_check(vnode_t **vpp, cred_t *cr) 1138d47621a4STim Haley { 1139d47621a4STim Haley int error = 0; 1140d47621a4STim Haley 1141d47621a4STim Haley if (IS_DEVVP(*vpp)) { 1142d47621a4STim Haley struct vnode *svp; 1143d47621a4STim Haley 1144d47621a4STim Haley svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 1145d47621a4STim Haley VN_RELE(*vpp); 1146d47621a4STim Haley if (svp == NULL) 1147be6fd75aSMatthew Ahrens error = SET_ERROR(ENOSYS); 1148d47621a4STim Haley *vpp = svp; 1149d47621a4STim Haley } 1150d47621a4STim Haley return (error); 1151d47621a4STim Haley } 1152d47621a4STim Haley 1153d47621a4STim Haley 1154fa9e4066Sahrens /* 1155fa9e4066Sahrens * Lookup an entry in a directory, or an extended attribute directory. 1156fa9e4066Sahrens * If it exists, return a held vnode reference for it. 1157fa9e4066Sahrens * 1158fa9e4066Sahrens * IN: dvp - vnode of directory to search. 1159fa9e4066Sahrens * nm - name of entry to lookup. 1160fa9e4066Sahrens * pnp - full pathname to lookup [UNUSED]. 1161fa9e4066Sahrens * flags - LOOKUP_XATTR set if looking for an attribute. 1162fa9e4066Sahrens * rdir - root directory vnode [UNUSED]. 1163fa9e4066Sahrens * cr - credentials of caller. 1164da6c28aaSamw * ct - caller context 1165da6c28aaSamw * direntflags - directory lookup flags 1166da6c28aaSamw * realpnp - returned pathname. 1167fa9e4066Sahrens * 1168fa9e4066Sahrens * OUT: vpp - vnode of located entry, NULL if not found. 1169fa9e4066Sahrens * 1170f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
1171fa9e4066Sahrens * 1172fa9e4066Sahrens * Timestamps: 1173fa9e4066Sahrens * NA 1174fa9e4066Sahrens */ 1175fa9e4066Sahrens /* ARGSUSED */ 1176fa9e4066Sahrens static int 1177fa9e4066Sahrens zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp, 1178da6c28aaSamw int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, 1179da6c28aaSamw int *direntflags, pathname_t *realpnp) 1180fa9e4066Sahrens { 1181fa9e4066Sahrens znode_t *zdp = VTOZ(dvp); 1182fa9e4066Sahrens zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1183d47621a4STim Haley int error = 0; 1184d47621a4STim Haley 1185d47621a4STim Haley /* fast path */ 1186d47621a4STim Haley if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) { 1187d47621a4STim Haley 1188d47621a4STim Haley if (dvp->v_type != VDIR) { 1189be6fd75aSMatthew Ahrens return (SET_ERROR(ENOTDIR)); 11900a586ceaSMark Shellenbaum } else if (zdp->z_sa_hdl == NULL) { 1191be6fd75aSMatthew Ahrens return (SET_ERROR(EIO)); 1192d47621a4STim Haley } 1193d47621a4STim Haley 1194d47621a4STim Haley if (nm[0] == 0 || (nm[0] == '.' 
&& nm[1] == '\0')) { 1195d47621a4STim Haley error = zfs_fastaccesschk_execute(zdp, cr); 1196d47621a4STim Haley if (!error) { 1197d47621a4STim Haley *vpp = dvp; 1198d47621a4STim Haley VN_HOLD(*vpp); 1199d47621a4STim Haley return (0); 1200d47621a4STim Haley } 1201d47621a4STim Haley return (error); 1202d47621a4STim Haley } else { 1203d47621a4STim Haley vnode_t *tvp = dnlc_lookup(dvp, nm); 1204d47621a4STim Haley 1205d47621a4STim Haley if (tvp) { 1206d47621a4STim Haley error = zfs_fastaccesschk_execute(zdp, cr); 1207d47621a4STim Haley if (error) { 1208d47621a4STim Haley VN_RELE(tvp); 1209d47621a4STim Haley return (error); 1210d47621a4STim Haley } 1211d47621a4STim Haley if (tvp == DNLC_NO_VNODE) { 1212d47621a4STim Haley VN_RELE(tvp); 1213be6fd75aSMatthew Ahrens return (SET_ERROR(ENOENT)); 1214d47621a4STim Haley } else { 1215d47621a4STim Haley *vpp = tvp; 1216d47621a4STim Haley return (specvp_check(vpp, cr)); 1217d47621a4STim Haley } 1218d47621a4STim Haley } 1219d47621a4STim Haley } 1220d47621a4STim Haley } 1221d47621a4STim Haley 1222d47621a4STim Haley DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 1223fa9e4066Sahrens 12243cb34c60Sahrens ZFS_ENTER(zfsvfs); 12253cb34c60Sahrens ZFS_VERIFY_ZP(zdp); 1226fa9e4066Sahrens 1227fa9e4066Sahrens *vpp = NULL; 1228fa9e4066Sahrens 1229fa9e4066Sahrens if (flags & LOOKUP_XATTR) { 12307b55fa8eSck /* 12317b55fa8eSck * If the xattr property is off, refuse the lookup request. 12327b55fa8eSck */ 12337b55fa8eSck if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 12347b55fa8eSck ZFS_EXIT(zfsvfs); 1235be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 12367b55fa8eSck } 12377b55fa8eSck 1238fa9e4066Sahrens /* 1239fa9e4066Sahrens * We don't allow recursive attributes.. 1240fa9e4066Sahrens * Maybe someday we will. 
1241fa9e4066Sahrens */ 12420a586ceaSMark Shellenbaum if (zdp->z_pflags & ZFS_XATTR) { 1243fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1244be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 1245fa9e4066Sahrens } 1246fa9e4066Sahrens 12473f063a9dSck if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1248fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1249fa9e4066Sahrens return (error); 1250fa9e4066Sahrens } 1251fa9e4066Sahrens 1252fa9e4066Sahrens /* 1253fa9e4066Sahrens * Do we have permission to get into attribute directory? 1254fa9e4066Sahrens */ 1255fa9e4066Sahrens 1256da6c28aaSamw if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 1257da6c28aaSamw B_FALSE, cr)) { 1258fa9e4066Sahrens VN_RELE(*vpp); 1259da6c28aaSamw *vpp = NULL; 1260fa9e4066Sahrens } 1261fa9e4066Sahrens 1262fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1263fa9e4066Sahrens return (error); 1264fa9e4066Sahrens } 1265fa9e4066Sahrens 12660f2dc02eSek if (dvp->v_type != VDIR) { 12670f2dc02eSek ZFS_EXIT(zfsvfs); 1268be6fd75aSMatthew Ahrens return (SET_ERROR(ENOTDIR)); 12690f2dc02eSek } 1270736b9155Smarks 1271fa9e4066Sahrens /* 1272fa9e4066Sahrens * Check accessibility of directory. 
1273fa9e4066Sahrens */ 1274fa9e4066Sahrens 1275da6c28aaSamw if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 1276fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1277fa9e4066Sahrens return (error); 1278fa9e4066Sahrens } 1279fa9e4066Sahrens 1280de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 1281da6c28aaSamw NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1282da6c28aaSamw ZFS_EXIT(zfsvfs); 1283be6fd75aSMatthew Ahrens return (SET_ERROR(EILSEQ)); 1284da6c28aaSamw } 1285fa9e4066Sahrens 1286da6c28aaSamw error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp); 1287d47621a4STim Haley if (error == 0) 1288d47621a4STim Haley error = specvp_check(vpp, cr); 1289fa9e4066Sahrens 1290fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1291fa9e4066Sahrens return (error); 1292fa9e4066Sahrens } 1293fa9e4066Sahrens 1294fa9e4066Sahrens /* 1295fa9e4066Sahrens * Attempt to create a new entry in a directory. If the entry 1296fa9e4066Sahrens * already exists, truncate the file if permissible, else return 1297fa9e4066Sahrens * an error. Return the vp of the created or trunc'd file. 1298fa9e4066Sahrens * 1299fa9e4066Sahrens * IN: dvp - vnode of directory to put new file entry in. 1300fa9e4066Sahrens * name - name of new file entry. 1301fa9e4066Sahrens * vap - attributes of new file. 1302fa9e4066Sahrens * excl - flag indicating exclusive or non-exclusive mode. 1303fa9e4066Sahrens * mode - mode to open file with. 1304fa9e4066Sahrens * cr - credentials of caller. 1305fa9e4066Sahrens * flag - large file flag [UNUSED]. 1306da6c28aaSamw * ct - caller context 13074bb73804SMatthew Ahrens * vsecp - ACL to be set 1308fa9e4066Sahrens * 1309fa9e4066Sahrens * OUT: vpp - vnode of created or trunc'd entry. 1310fa9e4066Sahrens * 1311f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
1312fa9e4066Sahrens * 1313fa9e4066Sahrens * Timestamps: 1314fa9e4066Sahrens * dvp - ctime|mtime updated if new entry created 1315fa9e4066Sahrens * vp - ctime|mtime always, atime if new 1316fa9e4066Sahrens */ 1317da6c28aaSamw 1318fa9e4066Sahrens /* ARGSUSED */ 1319fa9e4066Sahrens static int 1320fa9e4066Sahrens zfs_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl, 1321da6c28aaSamw int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct, 1322da6c28aaSamw vsecattr_t *vsecp) 1323fa9e4066Sahrens { 1324fa9e4066Sahrens znode_t *zp, *dzp = VTOZ(dvp); 1325fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1326f18faf3fSek zilog_t *zilog; 1327f18faf3fSek objset_t *os; 1328fa9e4066Sahrens zfs_dirlock_t *dl; 1329fa9e4066Sahrens dmu_tx_t *tx; 1330fa9e4066Sahrens int error; 1331c1ce5987SMark Shellenbaum ksid_t *ksid; 1332c1ce5987SMark Shellenbaum uid_t uid; 1333c1ce5987SMark Shellenbaum gid_t gid = crgetgid(cr); 13340a586ceaSMark Shellenbaum zfs_acl_ids_t acl_ids; 133589459e17SMark Shellenbaum boolean_t fuid_dirtied; 1336c8c24165SMark Shellenbaum boolean_t have_acl = B_FALSE; 133769962b56SMatthew Ahrens boolean_t waited = B_FALSE; 1338da6c28aaSamw 1339da6c28aaSamw /* 1340da6c28aaSamw * If we have an ephemeral id, ACL, or XVATTR then 1341da6c28aaSamw * make sure file system is at proper version 1342da6c28aaSamw */ 1343da6c28aaSamw 1344c1ce5987SMark Shellenbaum ksid = crgetsid(cr, KSID_OWNER); 1345c1ce5987SMark Shellenbaum if (ksid) 1346c1ce5987SMark Shellenbaum uid = ksid_getid(ksid); 1347c1ce5987SMark Shellenbaum else 1348c1ce5987SMark Shellenbaum uid = crgetuid(cr); 1349c1ce5987SMark Shellenbaum 1350da6c28aaSamw if (zfsvfs->z_use_fuids == B_FALSE && 1351da6c28aaSamw (vsecp || (vap->va_mask & AT_XVATTR) || 1352c1ce5987SMark Shellenbaum IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1353be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 1354fa9e4066Sahrens 13553cb34c60Sahrens ZFS_ENTER(zfsvfs); 13563cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 1357f18faf3fSek os = 
zfsvfs->z_os; 1358f18faf3fSek zilog = zfsvfs->z_log; 1359fa9e4066Sahrens 1360de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 1361da6c28aaSamw NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1362da6c28aaSamw ZFS_EXIT(zfsvfs); 1363be6fd75aSMatthew Ahrens return (SET_ERROR(EILSEQ)); 1364da6c28aaSamw } 1365da6c28aaSamw 1366da6c28aaSamw if (vap->va_mask & AT_XVATTR) { 1367da6c28aaSamw if ((error = secpolicy_xvattr((xvattr_t *)vap, 1368da6c28aaSamw crgetuid(cr), cr, vap->va_type)) != 0) { 1369da6c28aaSamw ZFS_EXIT(zfsvfs); 1370da6c28aaSamw return (error); 1371da6c28aaSamw } 1372da6c28aaSamw } 1373fa9e4066Sahrens top: 1374fa9e4066Sahrens *vpp = NULL; 1375fa9e4066Sahrens 1376fa9e4066Sahrens if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr)) 1377fa9e4066Sahrens vap->va_mode &= ~VSVTX; 1378fa9e4066Sahrens 1379fa9e4066Sahrens if (*name == '\0') { 1380fa9e4066Sahrens /* 1381fa9e4066Sahrens * Null component name refers to the directory itself. 1382fa9e4066Sahrens */ 1383fa9e4066Sahrens VN_HOLD(dvp); 1384fa9e4066Sahrens zp = dzp; 1385fa9e4066Sahrens dl = NULL; 1386fa9e4066Sahrens error = 0; 1387fa9e4066Sahrens } else { 1388fa9e4066Sahrens /* possible VN_HOLD(zp) */ 1389da6c28aaSamw int zflg = 0; 1390da6c28aaSamw 1391da6c28aaSamw if (flag & FIGNORECASE) 1392da6c28aaSamw zflg |= ZCILOOK; 1393da6c28aaSamw 1394da6c28aaSamw error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1395da6c28aaSamw NULL, NULL); 1396da6c28aaSamw if (error) { 13970b2a8171SMark Shellenbaum if (have_acl) 13980b2a8171SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1399fa9e4066Sahrens if (strcmp(name, "..") == 0) 1400be6fd75aSMatthew Ahrens error = SET_ERROR(EISDIR); 1401fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1402fa9e4066Sahrens return (error); 1403fa9e4066Sahrens } 1404fa9e4066Sahrens } 14050a586ceaSMark Shellenbaum 1406fa9e4066Sahrens if (zp == NULL) { 1407da6c28aaSamw uint64_t txtype; 1408da6c28aaSamw 1409fa9e4066Sahrens /* 1410fa9e4066Sahrens * Create a new file object and update the 
directory 1411fa9e4066Sahrens * to reference it. 1412fa9e4066Sahrens */ 1413da6c28aaSamw if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 14140b2a8171SMark Shellenbaum if (have_acl) 14150b2a8171SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1416fa9e4066Sahrens goto out; 1417fa9e4066Sahrens } 1418fa9e4066Sahrens 1419fa9e4066Sahrens /* 1420fa9e4066Sahrens * We only support the creation of regular files in 1421fa9e4066Sahrens * extended attribute directories. 1422fa9e4066Sahrens */ 14230a586ceaSMark Shellenbaum 14240a586ceaSMark Shellenbaum if ((dzp->z_pflags & ZFS_XATTR) && 1425fa9e4066Sahrens (vap->va_type != VREG)) { 14260b2a8171SMark Shellenbaum if (have_acl) 14270b2a8171SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1428be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 1429fa9e4066Sahrens goto out; 1430fa9e4066Sahrens } 1431fa9e4066Sahrens 1432c8c24165SMark Shellenbaum if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap, 1433c8c24165SMark Shellenbaum cr, vsecp, &acl_ids)) != 0) 143489459e17SMark Shellenbaum goto out; 1435c8c24165SMark Shellenbaum have_acl = B_TRUE; 1436c8c24165SMark Shellenbaum 143714843421SMatthew Ahrens if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 14384929fd5eSTim Haley zfs_acl_ids_free(&acl_ids); 1439be6fd75aSMatthew Ahrens error = SET_ERROR(EDQUOT); 144014843421SMatthew Ahrens goto out; 144114843421SMatthew Ahrens } 144289459e17SMark Shellenbaum 1443fa9e4066Sahrens tx = dmu_tx_create(os); 14440a586ceaSMark Shellenbaum 14450a586ceaSMark Shellenbaum dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 14460a586ceaSMark Shellenbaum ZFS_SA_BASE_ATTR_SIZE); 14470a586ceaSMark Shellenbaum 144889459e17SMark Shellenbaum fuid_dirtied = zfsvfs->z_fuid_dirty; 144914843421SMatthew Ahrens if (fuid_dirtied) 145014843421SMatthew Ahrens zfs_fuid_txhold(zfsvfs, tx); 1451ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 14520a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 14530a586ceaSMark Shellenbaum if 
(!zfsvfs->z_use_sa && 14540a586ceaSMark Shellenbaum acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 1455fa9e4066Sahrens dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 14560a586ceaSMark Shellenbaum 0, acl_ids.z_aclp->z_acl_bytes); 1457da6c28aaSamw } 145869962b56SMatthew Ahrens error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); 1459fa9e4066Sahrens if (error) { 1460fa9e4066Sahrens zfs_dirent_unlock(dl); 14611209a471SNeil Perrin if (error == ERESTART) { 146269962b56SMatthew Ahrens waited = B_TRUE; 14638a2f1b91Sahrens dmu_tx_wait(tx); 14648a2f1b91Sahrens dmu_tx_abort(tx); 1465fa9e4066Sahrens goto top; 1466fa9e4066Sahrens } 1467c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 14688a2f1b91Sahrens dmu_tx_abort(tx); 1469fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1470fa9e4066Sahrens return (error); 1471fa9e4066Sahrens } 14720a586ceaSMark Shellenbaum zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 147389459e17SMark Shellenbaum 147489459e17SMark Shellenbaum if (fuid_dirtied) 147589459e17SMark Shellenbaum zfs_fuid_sync(zfsvfs, tx); 147689459e17SMark Shellenbaum 1477fa9e4066Sahrens (void) zfs_link_create(dl, zp, tx, ZNEW); 1478da6c28aaSamw txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); 1479da6c28aaSamw if (flag & FIGNORECASE) 1480da6c28aaSamw txtype |= TX_CI; 1481da6c28aaSamw zfs_log_create(zilog, tx, txtype, dzp, zp, name, 148289459e17SMark Shellenbaum vsecp, acl_ids.z_fuidp, vap); 148389459e17SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1484fa9e4066Sahrens dmu_tx_commit(tx); 1485fa9e4066Sahrens } else { 1486da6c28aaSamw int aflags = (flag & FAPPEND) ? V_APPEND : 0; 1487da6c28aaSamw 14880b2a8171SMark Shellenbaum if (have_acl) 14890b2a8171SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 14900b2a8171SMark Shellenbaum have_acl = B_FALSE; 14910b2a8171SMark Shellenbaum 1492fa9e4066Sahrens /* 1493fa9e4066Sahrens * A directory entry already exists for this name. 1494fa9e4066Sahrens */ 1495fa9e4066Sahrens /* 1496fa9e4066Sahrens * Can't truncate an existing file if in exclusive mode. 
1497fa9e4066Sahrens */ 1498fa9e4066Sahrens if (excl == EXCL) { 1499be6fd75aSMatthew Ahrens error = SET_ERROR(EEXIST); 1500fa9e4066Sahrens goto out; 1501fa9e4066Sahrens } 1502fa9e4066Sahrens /* 1503fa9e4066Sahrens * Can't open a directory for writing. 1504fa9e4066Sahrens */ 1505fa9e4066Sahrens if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) { 1506be6fd75aSMatthew Ahrens error = SET_ERROR(EISDIR); 1507fa9e4066Sahrens goto out; 1508fa9e4066Sahrens } 1509fa9e4066Sahrens /* 1510fa9e4066Sahrens * Verify requested access to file. 1511fa9e4066Sahrens */ 1512da6c28aaSamw if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) { 1513fa9e4066Sahrens goto out; 1514fa9e4066Sahrens } 1515fa9e4066Sahrens 1516fa9e4066Sahrens mutex_enter(&dzp->z_lock); 1517fa9e4066Sahrens dzp->z_seq++; 1518fa9e4066Sahrens mutex_exit(&dzp->z_lock); 1519fa9e4066Sahrens 15205730cc9aSmaybee /* 15215730cc9aSmaybee * Truncate regular files if requested. 15225730cc9aSmaybee */ 15235730cc9aSmaybee if ((ZTOV(zp)->v_type == VREG) && 1524fa9e4066Sahrens (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) { 1525cdb0ab79Smaybee /* we can't hold any locks when calling zfs_freesp() */ 1526cdb0ab79Smaybee zfs_dirent_unlock(dl); 1527cdb0ab79Smaybee dl = NULL; 15285730cc9aSmaybee error = zfs_freesp(zp, 0, 0, mode, TRUE); 1529df2381bfSpraks if (error == 0) { 1530da6c28aaSamw vnevent_create(ZTOV(zp), ct); 1531df2381bfSpraks } 1532fa9e4066Sahrens } 1533fa9e4066Sahrens } 1534fa9e4066Sahrens out: 1535fa9e4066Sahrens 1536fa9e4066Sahrens if (dl) 1537fa9e4066Sahrens zfs_dirent_unlock(dl); 1538fa9e4066Sahrens 1539fa9e4066Sahrens if (error) { 1540fa9e4066Sahrens if (zp) 1541fa9e4066Sahrens VN_RELE(ZTOV(zp)); 1542fa9e4066Sahrens } else { 1543fa9e4066Sahrens *vpp = ZTOV(zp); 1544d47621a4STim Haley error = specvp_check(vpp, cr); 1545fa9e4066Sahrens } 1546fa9e4066Sahrens 154755da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 15485002558fSNeil Perrin zil_commit(zilog, 0); 154955da60b9SMark J Musante 
1550fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1551fa9e4066Sahrens return (error); 1552fa9e4066Sahrens } 1553fa9e4066Sahrens 1554fa9e4066Sahrens /* 1555fa9e4066Sahrens * Remove an entry from a directory. 1556fa9e4066Sahrens * 1557fa9e4066Sahrens * IN: dvp - vnode of directory to remove entry from. 1558fa9e4066Sahrens * name - name of entry to remove. 1559fa9e4066Sahrens * cr - credentials of caller. 1560da6c28aaSamw * ct - caller context 1561da6c28aaSamw * flags - case flags 1562fa9e4066Sahrens * 1563f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 1564fa9e4066Sahrens * 1565fa9e4066Sahrens * Timestamps: 1566fa9e4066Sahrens * dvp - ctime|mtime 1567fa9e4066Sahrens * vp - ctime (if nlink > 0) 1568fa9e4066Sahrens */ 15690a586ceaSMark Shellenbaum 15700a586ceaSMark Shellenbaum uint64_t null_xattr = 0; 15710a586ceaSMark Shellenbaum 1572da6c28aaSamw /*ARGSUSED*/ 1573fa9e4066Sahrens static int 1574da6c28aaSamw zfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct, 1575da6c28aaSamw int flags) 1576fa9e4066Sahrens { 1577fa9e4066Sahrens znode_t *zp, *dzp = VTOZ(dvp); 15780b2a8171SMark Shellenbaum znode_t *xzp; 1579fa9e4066Sahrens vnode_t *vp; 1580fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1581f18faf3fSek zilog_t *zilog; 15820b2a8171SMark Shellenbaum uint64_t acl_obj, xattr_obj; 15834bb73804SMatthew Ahrens uint64_t xattr_obj_unlinked = 0; 158451bd2f97SNeil Perrin uint64_t obj = 0; 1585fa9e4066Sahrens zfs_dirlock_t *dl; 1586fa9e4066Sahrens dmu_tx_t *tx; 1587893a6d32Sahrens boolean_t may_delete_now, delete_now = FALSE; 1588cdb0ab79Smaybee boolean_t unlinked, toobig = FALSE; 1589da6c28aaSamw uint64_t txtype; 1590da6c28aaSamw pathname_t *realnmp = NULL; 1591da6c28aaSamw pathname_t realnm; 1592fa9e4066Sahrens int error; 1593da6c28aaSamw int zflg = ZEXISTS; 159469962b56SMatthew Ahrens boolean_t waited = B_FALSE; 1595fa9e4066Sahrens 15963cb34c60Sahrens ZFS_ENTER(zfsvfs); 15973cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 1598f18faf3fSek zilog = zfsvfs->z_log; 
1599fa9e4066Sahrens 1600da6c28aaSamw if (flags & FIGNORECASE) { 1601da6c28aaSamw zflg |= ZCILOOK; 1602da6c28aaSamw pn_alloc(&realnm); 1603da6c28aaSamw realnmp = &realnm; 1604da6c28aaSamw } 1605da6c28aaSamw 1606fa9e4066Sahrens top: 16070b2a8171SMark Shellenbaum xattr_obj = 0; 16080b2a8171SMark Shellenbaum xzp = NULL; 1609fa9e4066Sahrens /* 1610fa9e4066Sahrens * Attempt to lock directory; fail if entry doesn't exist. 1611fa9e4066Sahrens */ 1612da6c28aaSamw if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1613da6c28aaSamw NULL, realnmp)) { 1614da6c28aaSamw if (realnmp) 1615da6c28aaSamw pn_free(realnmp); 1616fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1617fa9e4066Sahrens return (error); 1618fa9e4066Sahrens } 1619fa9e4066Sahrens 1620fa9e4066Sahrens vp = ZTOV(zp); 1621fa9e4066Sahrens 1622fa9e4066Sahrens if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1623fa9e4066Sahrens goto out; 1624fa9e4066Sahrens } 1625fa9e4066Sahrens 1626fa9e4066Sahrens /* 1627fa9e4066Sahrens * Need to use rmdir for removing directories. 1628fa9e4066Sahrens */ 1629fa9e4066Sahrens if (vp->v_type == VDIR) { 1630be6fd75aSMatthew Ahrens error = SET_ERROR(EPERM); 1631fa9e4066Sahrens goto out; 1632fa9e4066Sahrens } 1633fa9e4066Sahrens 1634da6c28aaSamw vnevent_remove(vp, dvp, name, ct); 1635fa9e4066Sahrens 1636da6c28aaSamw if (realnmp) 1637ab04eb8eStimh dnlc_remove(dvp, realnmp->pn_buf); 1638da6c28aaSamw else 1639da6c28aaSamw dnlc_remove(dvp, name); 1640033f9833Sek 1641fa9e4066Sahrens mutex_enter(&vp->v_lock); 1642fa9e4066Sahrens may_delete_now = vp->v_count == 1 && !vn_has_cached_data(vp); 1643fa9e4066Sahrens mutex_exit(&vp->v_lock); 1644fa9e4066Sahrens 1645fa9e4066Sahrens /* 1646893a6d32Sahrens * We may delete the znode now, or we may put it in the unlinked set; 1647fa9e4066Sahrens * it depends on whether we're the last link, and on whether there are 1648fa9e4066Sahrens * other holds on the vnode. So we dmu_tx_hold() the right things to 1649fa9e4066Sahrens * allow for either case. 
1650fa9e4066Sahrens */ 165151bd2f97SNeil Perrin obj = zp->z_id; 1652fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 1653ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 16540a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 16550a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 16560a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, dzp); 1657cdb0ab79Smaybee if (may_delete_now) { 1658cdb0ab79Smaybee toobig = 16590a586ceaSMark Shellenbaum zp->z_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT; 1660cdb0ab79Smaybee /* if the file is too big, only hold_free a token amount */ 1661cdb0ab79Smaybee dmu_tx_hold_free(tx, zp->z_id, 0, 1662cdb0ab79Smaybee (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END)); 1663cdb0ab79Smaybee } 1664fa9e4066Sahrens 1665fa9e4066Sahrens /* are there any extended attributes? */ 16660a586ceaSMark Shellenbaum error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 16670a586ceaSMark Shellenbaum &xattr_obj, sizeof (xattr_obj)); 16680b2a8171SMark Shellenbaum if (error == 0 && xattr_obj) { 16690a586ceaSMark Shellenbaum error = zfs_zget(zfsvfs, xattr_obj, &xzp); 1670fb09f5aaSMadhav Suresh ASSERT0(error); 16710a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 16720a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 1673fa9e4066Sahrens } 1674fa9e4066Sahrens 16751412a1a2SMark Shellenbaum mutex_enter(&zp->z_lock); 16761412a1a2SMark Shellenbaum if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now) 1677fa9e4066Sahrens dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); 16781412a1a2SMark Shellenbaum mutex_exit(&zp->z_lock); 1679fa9e4066Sahrens 1680fa9e4066Sahrens /* charge as an update -- would be nice not to charge at all */ 1681893a6d32Sahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 1682fa9e4066Sahrens 16834bb73804SMatthew Ahrens /* 16844bb73804SMatthew Ahrens * Mark this transaction as typically resulting in a net free of 16854bb73804SMatthew Ahrens * space, unless object 
removal will be delayed indefinitely 16864bb73804SMatthew Ahrens * (due to active holds on the vnode due to the file being open). 16874bb73804SMatthew Ahrens */ 16884bb73804SMatthew Ahrens if (may_delete_now) 16894bb73804SMatthew Ahrens dmu_tx_mark_netfree(tx); 16904bb73804SMatthew Ahrens 169169962b56SMatthew Ahrens error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); 1692fa9e4066Sahrens if (error) { 1693fa9e4066Sahrens zfs_dirent_unlock(dl); 1694fa9e4066Sahrens VN_RELE(vp); 16950b2a8171SMark Shellenbaum if (xzp) 16960b2a8171SMark Shellenbaum VN_RELE(ZTOV(xzp)); 16971209a471SNeil Perrin if (error == ERESTART) { 169869962b56SMatthew Ahrens waited = B_TRUE; 16998a2f1b91Sahrens dmu_tx_wait(tx); 17008a2f1b91Sahrens dmu_tx_abort(tx); 1701fa9e4066Sahrens goto top; 1702fa9e4066Sahrens } 1703da6c28aaSamw if (realnmp) 1704da6c28aaSamw pn_free(realnmp); 17058a2f1b91Sahrens dmu_tx_abort(tx); 1706fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1707fa9e4066Sahrens return (error); 1708fa9e4066Sahrens } 1709fa9e4066Sahrens 1710fa9e4066Sahrens /* 1711fa9e4066Sahrens * Remove the directory entry. 1712fa9e4066Sahrens */ 1713da6c28aaSamw error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked); 1714fa9e4066Sahrens 1715fa9e4066Sahrens if (error) { 1716fa9e4066Sahrens dmu_tx_commit(tx); 1717fa9e4066Sahrens goto out; 1718fa9e4066Sahrens } 1719fa9e4066Sahrens 1720893a6d32Sahrens if (unlinked) { 17211412a1a2SMark Shellenbaum /* 17221412a1a2SMark Shellenbaum * Hold z_lock so that we can make sure that the ACL obj 17231412a1a2SMark Shellenbaum * hasn't changed. Could have been deleted due to 17241412a1a2SMark Shellenbaum * zfs_sa_upgrade(). 
17251412a1a2SMark Shellenbaum */ 17261412a1a2SMark Shellenbaum mutex_enter(&zp->z_lock); 1727fa9e4066Sahrens mutex_enter(&vp->v_lock); 17280a586ceaSMark Shellenbaum (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 17290a586ceaSMark Shellenbaum &xattr_obj_unlinked, sizeof (xattr_obj_unlinked)); 1730cdb0ab79Smaybee delete_now = may_delete_now && !toobig && 1731fa9e4066Sahrens vp->v_count == 1 && !vn_has_cached_data(vp) && 17321412a1a2SMark Shellenbaum xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) == 17330a586ceaSMark Shellenbaum acl_obj; 1734fa9e4066Sahrens mutex_exit(&vp->v_lock); 1735fa9e4066Sahrens } 1736fa9e4066Sahrens 1737fa9e4066Sahrens if (delete_now) { 17380a586ceaSMark Shellenbaum if (xattr_obj_unlinked) { 17390a586ceaSMark Shellenbaum ASSERT3U(xzp->z_links, ==, 2); 1740fa9e4066Sahrens mutex_enter(&xzp->z_lock); 1741893a6d32Sahrens xzp->z_unlinked = 1; 17420a586ceaSMark Shellenbaum xzp->z_links = 0; 17430a586ceaSMark Shellenbaum error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), 17440a586ceaSMark Shellenbaum &xzp->z_links, sizeof (xzp->z_links), tx); 17450a586ceaSMark Shellenbaum ASSERT3U(error, ==, 0); 1746fa9e4066Sahrens mutex_exit(&xzp->z_lock); 1747893a6d32Sahrens zfs_unlinked_add(xzp, tx); 17481412a1a2SMark Shellenbaum 17490a586ceaSMark Shellenbaum if (zp->z_is_sa) 17500a586ceaSMark Shellenbaum error = sa_remove(zp->z_sa_hdl, 17510a586ceaSMark Shellenbaum SA_ZPL_XATTR(zfsvfs), tx); 17520a586ceaSMark Shellenbaum else 17530a586ceaSMark Shellenbaum error = sa_update(zp->z_sa_hdl, 17540a586ceaSMark Shellenbaum SA_ZPL_XATTR(zfsvfs), &null_xattr, 17550a586ceaSMark Shellenbaum sizeof (uint64_t), tx); 1756fb09f5aaSMadhav Suresh ASSERT0(error); 1757fa9e4066Sahrens } 1758fa9e4066Sahrens mutex_enter(&vp->v_lock); 1759fa9e4066Sahrens vp->v_count--; 1760fb09f5aaSMadhav Suresh ASSERT0(vp->v_count); 1761fa9e4066Sahrens mutex_exit(&vp->v_lock); 1762fa9e4066Sahrens mutex_exit(&zp->z_lock); 1763fa9e4066Sahrens zfs_znode_delete(zp, tx); 
1764893a6d32Sahrens } else if (unlinked) { 17651412a1a2SMark Shellenbaum mutex_exit(&zp->z_lock); 1766893a6d32Sahrens zfs_unlinked_add(zp, tx); 1767fa9e4066Sahrens } 1768fa9e4066Sahrens 1769da6c28aaSamw txtype = TX_REMOVE; 1770da6c28aaSamw if (flags & FIGNORECASE) 1771da6c28aaSamw txtype |= TX_CI; 177251bd2f97SNeil Perrin zfs_log_remove(zilog, tx, txtype, dzp, name, obj); 1773fa9e4066Sahrens 1774fa9e4066Sahrens dmu_tx_commit(tx); 1775fa9e4066Sahrens out: 1776da6c28aaSamw if (realnmp) 1777da6c28aaSamw pn_free(realnmp); 1778da6c28aaSamw 1779fa9e4066Sahrens zfs_dirent_unlock(dl); 1780fa9e4066Sahrens 178106e0070dSMark Shellenbaum if (!delete_now) 1782fa9e4066Sahrens VN_RELE(vp); 178306e0070dSMark Shellenbaum if (xzp) 1784fa9e4066Sahrens VN_RELE(ZTOV(xzp)); 1785fa9e4066Sahrens 178655da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 17875002558fSNeil Perrin zil_commit(zilog, 0); 178855da60b9SMark J Musante 1789fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1790fa9e4066Sahrens return (error); 1791fa9e4066Sahrens } 1792fa9e4066Sahrens 1793fa9e4066Sahrens /* 1794fa9e4066Sahrens * Create a new directory and insert it into dvp using the name 1795fa9e4066Sahrens * provided. Return a pointer to the inserted directory. 1796fa9e4066Sahrens * 1797fa9e4066Sahrens * IN: dvp - vnode of directory to add subdir to. 1798fa9e4066Sahrens * dirname - name of new directory. 1799fa9e4066Sahrens * vap - attributes of new directory. 1800fa9e4066Sahrens * cr - credentials of caller. 1801da6c28aaSamw * ct - caller context 1802f7170741SWill Andrews * flags - case flags 1803da6c28aaSamw * vsecp - ACL to be set 1804fa9e4066Sahrens * 1805fa9e4066Sahrens * OUT: vpp - vnode of created directory. 1806fa9e4066Sahrens * 1807f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
1808fa9e4066Sahrens * 1809fa9e4066Sahrens * Timestamps: 1810fa9e4066Sahrens * dvp - ctime|mtime updated 1811fa9e4066Sahrens * vp - ctime|mtime|atime updated 1812fa9e4066Sahrens */ 1813da6c28aaSamw /*ARGSUSED*/ 1814fa9e4066Sahrens static int 1815da6c28aaSamw zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr, 1816da6c28aaSamw caller_context_t *ct, int flags, vsecattr_t *vsecp) 1817fa9e4066Sahrens { 1818fa9e4066Sahrens znode_t *zp, *dzp = VTOZ(dvp); 1819fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1820f18faf3fSek zilog_t *zilog; 1821fa9e4066Sahrens zfs_dirlock_t *dl; 1822da6c28aaSamw uint64_t txtype; 1823fa9e4066Sahrens dmu_tx_t *tx; 1824fa9e4066Sahrens int error; 1825da6c28aaSamw int zf = ZNEW; 1826c1ce5987SMark Shellenbaum ksid_t *ksid; 1827c1ce5987SMark Shellenbaum uid_t uid; 1828c1ce5987SMark Shellenbaum gid_t gid = crgetgid(cr); 18290a586ceaSMark Shellenbaum zfs_acl_ids_t acl_ids; 183089459e17SMark Shellenbaum boolean_t fuid_dirtied; 183169962b56SMatthew Ahrens boolean_t waited = B_FALSE; 1832fa9e4066Sahrens 1833fa9e4066Sahrens ASSERT(vap->va_type == VDIR); 1834fa9e4066Sahrens 1835da6c28aaSamw /* 1836da6c28aaSamw * If we have an ephemeral id, ACL, or XVATTR then 1837da6c28aaSamw * make sure file system is at proper version 1838da6c28aaSamw */ 1839da6c28aaSamw 1840c1ce5987SMark Shellenbaum ksid = crgetsid(cr, KSID_OWNER); 1841c1ce5987SMark Shellenbaum if (ksid) 1842c1ce5987SMark Shellenbaum uid = ksid_getid(ksid); 1843c1ce5987SMark Shellenbaum else 1844c1ce5987SMark Shellenbaum uid = crgetuid(cr); 1845da6c28aaSamw if (zfsvfs->z_use_fuids == B_FALSE && 1846c1ce5987SMark Shellenbaum (vsecp || (vap->va_mask & AT_XVATTR) || 1847756962ecSMark Shellenbaum IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1848be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 1849da6c28aaSamw 18503cb34c60Sahrens ZFS_ENTER(zfsvfs); 18513cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 1852f18faf3fSek zilog = zfsvfs->z_log; 1853fa9e4066Sahrens 18540a586ceaSMark Shellenbaum if 
(dzp->z_pflags & ZFS_XATTR) { 1855fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1856be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 1857fa9e4066Sahrens } 1858da6c28aaSamw 1859de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(dirname, 1860da6c28aaSamw strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1861da6c28aaSamw ZFS_EXIT(zfsvfs); 1862be6fd75aSMatthew Ahrens return (SET_ERROR(EILSEQ)); 1863da6c28aaSamw } 1864da6c28aaSamw if (flags & FIGNORECASE) 1865da6c28aaSamw zf |= ZCILOOK; 1866da6c28aaSamw 1867c8c24165SMark Shellenbaum if (vap->va_mask & AT_XVATTR) { 1868da6c28aaSamw if ((error = secpolicy_xvattr((xvattr_t *)vap, 1869da6c28aaSamw crgetuid(cr), cr, vap->va_type)) != 0) { 1870da6c28aaSamw ZFS_EXIT(zfsvfs); 1871da6c28aaSamw return (error); 1872da6c28aaSamw } 1873c8c24165SMark Shellenbaum } 1874fa9e4066Sahrens 1875c8c24165SMark Shellenbaum if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, 1876c8c24165SMark Shellenbaum vsecp, &acl_ids)) != 0) { 1877c8c24165SMark Shellenbaum ZFS_EXIT(zfsvfs); 1878c8c24165SMark Shellenbaum return (error); 1879c8c24165SMark Shellenbaum } 1880fa9e4066Sahrens /* 1881fa9e4066Sahrens * First make sure the new directory doesn't exist. 1882c8c24165SMark Shellenbaum * 1883c8c24165SMark Shellenbaum * Existence is checked first to make sure we don't return 1884c8c24165SMark Shellenbaum * EACCES instead of EEXIST which can cause some applications 1885c8c24165SMark Shellenbaum * to fail. 
1886fa9e4066Sahrens */ 1887da6c28aaSamw top: 1888da6c28aaSamw *vpp = NULL; 1889da6c28aaSamw 1890da6c28aaSamw if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf, 1891da6c28aaSamw NULL, NULL)) { 1892c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1893fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1894fa9e4066Sahrens return (error); 1895fa9e4066Sahrens } 1896fa9e4066Sahrens 1897da6c28aaSamw if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 1898c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1899d2443e76Smarks zfs_dirent_unlock(dl); 1900d2443e76Smarks ZFS_EXIT(zfsvfs); 1901d2443e76Smarks return (error); 1902d2443e76Smarks } 1903d2443e76Smarks 190414843421SMatthew Ahrens if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 19054929fd5eSTim Haley zfs_acl_ids_free(&acl_ids); 190614843421SMatthew Ahrens zfs_dirent_unlock(dl); 190714843421SMatthew Ahrens ZFS_EXIT(zfsvfs); 1908be6fd75aSMatthew Ahrens return (SET_ERROR(EDQUOT)); 190914843421SMatthew Ahrens } 191089459e17SMark Shellenbaum 1911fa9e4066Sahrens /* 1912fa9e4066Sahrens * Add a new entry to the directory. 
1913fa9e4066Sahrens */ 1914fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 1915ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 1916ea8dc4b6Seschrock dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 191789459e17SMark Shellenbaum fuid_dirtied = zfsvfs->z_fuid_dirty; 191814843421SMatthew Ahrens if (fuid_dirtied) 191914843421SMatthew Ahrens zfs_fuid_txhold(zfsvfs, tx); 19200a586ceaSMark Shellenbaum if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 19210a586ceaSMark Shellenbaum dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 19220a586ceaSMark Shellenbaum acl_ids.z_aclp->z_acl_bytes); 19230a586ceaSMark Shellenbaum } 19240a586ceaSMark Shellenbaum 19250a586ceaSMark Shellenbaum dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 19260a586ceaSMark Shellenbaum ZFS_SA_BASE_ATTR_SIZE); 19270a586ceaSMark Shellenbaum 192869962b56SMatthew Ahrens error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); 1929fa9e4066Sahrens if (error) { 1930fa9e4066Sahrens zfs_dirent_unlock(dl); 19311209a471SNeil Perrin if (error == ERESTART) { 193269962b56SMatthew Ahrens waited = B_TRUE; 19338a2f1b91Sahrens dmu_tx_wait(tx); 19348a2f1b91Sahrens dmu_tx_abort(tx); 1935fa9e4066Sahrens goto top; 1936fa9e4066Sahrens } 1937c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 19388a2f1b91Sahrens dmu_tx_abort(tx); 1939fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1940fa9e4066Sahrens return (error); 1941fa9e4066Sahrens } 1942fa9e4066Sahrens 1943fa9e4066Sahrens /* 1944fa9e4066Sahrens * Create new node. 1945fa9e4066Sahrens */ 19460a586ceaSMark Shellenbaum zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 1947fa9e4066Sahrens 194889459e17SMark Shellenbaum if (fuid_dirtied) 194989459e17SMark Shellenbaum zfs_fuid_sync(zfsvfs, tx); 19500a586ceaSMark Shellenbaum 1951fa9e4066Sahrens /* 1952fa9e4066Sahrens * Now put new name in parent dir. 
1953fa9e4066Sahrens */ 1954fa9e4066Sahrens (void) zfs_link_create(dl, zp, tx, ZNEW); 1955fa9e4066Sahrens 1956fa9e4066Sahrens *vpp = ZTOV(zp); 1957fa9e4066Sahrens 1958da6c28aaSamw txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap); 1959da6c28aaSamw if (flags & FIGNORECASE) 1960da6c28aaSamw txtype |= TX_CI; 196189459e17SMark Shellenbaum zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, 196289459e17SMark Shellenbaum acl_ids.z_fuidp, vap); 1963da6c28aaSamw 196489459e17SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 19650a586ceaSMark Shellenbaum 1966fa9e4066Sahrens dmu_tx_commit(tx); 1967fa9e4066Sahrens 1968fa9e4066Sahrens zfs_dirent_unlock(dl); 1969fa9e4066Sahrens 197055da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 19715002558fSNeil Perrin zil_commit(zilog, 0); 197255da60b9SMark J Musante 1973fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1974fa9e4066Sahrens return (0); 1975fa9e4066Sahrens } 1976fa9e4066Sahrens 1977fa9e4066Sahrens /* 1978fa9e4066Sahrens * Remove a directory subdir entry. If the current working 1979fa9e4066Sahrens * directory is the same as the subdir to be removed, the 1980fa9e4066Sahrens * remove will fail. 1981fa9e4066Sahrens * 1982fa9e4066Sahrens * IN: dvp - vnode of directory to remove from. 1983fa9e4066Sahrens * name - name of directory to be removed. 1984fa9e4066Sahrens * cwd - vnode of current working directory. 1985fa9e4066Sahrens * cr - credentials of caller. 1986da6c28aaSamw * ct - caller context 1987da6c28aaSamw * flags - case flags 1988fa9e4066Sahrens * 1989f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
1990fa9e4066Sahrens * 1991fa9e4066Sahrens * Timestamps: 1992fa9e4066Sahrens * dvp - ctime|mtime updated 1993fa9e4066Sahrens */ 1994da6c28aaSamw /*ARGSUSED*/ 1995fa9e4066Sahrens static int 1996da6c28aaSamw zfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, 1997da6c28aaSamw caller_context_t *ct, int flags) 1998fa9e4066Sahrens { 1999fa9e4066Sahrens znode_t *dzp = VTOZ(dvp); 2000fa9e4066Sahrens znode_t *zp; 2001fa9e4066Sahrens vnode_t *vp; 2002fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2003f18faf3fSek zilog_t *zilog; 2004fa9e4066Sahrens zfs_dirlock_t *dl; 2005fa9e4066Sahrens dmu_tx_t *tx; 2006fa9e4066Sahrens int error; 2007da6c28aaSamw int zflg = ZEXISTS; 200869962b56SMatthew Ahrens boolean_t waited = B_FALSE; 2009fa9e4066Sahrens 20103cb34c60Sahrens ZFS_ENTER(zfsvfs); 20113cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 2012f18faf3fSek zilog = zfsvfs->z_log; 2013fa9e4066Sahrens 2014da6c28aaSamw if (flags & FIGNORECASE) 2015da6c28aaSamw zflg |= ZCILOOK; 2016fa9e4066Sahrens top: 2017fa9e4066Sahrens zp = NULL; 2018fa9e4066Sahrens 2019fa9e4066Sahrens /* 2020fa9e4066Sahrens * Attempt to lock directory; fail if entry doesn't exist. 
2021fa9e4066Sahrens */ 2022da6c28aaSamw if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 2023da6c28aaSamw NULL, NULL)) { 2024fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2025fa9e4066Sahrens return (error); 2026fa9e4066Sahrens } 2027fa9e4066Sahrens 2028fa9e4066Sahrens vp = ZTOV(zp); 2029fa9e4066Sahrens 2030fa9e4066Sahrens if (error = zfs_zaccess_delete(dzp, zp, cr)) { 2031fa9e4066Sahrens goto out; 2032fa9e4066Sahrens } 2033fa9e4066Sahrens 2034fa9e4066Sahrens if (vp->v_type != VDIR) { 2035be6fd75aSMatthew Ahrens error = SET_ERROR(ENOTDIR); 2036fa9e4066Sahrens goto out; 2037fa9e4066Sahrens } 2038fa9e4066Sahrens 2039fa9e4066Sahrens if (vp == cwd) { 2040be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 2041fa9e4066Sahrens goto out; 2042fa9e4066Sahrens } 2043fa9e4066Sahrens 2044da6c28aaSamw vnevent_rmdir(vp, dvp, name, ct); 2045fa9e4066Sahrens 2046fa9e4066Sahrens /* 2047af2c4821Smaybee * Grab a lock on the directory to make sure that noone is 2048af2c4821Smaybee * trying to add (or lookup) entries while we are removing it. 2049af2c4821Smaybee */ 2050af2c4821Smaybee rw_enter(&zp->z_name_lock, RW_WRITER); 2051af2c4821Smaybee 2052af2c4821Smaybee /* 2053af2c4821Smaybee * Grab a lock on the parent pointer to make sure we play well 2054fa9e4066Sahrens * with the treewalk and directory rename code. 2055fa9e4066Sahrens */ 2056fa9e4066Sahrens rw_enter(&zp->z_parent_lock, RW_WRITER); 2057fa9e4066Sahrens 2058fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 2059ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 20600a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2061893a6d32Sahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 20620a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 20630a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, dzp); 206469962b56SMatthew Ahrens error = dmu_tx_assign(tx, waited ? 
TXG_WAITED : TXG_NOWAIT); 2065fa9e4066Sahrens if (error) { 2066fa9e4066Sahrens rw_exit(&zp->z_parent_lock); 2067af2c4821Smaybee rw_exit(&zp->z_name_lock); 2068fa9e4066Sahrens zfs_dirent_unlock(dl); 2069fa9e4066Sahrens VN_RELE(vp); 20701209a471SNeil Perrin if (error == ERESTART) { 207169962b56SMatthew Ahrens waited = B_TRUE; 20728a2f1b91Sahrens dmu_tx_wait(tx); 20738a2f1b91Sahrens dmu_tx_abort(tx); 2074fa9e4066Sahrens goto top; 2075fa9e4066Sahrens } 20768a2f1b91Sahrens dmu_tx_abort(tx); 2077fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2078fa9e4066Sahrens return (error); 2079fa9e4066Sahrens } 2080fa9e4066Sahrens 2081da6c28aaSamw error = zfs_link_destroy(dl, zp, tx, zflg, NULL); 2082fa9e4066Sahrens 2083da6c28aaSamw if (error == 0) { 2084da6c28aaSamw uint64_t txtype = TX_RMDIR; 2085da6c28aaSamw if (flags & FIGNORECASE) 2086da6c28aaSamw txtype |= TX_CI; 20875002558fSNeil Perrin zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); 2088da6c28aaSamw } 2089fa9e4066Sahrens 2090fa9e4066Sahrens dmu_tx_commit(tx); 2091fa9e4066Sahrens 2092fa9e4066Sahrens rw_exit(&zp->z_parent_lock); 2093af2c4821Smaybee rw_exit(&zp->z_name_lock); 2094fa9e4066Sahrens out: 2095fa9e4066Sahrens zfs_dirent_unlock(dl); 2096fa9e4066Sahrens 2097fa9e4066Sahrens VN_RELE(vp); 2098fa9e4066Sahrens 209955da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 21005002558fSNeil Perrin zil_commit(zilog, 0); 210155da60b9SMark J Musante 2102fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2103fa9e4066Sahrens return (error); 2104fa9e4066Sahrens } 2105fa9e4066Sahrens 2106fa9e4066Sahrens /* 2107fa9e4066Sahrens * Read as many directory entries as will fit into the provided 2108fa9e4066Sahrens * buffer from the given directory cursor position (specified in 2109f7170741SWill Andrews * the uio structure). 2110fa9e4066Sahrens * 2111fa9e4066Sahrens * IN: vp - vnode of directory to read. 2112fa9e4066Sahrens * uio - structure supplying read location, range info, 2113fa9e4066Sahrens * and return buffer. 
2114fa9e4066Sahrens * cr - credentials of caller. 2115da6c28aaSamw * ct - caller context 2116da6c28aaSamw * flags - case flags 2117fa9e4066Sahrens * 2118fa9e4066Sahrens * OUT: uio - updated offset and range, buffer filled. 2119fa9e4066Sahrens * eofp - set to true if end-of-file detected. 2120fa9e4066Sahrens * 2121f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 2122fa9e4066Sahrens * 2123fa9e4066Sahrens * Timestamps: 2124fa9e4066Sahrens * vp - atime updated 2125fa9e4066Sahrens * 2126fa9e4066Sahrens * Note that the low 4 bits of the cookie returned by zap is always zero. 2127fa9e4066Sahrens * This allows us to use the low range for "special" directory entries: 2128fa9e4066Sahrens * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 2129fa9e4066Sahrens * we use the offset 2 for the '.zfs' directory. 2130fa9e4066Sahrens */ 2131fa9e4066Sahrens /* ARGSUSED */ 2132fa9e4066Sahrens static int 2133da6c28aaSamw zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, 2134da6c28aaSamw caller_context_t *ct, int flags) 2135fa9e4066Sahrens { 2136fa9e4066Sahrens znode_t *zp = VTOZ(vp); 2137fa9e4066Sahrens iovec_t *iovp; 2138da6c28aaSamw edirent_t *eodp; 2139fa9e4066Sahrens dirent64_t *odp; 2140fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 21417f6e3e7dSperrin objset_t *os; 2142fa9e4066Sahrens caddr_t outbuf; 2143fa9e4066Sahrens size_t bufsize; 2144fa9e4066Sahrens zap_cursor_t zc; 2145fa9e4066Sahrens zap_attribute_t zap; 2146fa9e4066Sahrens uint_t bytes_wanted; 2147fa9e4066Sahrens uint64_t offset; /* must be unsigned; checks for < 1 */ 21480a586ceaSMark Shellenbaum uint64_t parent; 2149fa9e4066Sahrens int local_eof; 21507f6e3e7dSperrin int outcount; 21517f6e3e7dSperrin int error; 21527f6e3e7dSperrin uint8_t prefetch; 2153b38f0970Sck boolean_t check_sysattrs; 2154fa9e4066Sahrens 21553cb34c60Sahrens ZFS_ENTER(zfsvfs); 21563cb34c60Sahrens ZFS_VERIFY_ZP(zp); 2157fa9e4066Sahrens 21580a586ceaSMark Shellenbaum if ((error = 
sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 21590a586ceaSMark Shellenbaum &parent, sizeof (parent))) != 0) { 21600a586ceaSMark Shellenbaum ZFS_EXIT(zfsvfs); 21610a586ceaSMark Shellenbaum return (error); 21620a586ceaSMark Shellenbaum } 21630a586ceaSMark Shellenbaum 2164fa9e4066Sahrens /* 2165fa9e4066Sahrens * If we are not given an eof variable, 2166fa9e4066Sahrens * use a local one. 2167fa9e4066Sahrens */ 2168fa9e4066Sahrens if (eofp == NULL) 2169fa9e4066Sahrens eofp = &local_eof; 2170fa9e4066Sahrens 2171fa9e4066Sahrens /* 2172fa9e4066Sahrens * Check for valid iov_len. 2173fa9e4066Sahrens */ 2174fa9e4066Sahrens if (uio->uio_iov->iov_len <= 0) { 2175fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2176be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 2177fa9e4066Sahrens } 2178fa9e4066Sahrens 2179fa9e4066Sahrens /* 2180fa9e4066Sahrens * Quit if directory has been removed (posix) 2181fa9e4066Sahrens */ 2182893a6d32Sahrens if ((*eofp = zp->z_unlinked) != 0) { 2183fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2184fa9e4066Sahrens return (0); 2185fa9e4066Sahrens } 2186fa9e4066Sahrens 21877f6e3e7dSperrin error = 0; 21887f6e3e7dSperrin os = zfsvfs->z_os; 21897f6e3e7dSperrin offset = uio->uio_loffset; 21907f6e3e7dSperrin prefetch = zp->z_zn_prefetch; 21917f6e3e7dSperrin 2192fa9e4066Sahrens /* 2193fa9e4066Sahrens * Initialize the iterator cursor. 2194fa9e4066Sahrens */ 2195fa9e4066Sahrens if (offset <= 3) { 2196fa9e4066Sahrens /* 2197fa9e4066Sahrens * Start iteration from the beginning of the directory. 2198fa9e4066Sahrens */ 21997f6e3e7dSperrin zap_cursor_init(&zc, os, zp->z_id); 2200fa9e4066Sahrens } else { 2201fa9e4066Sahrens /* 2202fa9e4066Sahrens * The offset is a serialized cursor. 2203fa9e4066Sahrens */ 22047f6e3e7dSperrin zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 2205fa9e4066Sahrens } 2206fa9e4066Sahrens 2207fa9e4066Sahrens /* 2208fa9e4066Sahrens * Get space to change directory entries into fs independent format. 
2209fa9e4066Sahrens */ 2210fa9e4066Sahrens iovp = uio->uio_iov; 2211fa9e4066Sahrens bytes_wanted = iovp->iov_len; 2212fa9e4066Sahrens if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 2213fa9e4066Sahrens bufsize = bytes_wanted; 2214fa9e4066Sahrens outbuf = kmem_alloc(bufsize, KM_SLEEP); 2215fa9e4066Sahrens odp = (struct dirent64 *)outbuf; 2216fa9e4066Sahrens } else { 2217fa9e4066Sahrens bufsize = bytes_wanted; 2218d5285caeSGeorge Wilson outbuf = NULL; 2219fa9e4066Sahrens odp = (struct dirent64 *)iovp->iov_base; 2220fa9e4066Sahrens } 2221da6c28aaSamw eodp = (struct edirent *)odp; 2222fa9e4066Sahrens 2223b38f0970Sck /* 22249660e5cbSJanice Chang * If this VFS supports the system attribute view interface; and 22259660e5cbSJanice Chang * we're looking at an extended attribute directory; and we care 22269660e5cbSJanice Chang * about normalization conflicts on this vfs; then we must check 22279660e5cbSJanice Chang * for normalization conflicts with the sysattr name space. 2228b38f0970Sck */ 22299660e5cbSJanice Chang check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2230b38f0970Sck (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2231b38f0970Sck (flags & V_RDDIR_ENTFLAGS); 2232b38f0970Sck 2233fa9e4066Sahrens /* 2234fa9e4066Sahrens * Transform to file-system independent format 2235fa9e4066Sahrens */ 2236fa9e4066Sahrens outcount = 0; 2237fa9e4066Sahrens while (outcount < bytes_wanted) { 2238b1b8ab34Slling ino64_t objnum; 2239b1b8ab34Slling ushort_t reclen; 224097f85387STim Haley off64_t *next = NULL; 2241b1b8ab34Slling 2242fa9e4066Sahrens /* 2243fa9e4066Sahrens * Special case `.', `..', and `.zfs'. 
2244fa9e4066Sahrens */ 2245fa9e4066Sahrens if (offset == 0) { 2246fa9e4066Sahrens (void) strcpy(zap.za_name, "."); 2247da6c28aaSamw zap.za_normalization_conflict = 0; 2248b1b8ab34Slling objnum = zp->z_id; 2249fa9e4066Sahrens } else if (offset == 1) { 2250fa9e4066Sahrens (void) strcpy(zap.za_name, ".."); 2251da6c28aaSamw zap.za_normalization_conflict = 0; 22520a586ceaSMark Shellenbaum objnum = parent; 2253fa9e4066Sahrens } else if (offset == 2 && zfs_show_ctldir(zp)) { 2254fa9e4066Sahrens (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2255da6c28aaSamw zap.za_normalization_conflict = 0; 2256b1b8ab34Slling objnum = ZFSCTL_INO_ROOT; 2257fa9e4066Sahrens } else { 2258fa9e4066Sahrens /* 2259fa9e4066Sahrens * Grab next entry. 2260fa9e4066Sahrens */ 2261fa9e4066Sahrens if (error = zap_cursor_retrieve(&zc, &zap)) { 2262fa9e4066Sahrens if ((*eofp = (error == ENOENT)) != 0) 2263fa9e4066Sahrens break; 2264fa9e4066Sahrens else 2265fa9e4066Sahrens goto update; 2266fa9e4066Sahrens } 2267fa9e4066Sahrens 2268fa9e4066Sahrens if (zap.za_integer_length != 8 || 2269fa9e4066Sahrens zap.za_num_integers != 1) { 2270fa9e4066Sahrens cmn_err(CE_WARN, "zap_readdir: bad directory " 2271fa9e4066Sahrens "entry, obj = %lld, offset = %lld\n", 2272fa9e4066Sahrens (u_longlong_t)zp->z_id, 2273fa9e4066Sahrens (u_longlong_t)offset); 2274be6fd75aSMatthew Ahrens error = SET_ERROR(ENXIO); 2275fa9e4066Sahrens goto update; 2276fa9e4066Sahrens } 2277b1b8ab34Slling 2278b1b8ab34Slling objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2279b1b8ab34Slling /* 2280b1b8ab34Slling * MacOS X can extract the object type here such as: 2281b1b8ab34Slling * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2282b1b8ab34Slling */ 2283b38f0970Sck 2284b38f0970Sck if (check_sysattrs && !zap.za_normalization_conflict) { 2285b38f0970Sck zap.za_normalization_conflict = 2286b38f0970Sck xattr_sysattr_casechk(zap.za_name); 2287b38f0970Sck } 2288fa9e4066Sahrens } 2289da6c28aaSamw 2290e802abbdSTim Haley if (flags & V_RDDIR_ACCFILTER) { 
2291e802abbdSTim Haley /* 2292e802abbdSTim Haley * If we have no access at all, don't include 2293e802abbdSTim Haley * this entry in the returned information 2294e802abbdSTim Haley */ 2295e802abbdSTim Haley znode_t *ezp; 2296e802abbdSTim Haley if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 2297e802abbdSTim Haley goto skip_entry; 2298e802abbdSTim Haley if (!zfs_has_access(ezp, cr)) { 2299e802abbdSTim Haley VN_RELE(ZTOV(ezp)); 2300e802abbdSTim Haley goto skip_entry; 2301e802abbdSTim Haley } 2302e802abbdSTim Haley VN_RELE(ZTOV(ezp)); 2303e802abbdSTim Haley } 2304e802abbdSTim Haley 2305da6c28aaSamw if (flags & V_RDDIR_ENTFLAGS) 2306da6c28aaSamw reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2307da6c28aaSamw else 2308da6c28aaSamw reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2309fa9e4066Sahrens 2310fa9e4066Sahrens /* 2311fa9e4066Sahrens * Will this entry fit in the buffer? 2312fa9e4066Sahrens */ 2313b1b8ab34Slling if (outcount + reclen > bufsize) { 2314fa9e4066Sahrens /* 2315fa9e4066Sahrens * Did we manage to fit anything in the buffer? 2316fa9e4066Sahrens */ 2317fa9e4066Sahrens if (!outcount) { 2318be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 2319fa9e4066Sahrens goto update; 2320fa9e4066Sahrens } 2321fa9e4066Sahrens break; 2322fa9e4066Sahrens } 2323da6c28aaSamw if (flags & V_RDDIR_ENTFLAGS) { 2324da6c28aaSamw /* 2325da6c28aaSamw * Add extended flag entry: 2326da6c28aaSamw */ 2327da6c28aaSamw eodp->ed_ino = objnum; 2328da6c28aaSamw eodp->ed_reclen = reclen; 2329da6c28aaSamw /* NOTE: ed_off is the offset for the *next* entry */ 2330da6c28aaSamw next = &(eodp->ed_off); 2331da6c28aaSamw eodp->ed_eflags = zap.za_normalization_conflict ? 
2332da6c28aaSamw ED_CASE_CONFLICT : 0; 2333da6c28aaSamw (void) strncpy(eodp->ed_name, zap.za_name, 2334da6c28aaSamw EDIRENT_NAMELEN(reclen)); 2335da6c28aaSamw eodp = (edirent_t *)((intptr_t)eodp + reclen); 2336da6c28aaSamw } else { 2337da6c28aaSamw /* 2338da6c28aaSamw * Add normal entry: 2339da6c28aaSamw */ 2340da6c28aaSamw odp->d_ino = objnum; 2341da6c28aaSamw odp->d_reclen = reclen; 2342da6c28aaSamw /* NOTE: d_off is the offset for the *next* entry */ 2343da6c28aaSamw next = &(odp->d_off); 2344da6c28aaSamw (void) strncpy(odp->d_name, zap.za_name, 2345da6c28aaSamw DIRENT64_NAMELEN(reclen)); 2346da6c28aaSamw odp = (dirent64_t *)((intptr_t)odp + reclen); 2347da6c28aaSamw } 2348b1b8ab34Slling outcount += reclen; 2349fa9e4066Sahrens 2350fa9e4066Sahrens ASSERT(outcount <= bufsize); 2351fa9e4066Sahrens 2352fa9e4066Sahrens /* Prefetch znode */ 23537f6e3e7dSperrin if (prefetch) 2354b1b8ab34Slling dmu_prefetch(os, objnum, 0, 0); 2355fa9e4066Sahrens 2356e802abbdSTim Haley skip_entry: 2357fa9e4066Sahrens /* 2358fa9e4066Sahrens * Move to the next entry, fill in the previous offset. 2359fa9e4066Sahrens */ 2360fa9e4066Sahrens if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2361fa9e4066Sahrens zap_cursor_advance(&zc); 2362fa9e4066Sahrens offset = zap_cursor_serialize(&zc); 2363fa9e4066Sahrens } else { 2364fa9e4066Sahrens offset += 1; 2365fa9e4066Sahrens } 236697f85387STim Haley if (next) 236797f85387STim Haley *next = offset; 2368fa9e4066Sahrens } 23697f6e3e7dSperrin zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2370fa9e4066Sahrens 2371fa9e4066Sahrens if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 2372fa9e4066Sahrens iovp->iov_base += outcount; 2373fa9e4066Sahrens iovp->iov_len -= outcount; 2374fa9e4066Sahrens uio->uio_resid -= outcount; 2375fa9e4066Sahrens } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 2376fa9e4066Sahrens /* 2377fa9e4066Sahrens * Reset the pointer. 
2378fa9e4066Sahrens */ 2379fa9e4066Sahrens offset = uio->uio_loffset; 2380fa9e4066Sahrens } 2381fa9e4066Sahrens 2382fa9e4066Sahrens update: 238387e5029aSahrens zap_cursor_fini(&zc); 2384fa9e4066Sahrens if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 2385fa9e4066Sahrens kmem_free(outbuf, bufsize); 2386fa9e4066Sahrens 2387fa9e4066Sahrens if (error == ENOENT) 2388fa9e4066Sahrens error = 0; 2389fa9e4066Sahrens 2390fa9e4066Sahrens ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2391fa9e4066Sahrens 2392fa9e4066Sahrens uio->uio_loffset = offset; 2393fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2394fa9e4066Sahrens return (error); 2395fa9e4066Sahrens } 2396fa9e4066Sahrens 2397ec533521Sfr ulong_t zfs_fsync_sync_cnt = 4; 2398ec533521Sfr 2399fa9e4066Sahrens static int 2400da6c28aaSamw zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 2401fa9e4066Sahrens { 2402fa9e4066Sahrens znode_t *zp = VTOZ(vp); 2403fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2404fa9e4066Sahrens 2405b468a217Seschrock /* 2406b468a217Seschrock * Regardless of whether this is required for standards conformance, 2407b468a217Seschrock * this is the logical behavior when fsync() is called on a file with 2408b468a217Seschrock * dirty pages. We use B_ASYNC since the ZIL transactions are already 2409b468a217Seschrock * going to be pushed out as part of the zil_commit(). 
2410b468a217Seschrock */ 2411b468a217Seschrock if (vn_has_cached_data(vp) && !(syncflag & FNODSYNC) && 2412b468a217Seschrock (vp->v_type == VREG) && !(IS_SWAPVP(vp))) 2413da6c28aaSamw (void) VOP_PUTPAGE(vp, (offset_t)0, (size_t)0, B_ASYNC, cr, ct); 2414b468a217Seschrock 2415ec533521Sfr (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 2416ec533521Sfr 241755da60b9SMark J Musante if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { 241855da60b9SMark J Musante ZFS_ENTER(zfsvfs); 241955da60b9SMark J Musante ZFS_VERIFY_ZP(zp); 24205002558fSNeil Perrin zil_commit(zfsvfs->z_log, zp->z_id); 242155da60b9SMark J Musante ZFS_EXIT(zfsvfs); 242255da60b9SMark J Musante } 2423fa9e4066Sahrens return (0); 2424fa9e4066Sahrens } 2425fa9e4066Sahrens 2426da6c28aaSamw 2427fa9e4066Sahrens /* 2428fa9e4066Sahrens * Get the requested file attributes and place them in the provided 2429fa9e4066Sahrens * vattr structure. 2430fa9e4066Sahrens * 2431fa9e4066Sahrens * IN: vp - vnode of file. 2432fa9e4066Sahrens * vap - va_mask identifies requested attributes. 2433da6c28aaSamw * If AT_XVATTR set, then optional attrs are requested 2434da6c28aaSamw * flags - ATTR_NOACLCHECK (CIFS server context) 2435fa9e4066Sahrens * cr - credentials of caller. 2436da6c28aaSamw * ct - caller context 2437fa9e4066Sahrens * 2438fa9e4066Sahrens * OUT: vap - attribute values. 2439fa9e4066Sahrens * 2440f7170741SWill Andrews * RETURN: 0 (always succeeds). 
2441fa9e4066Sahrens */ 2442fa9e4066Sahrens /* ARGSUSED */ 2443fa9e4066Sahrens static int 2444da6c28aaSamw zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2445da6c28aaSamw caller_context_t *ct) 2446fa9e4066Sahrens { 2447fa9e4066Sahrens znode_t *zp = VTOZ(vp); 2448fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2449da6c28aaSamw int error = 0; 2450ecd6cf80Smarks uint64_t links; 24510a586ceaSMark Shellenbaum uint64_t mtime[2], ctime[2]; 2452da6c28aaSamw xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2453da6c28aaSamw xoptattr_t *xoap = NULL; 2454da6c28aaSamw boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 24550a586ceaSMark Shellenbaum sa_bulk_attr_t bulk[2]; 24560a586ceaSMark Shellenbaum int count = 0; 2457fa9e4066Sahrens 24583cb34c60Sahrens ZFS_ENTER(zfsvfs); 24593cb34c60Sahrens ZFS_VERIFY_ZP(zp); 24600a586ceaSMark Shellenbaum 2461f1696b23SMark Shellenbaum zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 2462f1696b23SMark Shellenbaum 24630a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 24640a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 24650a586ceaSMark Shellenbaum 24660a586ceaSMark Shellenbaum if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { 24670a586ceaSMark Shellenbaum ZFS_EXIT(zfsvfs); 24680a586ceaSMark Shellenbaum return (error); 24690a586ceaSMark Shellenbaum } 2470fa9e4066Sahrens 2471da6c28aaSamw /* 2472da6c28aaSamw * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 2473da6c28aaSamw * Also, if we are the owner don't bother, since owner should 2474da6c28aaSamw * always be allowed to read basic attributes of file. 
2475da6c28aaSamw */ 2476f1696b23SMark Shellenbaum if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && 2477f1696b23SMark Shellenbaum (vap->va_uid != crgetuid(cr))) { 2478da6c28aaSamw if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 2479da6c28aaSamw skipaclchk, cr)) { 2480da6c28aaSamw ZFS_EXIT(zfsvfs); 2481da6c28aaSamw return (error); 2482da6c28aaSamw } 2483da6c28aaSamw } 2484da6c28aaSamw 2485fa9e4066Sahrens /* 2486fa9e4066Sahrens * Return all attributes. It's cheaper to provide the answer 2487fa9e4066Sahrens * than to determine whether we were asked the question. 2488fa9e4066Sahrens */ 2489fa9e4066Sahrens 249034f345efSRay Hassan mutex_enter(&zp->z_lock); 2491fa9e4066Sahrens vap->va_type = vp->v_type; 24920a586ceaSMark Shellenbaum vap->va_mode = zp->z_mode & MODEMASK; 2493fa9e4066Sahrens vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 2494fa9e4066Sahrens vap->va_nodeid = zp->z_id; 2495ecd6cf80Smarks if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 24960a586ceaSMark Shellenbaum links = zp->z_links + 1; 2497ecd6cf80Smarks else 24980a586ceaSMark Shellenbaum links = zp->z_links; 2499ecd6cf80Smarks vap->va_nlink = MIN(links, UINT32_MAX); /* nlink_t limit! */ 25000a586ceaSMark Shellenbaum vap->va_size = zp->z_size; 250172fc53bcSmarks vap->va_rdev = vp->v_rdev; 2502fa9e4066Sahrens vap->va_seq = zp->z_seq; 2503fa9e4066Sahrens 2504fa9e4066Sahrens /* 2505da6c28aaSamw * Add in any requested optional attributes and the create time. 2506da6c28aaSamw * Also set the corresponding bits in the returned attribute bitmap. 
2507fa9e4066Sahrens */ 2508da6c28aaSamw if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 2509da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 2510da6c28aaSamw xoap->xoa_archive = 25110a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_ARCHIVE) != 0); 2512da6c28aaSamw XVA_SET_RTN(xvap, XAT_ARCHIVE); 2513da6c28aaSamw } 2514da6c28aaSamw 2515da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 2516da6c28aaSamw xoap->xoa_readonly = 25170a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_READONLY) != 0); 2518da6c28aaSamw XVA_SET_RTN(xvap, XAT_READONLY); 2519da6c28aaSamw } 2520da6c28aaSamw 2521da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 2522da6c28aaSamw xoap->xoa_system = 25230a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_SYSTEM) != 0); 2524da6c28aaSamw XVA_SET_RTN(xvap, XAT_SYSTEM); 2525da6c28aaSamw } 2526da6c28aaSamw 2527da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 2528da6c28aaSamw xoap->xoa_hidden = 25290a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_HIDDEN) != 0); 2530da6c28aaSamw XVA_SET_RTN(xvap, XAT_HIDDEN); 2531da6c28aaSamw } 2532da6c28aaSamw 2533da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2534da6c28aaSamw xoap->xoa_nounlink = 25350a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_NOUNLINK) != 0); 2536da6c28aaSamw XVA_SET_RTN(xvap, XAT_NOUNLINK); 2537da6c28aaSamw } 2538da6c28aaSamw 2539da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2540da6c28aaSamw xoap->xoa_immutable = 25410a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_IMMUTABLE) != 0); 2542da6c28aaSamw XVA_SET_RTN(xvap, XAT_IMMUTABLE); 2543da6c28aaSamw } 2544da6c28aaSamw 2545da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2546da6c28aaSamw xoap->xoa_appendonly = 25470a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_APPENDONLY) != 0); 2548da6c28aaSamw XVA_SET_RTN(xvap, XAT_APPENDONLY); 2549da6c28aaSamw } 2550da6c28aaSamw 2551da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2552da6c28aaSamw xoap->xoa_nodump = 25530a586ceaSMark Shellenbaum ((zp->z_pflags & 
ZFS_NODUMP) != 0); 2554da6c28aaSamw XVA_SET_RTN(xvap, XAT_NODUMP); 2555da6c28aaSamw } 2556da6c28aaSamw 2557da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 2558da6c28aaSamw xoap->xoa_opaque = 25590a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_OPAQUE) != 0); 2560da6c28aaSamw XVA_SET_RTN(xvap, XAT_OPAQUE); 2561da6c28aaSamw } 2562da6c28aaSamw 2563da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2564da6c28aaSamw xoap->xoa_av_quarantined = 25650a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); 2566da6c28aaSamw XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 2567da6c28aaSamw } 2568da6c28aaSamw 2569da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2570da6c28aaSamw xoap->xoa_av_modified = 25710a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); 2572da6c28aaSamw XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 2573da6c28aaSamw } 2574da6c28aaSamw 2575da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 25760a586ceaSMark Shellenbaum vp->v_type == VREG) { 25770a586ceaSMark Shellenbaum zfs_sa_get_scanstamp(zp, xvap); 2578da6c28aaSamw } 2579da6c28aaSamw 2580da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 25810a586ceaSMark Shellenbaum uint64_t times[2]; 25820a586ceaSMark Shellenbaum 25830a586ceaSMark Shellenbaum (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), 25840a586ceaSMark Shellenbaum times, sizeof (times)); 25850a586ceaSMark Shellenbaum ZFS_TIME_DECODE(&xoap->xoa_createtime, times); 2586da6c28aaSamw XVA_SET_RTN(xvap, XAT_CREATETIME); 2587fa9e4066Sahrens } 25887a286c47SDai Ngo 25897a286c47SDai Ngo if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 25900a586ceaSMark Shellenbaum xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); 25917a286c47SDai Ngo XVA_SET_RTN(xvap, XAT_REPARSE); 25927a286c47SDai Ngo } 259399d5e173STim Haley if (XVA_ISSET_REQ(xvap, XAT_GEN)) { 259499d5e173STim Haley xoap->xoa_generation = zp->z_gen; 259599d5e173STim Haley XVA_SET_RTN(xvap, XAT_GEN); 259699d5e173STim Haley } 2597fd9ee8b5Sjoyce mcintosh 
2598fd9ee8b5Sjoyce mcintosh if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 2599fd9ee8b5Sjoyce mcintosh xoap->xoa_offline = 2600fd9ee8b5Sjoyce mcintosh ((zp->z_pflags & ZFS_OFFLINE) != 0); 2601fd9ee8b5Sjoyce mcintosh XVA_SET_RTN(xvap, XAT_OFFLINE); 2602fd9ee8b5Sjoyce mcintosh } 2603fd9ee8b5Sjoyce mcintosh 2604fd9ee8b5Sjoyce mcintosh if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 2605fd9ee8b5Sjoyce mcintosh xoap->xoa_sparse = 2606fd9ee8b5Sjoyce mcintosh ((zp->z_pflags & ZFS_SPARSE) != 0); 2607fd9ee8b5Sjoyce mcintosh XVA_SET_RTN(xvap, XAT_SPARSE); 2608fd9ee8b5Sjoyce mcintosh } 2609fa9e4066Sahrens } 2610fa9e4066Sahrens 26110a586ceaSMark Shellenbaum ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 26120a586ceaSMark Shellenbaum ZFS_TIME_DECODE(&vap->va_mtime, mtime); 26130a586ceaSMark Shellenbaum ZFS_TIME_DECODE(&vap->va_ctime, ctime); 2614da6c28aaSamw 2615fa9e4066Sahrens mutex_exit(&zp->z_lock); 2616fa9e4066Sahrens 26170a586ceaSMark Shellenbaum sa_object_size(zp->z_sa_hdl, &vap->va_blksize, &vap->va_nblocks); 2618fa9e4066Sahrens 2619fa9e4066Sahrens if (zp->z_blksz == 0) { 2620fa9e4066Sahrens /* 2621fa9e4066Sahrens * Block size hasn't been set; suggest maximal I/O transfers. 2622fa9e4066Sahrens */ 2623fa9e4066Sahrens vap->va_blksize = zfsvfs->z_max_blksz; 2624fa9e4066Sahrens } 2625fa9e4066Sahrens 2626fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2627fa9e4066Sahrens return (0); 2628fa9e4066Sahrens } 2629fa9e4066Sahrens 2630fa9e4066Sahrens /* 2631fa9e4066Sahrens * Set the file attributes to the values contained in the 2632fa9e4066Sahrens * vattr structure. 2633fa9e4066Sahrens * 2634fa9e4066Sahrens * IN: vp - vnode of file to be modified. 2635fa9e4066Sahrens * vap - new attribute values. 2636da6c28aaSamw * If AT_XVATTR set, then optional attrs are being set 2637fa9e4066Sahrens * flags - ATTR_UTIME set if non-default time values provided. 2638da6c28aaSamw * - ATTR_NOACLCHECK (CIFS context only). 2639fa9e4066Sahrens * cr - credentials of caller. 
2640da6c28aaSamw * ct - caller context 2641fa9e4066Sahrens * 2642f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 2643fa9e4066Sahrens * 2644fa9e4066Sahrens * Timestamps: 2645fa9e4066Sahrens * vp - ctime updated, mtime updated if size changed. 2646fa9e4066Sahrens */ 2647fa9e4066Sahrens /* ARGSUSED */ 2648fa9e4066Sahrens static int 2649fa9e4066Sahrens zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2650f7170741SWill Andrews caller_context_t *ct) 2651fa9e4066Sahrens { 2652f18faf3fSek znode_t *zp = VTOZ(vp); 2653fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2654f18faf3fSek zilog_t *zilog; 2655fa9e4066Sahrens dmu_tx_t *tx; 2656fa9e4066Sahrens vattr_t oldva; 2657ae4caef8SMark Shellenbaum xvattr_t tmpxvattr; 26585730cc9aSmaybee uint_t mask = vap->va_mask; 2659d5285caeSGeorge Wilson uint_t saved_mask = 0; 2660f92daba9Smarks int trim_mask = 0; 2661fa9e4066Sahrens uint64_t new_mode; 266289459e17SMark Shellenbaum uint64_t new_uid, new_gid; 26630b2a8171SMark Shellenbaum uint64_t xattr_obj; 26640a586ceaSMark Shellenbaum uint64_t mtime[2], ctime[2]; 2665d2443e76Smarks znode_t *attrzp; 2666fa9e4066Sahrens int need_policy = FALSE; 26670a586ceaSMark Shellenbaum int err, err2; 2668da6c28aaSamw zfs_fuid_info_t *fuidp = NULL; 2669da6c28aaSamw xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2670da6c28aaSamw xoptattr_t *xoap; 26710b2a8171SMark Shellenbaum zfs_acl_t *aclp; 2672da6c28aaSamw boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 26730a586ceaSMark Shellenbaum boolean_t fuid_dirtied = B_FALSE; 26740a586ceaSMark Shellenbaum sa_bulk_attr_t bulk[7], xattr_bulk[7]; 26750a586ceaSMark Shellenbaum int count = 0, xattr_count = 0; 2676fa9e4066Sahrens 2677fa9e4066Sahrens if (mask == 0) 2678fa9e4066Sahrens return (0); 2679fa9e4066Sahrens 2680fa9e4066Sahrens if (mask & AT_NOSET) 2681be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 2682fa9e4066Sahrens 26833cb34c60Sahrens ZFS_ENTER(zfsvfs); 26843cb34c60Sahrens ZFS_VERIFY_ZP(zp); 2685da6c28aaSamw 2686da6c28aaSamw zilog = zfsvfs->z_log; 2687da6c28aaSamw 2688da6c28aaSamw /* 2689da6c28aaSamw * Make sure that if we have ephemeral uid/gid or xvattr specified 2690da6c28aaSamw * that file system is at proper version level 2691da6c28aaSamw */ 2692da6c28aaSamw 2693da6c28aaSamw if (zfsvfs->z_use_fuids == B_FALSE && 2694da6c28aaSamw (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 2695da6c28aaSamw ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 269602dcba3bStimh (mask & AT_XVATTR))) { 269702dcba3bStimh ZFS_EXIT(zfsvfs); 2698be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 269902dcba3bStimh } 2700da6c28aaSamw 270102dcba3bStimh if (mask & AT_SIZE && vp->v_type == VDIR) { 270202dcba3bStimh ZFS_EXIT(zfsvfs); 2703be6fd75aSMatthew Ahrens return (SET_ERROR(EISDIR)); 270402dcba3bStimh } 2705fa9e4066Sahrens 270602dcba3bStimh if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 270702dcba3bStimh ZFS_EXIT(zfsvfs); 2708be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 270902dcba3bStimh } 271084c5a155Smarks 2711da6c28aaSamw /* 2712da6c28aaSamw * If this is an xvattr_t, then get a pointer to the structure of 2713da6c28aaSamw * optional attributes. If this is NULL, then we have a vattr_t. 
2714da6c28aaSamw */ 2715da6c28aaSamw xoap = xva_getxoptattr(xvap); 2716da6c28aaSamw 2717ae4caef8SMark Shellenbaum xva_init(&tmpxvattr); 2718ae4caef8SMark Shellenbaum 2719da6c28aaSamw /* 2720da6c28aaSamw * Immutable files can only alter immutable bit and atime 2721da6c28aaSamw */ 27220a586ceaSMark Shellenbaum if ((zp->z_pflags & ZFS_IMMUTABLE) && 2723da6c28aaSamw ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 272402dcba3bStimh ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 272502dcba3bStimh ZFS_EXIT(zfsvfs); 2726be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 272702dcba3bStimh } 2728da6c28aaSamw 27290a586ceaSMark Shellenbaum if ((mask & AT_SIZE) && (zp->z_pflags & ZFS_READONLY)) { 273002dcba3bStimh ZFS_EXIT(zfsvfs); 2731be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 273202dcba3bStimh } 2733fa9e4066Sahrens 273493129341Smarks /* 273593129341Smarks * Verify timestamps doesn't overflow 32 bits. 273693129341Smarks * ZFS can handle large timestamps, but 32bit syscalls can't 273793129341Smarks * handle times greater than 2039. This check should be removed 273893129341Smarks * once large timestamps are fully supported. 273993129341Smarks */ 274093129341Smarks if (mask & (AT_ATIME | AT_MTIME)) { 274193129341Smarks if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 274293129341Smarks ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 274393129341Smarks ZFS_EXIT(zfsvfs); 2744be6fd75aSMatthew Ahrens return (SET_ERROR(EOVERFLOW)); 274593129341Smarks } 274693129341Smarks } 274793129341Smarks 2748fa9e4066Sahrens top: 2749d2443e76Smarks attrzp = NULL; 27500b2a8171SMark Shellenbaum aclp = NULL; 2751fa9e4066Sahrens 2752d47621a4STim Haley /* Can this be moved to before the top label? 
*/ 2753fa9e4066Sahrens if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 2754fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2755be6fd75aSMatthew Ahrens return (SET_ERROR(EROFS)); 2756fa9e4066Sahrens } 2757fa9e4066Sahrens 2758fa9e4066Sahrens /* 2759fa9e4066Sahrens * First validate permissions 2760fa9e4066Sahrens */ 2761fa9e4066Sahrens 2762fa9e4066Sahrens if (mask & AT_SIZE) { 2763da6c28aaSamw err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr); 2764fa9e4066Sahrens if (err) { 2765fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2766fa9e4066Sahrens return (err); 2767fa9e4066Sahrens } 27685730cc9aSmaybee /* 27695730cc9aSmaybee * XXX - Note, we are not providing any open 27705730cc9aSmaybee * mode flags here (like FNDELAY), so we may 27715730cc9aSmaybee * block if there are locks present... this 27725730cc9aSmaybee * should be addressed in openat(). 27735730cc9aSmaybee */ 2774cdb0ab79Smaybee /* XXX - would it be OK to generate a log record here? */ 2775cdb0ab79Smaybee err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 27765730cc9aSmaybee if (err) { 27775730cc9aSmaybee ZFS_EXIT(zfsvfs); 27785730cc9aSmaybee return (err); 27795730cc9aSmaybee } 278072102e74SBryan Cantrill 278172102e74SBryan Cantrill if (vap->va_size == 0) 278272102e74SBryan Cantrill vnevent_truncate(ZTOV(zp), ct); 2783fa9e4066Sahrens } 2784fa9e4066Sahrens 2785da6c28aaSamw if (mask & (AT_ATIME|AT_MTIME) || 2786da6c28aaSamw ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 2787da6c28aaSamw XVA_ISSET_REQ(xvap, XAT_READONLY) || 2788da6c28aaSamw XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 2789fd9ee8b5Sjoyce mcintosh XVA_ISSET_REQ(xvap, XAT_OFFLINE) || 2790fd9ee8b5Sjoyce mcintosh XVA_ISSET_REQ(xvap, XAT_SPARSE) || 2791da6c28aaSamw XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 27920a586ceaSMark Shellenbaum XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { 2793da6c28aaSamw need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 2794da6c28aaSamw skipaclchk, cr); 27950a586ceaSMark Shellenbaum } 2796fa9e4066Sahrens 2797fa9e4066Sahrens if (mask & (AT_UID|AT_GID)) { 
2798fa9e4066Sahrens int idmask = (mask & (AT_UID|AT_GID)); 2799fa9e4066Sahrens int take_owner; 2800fa9e4066Sahrens int take_group; 2801fa9e4066Sahrens 2802a933bc41Smarks /* 2803a933bc41Smarks * NOTE: even if a new mode is being set, 2804a933bc41Smarks * we may clear S_ISUID/S_ISGID bits. 2805a933bc41Smarks */ 2806a933bc41Smarks 2807a933bc41Smarks if (!(mask & AT_MODE)) 28080a586ceaSMark Shellenbaum vap->va_mode = zp->z_mode; 2809a933bc41Smarks 2810fa9e4066Sahrens /* 2811fa9e4066Sahrens * Take ownership or chgrp to group we are a member of 2812fa9e4066Sahrens */ 2813fa9e4066Sahrens 2814fa9e4066Sahrens take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 2815da6c28aaSamw take_group = (mask & AT_GID) && 2816da6c28aaSamw zfs_groupmember(zfsvfs, vap->va_gid, cr); 2817fa9e4066Sahrens 2818fa9e4066Sahrens /* 2819fa9e4066Sahrens * If both AT_UID and AT_GID are set then take_owner and 2820fa9e4066Sahrens * take_group must both be set in order to allow taking 2821fa9e4066Sahrens * ownership. 
2822fa9e4066Sahrens * 2823fa9e4066Sahrens * Otherwise, send the check through secpolicy_vnode_setattr() 2824fa9e4066Sahrens * 2825fa9e4066Sahrens */ 2826fa9e4066Sahrens 2827fa9e4066Sahrens if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 2828fa9e4066Sahrens ((idmask == AT_UID) && take_owner) || 2829fa9e4066Sahrens ((idmask == AT_GID) && take_group)) { 2830da6c28aaSamw if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 2831da6c28aaSamw skipaclchk, cr) == 0) { 2832fa9e4066Sahrens /* 2833fa9e4066Sahrens * Remove setuid/setgid for non-privileged users 2834fa9e4066Sahrens */ 283513f9f30eSmarks secpolicy_setid_clear(vap, cr); 2836f92daba9Smarks trim_mask = (mask & (AT_UID|AT_GID)); 2837fa9e4066Sahrens } else { 2838fa9e4066Sahrens need_policy = TRUE; 2839fa9e4066Sahrens } 2840fa9e4066Sahrens } else { 2841fa9e4066Sahrens need_policy = TRUE; 2842fa9e4066Sahrens } 2843fa9e4066Sahrens } 2844fa9e4066Sahrens 2845f92daba9Smarks mutex_enter(&zp->z_lock); 28460a586ceaSMark Shellenbaum oldva.va_mode = zp->z_mode; 2847f1696b23SMark Shellenbaum zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 2848da6c28aaSamw if (mask & AT_XVATTR) { 2849ae4caef8SMark Shellenbaum /* 2850ae4caef8SMark Shellenbaum * Update xvattr mask to include only those attributes 2851ae4caef8SMark Shellenbaum * that are actually changing. 2852ae4caef8SMark Shellenbaum * 2853ae4caef8SMark Shellenbaum * the bits will be restored prior to actually setting 2854ae4caef8SMark Shellenbaum * the attributes so the caller thinks they were set. 
2855ae4caef8SMark Shellenbaum */ 2856ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2857ae4caef8SMark Shellenbaum if (xoap->xoa_appendonly != 28580a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { 2859ae4caef8SMark Shellenbaum need_policy = TRUE; 2860ae4caef8SMark Shellenbaum } else { 2861ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_APPENDONLY); 2862ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 2863ae4caef8SMark Shellenbaum } 2864ae4caef8SMark Shellenbaum } 2865ae4caef8SMark Shellenbaum 2866ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2867ae4caef8SMark Shellenbaum if (xoap->xoa_nounlink != 28680a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { 2869ae4caef8SMark Shellenbaum need_policy = TRUE; 2870ae4caef8SMark Shellenbaum } else { 2871ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_NOUNLINK); 2872ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 2873ae4caef8SMark Shellenbaum } 2874ae4caef8SMark Shellenbaum } 2875ae4caef8SMark Shellenbaum 2876ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2877ae4caef8SMark Shellenbaum if (xoap->xoa_immutable != 28780a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { 2879ae4caef8SMark Shellenbaum need_policy = TRUE; 2880ae4caef8SMark Shellenbaum } else { 2881ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 2882ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 2883ae4caef8SMark Shellenbaum } 2884ae4caef8SMark Shellenbaum } 2885ae4caef8SMark Shellenbaum 2886ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2887ae4caef8SMark Shellenbaum if (xoap->xoa_nodump != 28880a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_NODUMP) != 0)) { 2889ae4caef8SMark Shellenbaum need_policy = TRUE; 2890ae4caef8SMark Shellenbaum } else { 2891ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_NODUMP); 2892ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 
2893ae4caef8SMark Shellenbaum } 2894ae4caef8SMark Shellenbaum } 2895ae4caef8SMark Shellenbaum 2896ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2897ae4caef8SMark Shellenbaum if (xoap->xoa_av_modified != 28980a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { 2899ae4caef8SMark Shellenbaum need_policy = TRUE; 2900ae4caef8SMark Shellenbaum } else { 2901ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 2902ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 2903ae4caef8SMark Shellenbaum } 2904ae4caef8SMark Shellenbaum } 2905ae4caef8SMark Shellenbaum 2906ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2907ae4caef8SMark Shellenbaum if ((vp->v_type != VREG && 2908ae4caef8SMark Shellenbaum xoap->xoa_av_quarantined) || 2909ae4caef8SMark Shellenbaum xoap->xoa_av_quarantined != 29100a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { 2911ae4caef8SMark Shellenbaum need_policy = TRUE; 2912ae4caef8SMark Shellenbaum } else { 2913ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 2914ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 2915ae4caef8SMark Shellenbaum } 2916ae4caef8SMark Shellenbaum } 2917ae4caef8SMark Shellenbaum 29187a286c47SDai Ngo if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 29197a286c47SDai Ngo mutex_exit(&zp->z_lock); 29207a286c47SDai Ngo ZFS_EXIT(zfsvfs); 2921be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 29227a286c47SDai Ngo } 29237a286c47SDai Ngo 2924ae4caef8SMark Shellenbaum if (need_policy == FALSE && 2925ae4caef8SMark Shellenbaum (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 2926ae4caef8SMark Shellenbaum XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 2927da6c28aaSamw need_policy = TRUE; 2928da6c28aaSamw } 2929da6c28aaSamw } 2930da6c28aaSamw 2931f92daba9Smarks mutex_exit(&zp->z_lock); 2932fa9e4066Sahrens 2933f92daba9Smarks if (mask & AT_MODE) { 2934da6c28aaSamw if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 
2935f92daba9Smarks err = secpolicy_setid_setsticky_clear(vp, vap, 2936f92daba9Smarks &oldva, cr); 2937f92daba9Smarks if (err) { 2938f92daba9Smarks ZFS_EXIT(zfsvfs); 2939f92daba9Smarks return (err); 2940f92daba9Smarks } 2941f92daba9Smarks trim_mask |= AT_MODE; 2942f92daba9Smarks } else { 2943f92daba9Smarks need_policy = TRUE; 2944f92daba9Smarks } 2945f92daba9Smarks } 294613f9f30eSmarks 2947f92daba9Smarks if (need_policy) { 294813f9f30eSmarks /* 294913f9f30eSmarks * If trim_mask is set then take ownership 2950f92daba9Smarks * has been granted or write_acl is present and user 2951f92daba9Smarks * has the ability to modify mode. In that case remove 2952f92daba9Smarks * UID|GID and or MODE from mask so that 295313f9f30eSmarks * secpolicy_vnode_setattr() doesn't revoke it. 295413f9f30eSmarks */ 295513f9f30eSmarks 2956f92daba9Smarks if (trim_mask) { 2957f92daba9Smarks saved_mask = vap->va_mask; 2958f92daba9Smarks vap->va_mask &= ~trim_mask; 2959f92daba9Smarks } 2960fa9e4066Sahrens err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 2961da6c28aaSamw (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 2962fa9e4066Sahrens if (err) { 2963fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2964fa9e4066Sahrens return (err); 2965fa9e4066Sahrens } 296613f9f30eSmarks 296713f9f30eSmarks if (trim_mask) 2968f92daba9Smarks vap->va_mask |= saved_mask; 2969fa9e4066Sahrens } 2970fa9e4066Sahrens 2971fa9e4066Sahrens /* 2972fa9e4066Sahrens * secpolicy_vnode_setattr, or take ownership may have 2973fa9e4066Sahrens * changed va_mask 2974fa9e4066Sahrens */ 2975fa9e4066Sahrens mask = vap->va_mask; 2976fa9e4066Sahrens 29770a586ceaSMark Shellenbaum if ((mask & (AT_UID | AT_GID))) { 29780b2a8171SMark Shellenbaum err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 29790b2a8171SMark Shellenbaum &xattr_obj, sizeof (xattr_obj)); 29800a586ceaSMark Shellenbaum 29810b2a8171SMark Shellenbaum if (err == 0 && xattr_obj) { 29820a586ceaSMark Shellenbaum err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); 
29830a586ceaSMark Shellenbaum if (err) 29840a586ceaSMark Shellenbaum goto out2; 29850a586ceaSMark Shellenbaum } 29860a586ceaSMark Shellenbaum if (mask & AT_UID) { 29870a586ceaSMark Shellenbaum new_uid = zfs_fuid_create(zfsvfs, 29880a586ceaSMark Shellenbaum (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 2989f1696b23SMark Shellenbaum if (new_uid != zp->z_uid && 29900a586ceaSMark Shellenbaum zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) { 29910b2a8171SMark Shellenbaum if (attrzp) 29920b2a8171SMark Shellenbaum VN_RELE(ZTOV(attrzp)); 2993be6fd75aSMatthew Ahrens err = SET_ERROR(EDQUOT); 29940a586ceaSMark Shellenbaum goto out2; 29950a586ceaSMark Shellenbaum } 29960a586ceaSMark Shellenbaum } 29970a586ceaSMark Shellenbaum 29980a586ceaSMark Shellenbaum if (mask & AT_GID) { 29990a586ceaSMark Shellenbaum new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 30000a586ceaSMark Shellenbaum cr, ZFS_GROUP, &fuidp); 30010a586ceaSMark Shellenbaum if (new_gid != zp->z_gid && 30020a586ceaSMark Shellenbaum zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) { 30030b2a8171SMark Shellenbaum if (attrzp) 30040b2a8171SMark Shellenbaum VN_RELE(ZTOV(attrzp)); 3005be6fd75aSMatthew Ahrens err = SET_ERROR(EDQUOT); 30060a586ceaSMark Shellenbaum goto out2; 30070a586ceaSMark Shellenbaum } 30080a586ceaSMark Shellenbaum } 30090a586ceaSMark Shellenbaum } 3010fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 3011fa9e4066Sahrens 3012fa9e4066Sahrens if (mask & AT_MODE) { 30130a586ceaSMark Shellenbaum uint64_t pmode = zp->z_mode; 30141412a1a2SMark Shellenbaum uint64_t acl_obj; 3015169cdae2Smarks new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 3016fa9e4066Sahrens 301771dbfc28SPaul B. Henson if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && 301871dbfc28SPaul B. Henson !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { 3019be6fd75aSMatthew Ahrens err = SET_ERROR(EPERM); 302071dbfc28SPaul B. Henson goto out; 302171dbfc28SPaul B. Henson } 302271dbfc28SPaul B. 
Henson 3023a3c49ce1SAlbert Lee if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) 3024a3c49ce1SAlbert Lee goto out; 30250a586ceaSMark Shellenbaum 30261412a1a2SMark Shellenbaum mutex_enter(&zp->z_lock); 30271412a1a2SMark Shellenbaum if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { 30280a586ceaSMark Shellenbaum /* 30290a586ceaSMark Shellenbaum * Are we upgrading ACL from old V0 format 30300a586ceaSMark Shellenbaum * to V1 format? 30310a586ceaSMark Shellenbaum */ 30322bd6c4deSMark Shellenbaum if (zfsvfs->z_version >= ZPL_VERSION_FUID && 30331412a1a2SMark Shellenbaum zfs_znode_acl_version(zp) == 3034da6c28aaSamw ZFS_ACL_VERSION_INITIAL) { 30351412a1a2SMark Shellenbaum dmu_tx_hold_free(tx, acl_obj, 0, 3036da6c28aaSamw DMU_OBJECT_END); 3037da6c28aaSamw dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 30384c841f60Smarks 0, aclp->z_acl_bytes); 3039da6c28aaSamw } else { 30401412a1a2SMark Shellenbaum dmu_tx_hold_write(tx, acl_obj, 0, 30414c841f60Smarks aclp->z_acl_bytes); 30424c841f60Smarks } 30430a586ceaSMark Shellenbaum } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { 30446d38e247Smarks dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 30456d38e247Smarks 0, aclp->z_acl_bytes); 3046da6c28aaSamw } 30471412a1a2SMark Shellenbaum mutex_exit(&zp->z_lock); 30480a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 30490a586ceaSMark Shellenbaum } else { 30500a586ceaSMark Shellenbaum if ((mask & AT_XVATTR) && 30510a586ceaSMark Shellenbaum XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 30520a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 30530a586ceaSMark Shellenbaum else 30540a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 3055fa9e4066Sahrens } 3056fa9e4066Sahrens 30570a586ceaSMark Shellenbaum if (attrzp) { 30580a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); 3059d2443e76Smarks } 3060d2443e76Smarks 30610a586ceaSMark Shellenbaum fuid_dirtied = zfsvfs->z_fuid_dirty; 30620a586ceaSMark Shellenbaum if 
(fuid_dirtied) 30630a586ceaSMark Shellenbaum zfs_fuid_txhold(zfsvfs, tx); 30640a586ceaSMark Shellenbaum 30650a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 30660a586ceaSMark Shellenbaum 3067e722410cSMatthew Ahrens err = dmu_tx_assign(tx, TXG_WAIT); 3068e722410cSMatthew Ahrens if (err) 306914843421SMatthew Ahrens goto out; 3070fa9e4066Sahrens 30710a586ceaSMark Shellenbaum count = 0; 3072fa9e4066Sahrens /* 3073fa9e4066Sahrens * Set each attribute requested. 3074fa9e4066Sahrens * We group settings according to the locks they need to acquire. 3075fa9e4066Sahrens * 3076fa9e4066Sahrens * Note: you cannot set ctime directly, although it will be 3077fa9e4066Sahrens * updated as a side-effect of calling this function. 3078fa9e4066Sahrens */ 3079fa9e4066Sahrens 30801412a1a2SMark Shellenbaum 30811412a1a2SMark Shellenbaum if (mask & (AT_UID|AT_GID|AT_MODE)) 30821412a1a2SMark Shellenbaum mutex_enter(&zp->z_acl_lock); 3083fa9e4066Sahrens mutex_enter(&zp->z_lock); 3084fa9e4066Sahrens 3085db9986c7SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 3086db9986c7SMark Shellenbaum &zp->z_pflags, sizeof (zp->z_pflags)); 3087db9986c7SMark Shellenbaum 3088db9986c7SMark Shellenbaum if (attrzp) { 30891412a1a2SMark Shellenbaum if (mask & (AT_UID|AT_GID|AT_MODE)) 30901412a1a2SMark Shellenbaum mutex_enter(&attrzp->z_acl_lock); 30910a586ceaSMark Shellenbaum mutex_enter(&attrzp->z_lock); 3092db9986c7SMark Shellenbaum SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3093db9986c7SMark Shellenbaum SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, 3094db9986c7SMark Shellenbaum sizeof (attrzp->z_pflags)); 3095db9986c7SMark Shellenbaum } 30960a586ceaSMark Shellenbaum 309727dd1e87SMark Shellenbaum if (mask & (AT_UID|AT_GID)) { 309827dd1e87SMark Shellenbaum 309927dd1e87SMark Shellenbaum if (mask & AT_UID) { 310027dd1e87SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 310127dd1e87SMark Shellenbaum &new_uid, sizeof (new_uid)); 3102f1696b23SMark Shellenbaum 
zp->z_uid = new_uid; 310327dd1e87SMark Shellenbaum if (attrzp) { 310427dd1e87SMark Shellenbaum SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 310527dd1e87SMark Shellenbaum SA_ZPL_UID(zfsvfs), NULL, &new_uid, 310627dd1e87SMark Shellenbaum sizeof (new_uid)); 3107f1696b23SMark Shellenbaum attrzp->z_uid = new_uid; 310827dd1e87SMark Shellenbaum } 31090a586ceaSMark Shellenbaum } 31100a586ceaSMark Shellenbaum 311127dd1e87SMark Shellenbaum if (mask & AT_GID) { 311227dd1e87SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), 311327dd1e87SMark Shellenbaum NULL, &new_gid, sizeof (new_gid)); 3114f1696b23SMark Shellenbaum zp->z_gid = new_gid; 311527dd1e87SMark Shellenbaum if (attrzp) { 311627dd1e87SMark Shellenbaum SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 311727dd1e87SMark Shellenbaum SA_ZPL_GID(zfsvfs), NULL, &new_gid, 311827dd1e87SMark Shellenbaum sizeof (new_gid)); 3119f1696b23SMark Shellenbaum attrzp->z_gid = new_gid; 312027dd1e87SMark Shellenbaum } 312127dd1e87SMark Shellenbaum } 312227dd1e87SMark Shellenbaum if (!(mask & AT_MODE)) { 312327dd1e87SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), 312427dd1e87SMark Shellenbaum NULL, &new_mode, sizeof (new_mode)); 312527dd1e87SMark Shellenbaum new_mode = zp->z_mode; 312627dd1e87SMark Shellenbaum } 312727dd1e87SMark Shellenbaum err = zfs_acl_chown_setattr(zp); 312827dd1e87SMark Shellenbaum ASSERT(err == 0); 31290a586ceaSMark Shellenbaum if (attrzp) { 313027dd1e87SMark Shellenbaum err = zfs_acl_chown_setattr(attrzp); 313127dd1e87SMark Shellenbaum ASSERT(err == 0); 31320a586ceaSMark Shellenbaum } 31330a586ceaSMark Shellenbaum } 31340a586ceaSMark Shellenbaum 3135fa9e4066Sahrens if (mask & AT_MODE) { 31360a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 31370a586ceaSMark Shellenbaum &new_mode, sizeof (new_mode)); 31380a586ceaSMark Shellenbaum zp->z_mode = new_mode; 313927dd1e87SMark Shellenbaum ASSERT3U((uintptr_t)aclp, !=, NULL); 314089459e17SMark Shellenbaum err = 
zfs_aclset_common(zp, aclp, cr, tx); 3141fb09f5aaSMadhav Suresh ASSERT0(err); 31420b2a8171SMark Shellenbaum if (zp->z_acl_cached) 31430b2a8171SMark Shellenbaum zfs_acl_free(zp->z_acl_cached); 31444929fd5eSTim Haley zp->z_acl_cached = aclp; 31454929fd5eSTim Haley aclp = NULL; 3146fa9e4066Sahrens } 3147fa9e4066Sahrens 3148d2443e76Smarks 31490a586ceaSMark Shellenbaum if (mask & AT_ATIME) { 31500a586ceaSMark Shellenbaum ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); 31510a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 31520a586ceaSMark Shellenbaum &zp->z_atime, sizeof (zp->z_atime)); 3153d2443e76Smarks } 3154fa9e4066Sahrens 31550a586ceaSMark Shellenbaum if (mask & AT_MTIME) { 31560a586ceaSMark Shellenbaum ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 31570a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 31580a586ceaSMark Shellenbaum mtime, sizeof (mtime)); 3159d2443e76Smarks } 3160d2443e76Smarks 3161cdb0ab79Smaybee /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? 
*/ 31620a586ceaSMark Shellenbaum if (mask & AT_SIZE && !(mask & AT_MTIME)) { 3163db9986c7SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 3164db9986c7SMark Shellenbaum NULL, mtime, sizeof (mtime)); 31650a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 31660a586ceaSMark Shellenbaum &ctime, sizeof (ctime)); 31670a586ceaSMark Shellenbaum zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 31680a586ceaSMark Shellenbaum B_TRUE); 31690a586ceaSMark Shellenbaum } else if (mask != 0) { 31700a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 31710a586ceaSMark Shellenbaum &ctime, sizeof (ctime)); 31720a586ceaSMark Shellenbaum zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, 31730a586ceaSMark Shellenbaum B_TRUE); 31740a586ceaSMark Shellenbaum if (attrzp) { 31750a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 31760a586ceaSMark Shellenbaum SA_ZPL_CTIME(zfsvfs), NULL, 31770a586ceaSMark Shellenbaum &ctime, sizeof (ctime)); 31780a586ceaSMark Shellenbaum zfs_tstamp_update_setup(attrzp, STATE_CHANGED, 31790a586ceaSMark Shellenbaum mtime, ctime, B_TRUE); 31800a586ceaSMark Shellenbaum } 31810a586ceaSMark Shellenbaum } 3182da6c28aaSamw /* 3183da6c28aaSamw * Do this after setting timestamps to prevent timestamp 3184da6c28aaSamw * update from toggling bit 3185da6c28aaSamw */ 3186da6c28aaSamw 3187da6c28aaSamw if (xoap && (mask & AT_XVATTR)) { 3188ae4caef8SMark Shellenbaum 3189ae4caef8SMark Shellenbaum /* 3190ae4caef8SMark Shellenbaum * restore trimmed off masks 3191ae4caef8SMark Shellenbaum * so that return masks can be set for caller. 
3192ae4caef8SMark Shellenbaum */ 3193ae4caef8SMark Shellenbaum 3194ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 3195ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_APPENDONLY); 3196ae4caef8SMark Shellenbaum } 3197ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 3198ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_NOUNLINK); 3199ae4caef8SMark Shellenbaum } 3200ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 3201ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_IMMUTABLE); 3202ae4caef8SMark Shellenbaum } 3203ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 3204ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_NODUMP); 3205ae4caef8SMark Shellenbaum } 3206ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 3207ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 3208ae4caef8SMark Shellenbaum } 3209ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 3210ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 3211ae4caef8SMark Shellenbaum } 3212ae4caef8SMark Shellenbaum 32130a586ceaSMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3214da6c28aaSamw ASSERT(vp->v_type == VREG); 3215da6c28aaSamw 32160a586ceaSMark Shellenbaum zfs_xvattr_set(zp, xvap, tx); 3217da6c28aaSamw } 3218fa9e4066Sahrens 321989459e17SMark Shellenbaum if (fuid_dirtied) 322089459e17SMark Shellenbaum zfs_fuid_sync(zfsvfs, tx); 322189459e17SMark Shellenbaum 32225730cc9aSmaybee if (mask != 0) 3223da6c28aaSamw zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 3224fa9e4066Sahrens 3225fa9e4066Sahrens mutex_exit(&zp->z_lock); 32261412a1a2SMark Shellenbaum if (mask & (AT_UID|AT_GID|AT_MODE)) 32271412a1a2SMark Shellenbaum mutex_exit(&zp->z_acl_lock); 3228fa9e4066Sahrens 32291412a1a2SMark Shellenbaum if (attrzp) { 32301412a1a2SMark Shellenbaum if (mask & (AT_UID|AT_GID|AT_MODE)) 32311412a1a2SMark Shellenbaum 
mutex_exit(&attrzp->z_acl_lock); 32321412a1a2SMark Shellenbaum mutex_exit(&attrzp->z_lock); 32331412a1a2SMark Shellenbaum } 323414843421SMatthew Ahrens out: 32350a586ceaSMark Shellenbaum if (err == 0 && attrzp) { 32360a586ceaSMark Shellenbaum err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, 32370a586ceaSMark Shellenbaum xattr_count, tx); 32380a586ceaSMark Shellenbaum ASSERT(err2 == 0); 32390a586ceaSMark Shellenbaum } 32400a586ceaSMark Shellenbaum 3241d2443e76Smarks if (attrzp) 3242d2443e76Smarks VN_RELE(ZTOV(attrzp)); 3243f7170741SWill Andrews 32444929fd5eSTim Haley if (aclp) 32454929fd5eSTim Haley zfs_acl_free(aclp); 32464929fd5eSTim Haley 324714843421SMatthew Ahrens if (fuidp) { 324814843421SMatthew Ahrens zfs_fuid_info_free(fuidp); 324914843421SMatthew Ahrens fuidp = NULL; 325014843421SMatthew Ahrens } 325114843421SMatthew Ahrens 32520a586ceaSMark Shellenbaum if (err) { 325314843421SMatthew Ahrens dmu_tx_abort(tx); 32540a586ceaSMark Shellenbaum if (err == ERESTART) 32550a586ceaSMark Shellenbaum goto top; 32560a586ceaSMark Shellenbaum } else { 32570a586ceaSMark Shellenbaum err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 325814843421SMatthew Ahrens dmu_tx_commit(tx); 32590a586ceaSMark Shellenbaum } 326014843421SMatthew Ahrens 32610a586ceaSMark Shellenbaum out2: 326255da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 32635002558fSNeil Perrin zil_commit(zilog, 0); 326455da60b9SMark J Musante 3265fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3266fa9e4066Sahrens return (err); 3267fa9e4066Sahrens } 3268fa9e4066Sahrens 3269fa9e4066Sahrens typedef struct zfs_zlock { 3270fa9e4066Sahrens krwlock_t *zl_rwlock; /* lock we acquired */ 3271fa9e4066Sahrens znode_t *zl_znode; /* znode we held */ 3272fa9e4066Sahrens struct zfs_zlock *zl_next; /* next in list */ 3273fa9e4066Sahrens } zfs_zlock_t; 3274fa9e4066Sahrens 3275ff008e00Smaybee /* 3276ff008e00Smaybee * Drop locks and release vnodes that were held by zfs_rename_lock(). 
3277ff008e00Smaybee */ 3278ff008e00Smaybee static void 3279ff008e00Smaybee zfs_rename_unlock(zfs_zlock_t **zlpp) 3280ff008e00Smaybee { 3281ff008e00Smaybee zfs_zlock_t *zl; 3282ff008e00Smaybee 3283ff008e00Smaybee while ((zl = *zlpp) != NULL) { 3284ff008e00Smaybee if (zl->zl_znode != NULL) 3285ff008e00Smaybee VN_RELE(ZTOV(zl->zl_znode)); 3286ff008e00Smaybee rw_exit(zl->zl_rwlock); 3287ff008e00Smaybee *zlpp = zl->zl_next; 3288ff008e00Smaybee kmem_free(zl, sizeof (*zl)); 3289ff008e00Smaybee } 3290ff008e00Smaybee } 3291ff008e00Smaybee 3292ff008e00Smaybee /* 3293ff008e00Smaybee * Search back through the directory tree, using the ".." entries. 3294ff008e00Smaybee * Lock each directory in the chain to prevent concurrent renames. 3295ff008e00Smaybee * Fail any attempt to move a directory into one of its own descendants. 3296ff008e00Smaybee * XXX - z_parent_lock can overlap with map or grow locks 3297ff008e00Smaybee */ 3298fa9e4066Sahrens static int 3299fa9e4066Sahrens zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) 3300fa9e4066Sahrens { 3301fa9e4066Sahrens zfs_zlock_t *zl; 3302feb08c6bSbillm znode_t *zp = tdzp; 3303fa9e4066Sahrens uint64_t rootid = zp->z_zfsvfs->z_root; 33040a586ceaSMark Shellenbaum uint64_t oidp = zp->z_id; 3305fa9e4066Sahrens krwlock_t *rwlp = &szp->z_parent_lock; 3306fa9e4066Sahrens krw_t rw = RW_WRITER; 3307fa9e4066Sahrens 3308fa9e4066Sahrens /* 3309fa9e4066Sahrens * First pass write-locks szp and compares to zp->z_id. 3310fa9e4066Sahrens * Later passes read-lock zp and compare to zp->z_parent. 3311fa9e4066Sahrens */ 3312fa9e4066Sahrens do { 3313ff008e00Smaybee if (!rw_tryenter(rwlp, rw)) { 3314ff008e00Smaybee /* 3315ff008e00Smaybee * Another thread is renaming in this path. 3316ff008e00Smaybee * Note that if we are a WRITER, we don't have any 3317ff008e00Smaybee * parent_locks held yet. 
3318ff008e00Smaybee */ 3319ff008e00Smaybee if (rw == RW_READER && zp->z_id > szp->z_id) { 3320ff008e00Smaybee /* 3321ff008e00Smaybee * Drop our locks and restart 3322ff008e00Smaybee */ 3323ff008e00Smaybee zfs_rename_unlock(&zl); 3324ff008e00Smaybee *zlpp = NULL; 3325ff008e00Smaybee zp = tdzp; 33260a586ceaSMark Shellenbaum oidp = zp->z_id; 3327ff008e00Smaybee rwlp = &szp->z_parent_lock; 3328ff008e00Smaybee rw = RW_WRITER; 3329ff008e00Smaybee continue; 3330ff008e00Smaybee } else { 3331ff008e00Smaybee /* 3332ff008e00Smaybee * Wait for other thread to drop its locks 3333ff008e00Smaybee */ 3334ff008e00Smaybee rw_enter(rwlp, rw); 3335ff008e00Smaybee } 3336ff008e00Smaybee } 3337ff008e00Smaybee 3338fa9e4066Sahrens zl = kmem_alloc(sizeof (*zl), KM_SLEEP); 3339fa9e4066Sahrens zl->zl_rwlock = rwlp; 3340fa9e4066Sahrens zl->zl_znode = NULL; 3341fa9e4066Sahrens zl->zl_next = *zlpp; 3342fa9e4066Sahrens *zlpp = zl; 3343fa9e4066Sahrens 33440a586ceaSMark Shellenbaum if (oidp == szp->z_id) /* We're a descendant of szp */ 3345be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 3346fa9e4066Sahrens 33470a586ceaSMark Shellenbaum if (oidp == rootid) /* We've hit the top */ 3348fa9e4066Sahrens return (0); 3349fa9e4066Sahrens 3350fa9e4066Sahrens if (rw == RW_READER) { /* i.e. 
not the first pass */ 33510a586ceaSMark Shellenbaum int error = zfs_zget(zp->z_zfsvfs, oidp, &zp); 3352fa9e4066Sahrens if (error) 3353fa9e4066Sahrens return (error); 3354fa9e4066Sahrens zl->zl_znode = zp; 3355fa9e4066Sahrens } 33560a586ceaSMark Shellenbaum (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zp->z_zfsvfs), 33570a586ceaSMark Shellenbaum &oidp, sizeof (oidp)); 3358fa9e4066Sahrens rwlp = &zp->z_parent_lock; 3359fa9e4066Sahrens rw = RW_READER; 3360fa9e4066Sahrens 3361fa9e4066Sahrens } while (zp->z_id != sdzp->z_id); 3362fa9e4066Sahrens 3363fa9e4066Sahrens return (0); 3364fa9e4066Sahrens } 3365fa9e4066Sahrens 3366fa9e4066Sahrens /* 3367fa9e4066Sahrens * Move an entry from the provided source directory to the target 3368fa9e4066Sahrens * directory. Change the entry name as indicated. 3369fa9e4066Sahrens * 3370fa9e4066Sahrens * IN: sdvp - Source directory containing the "old entry". 3371fa9e4066Sahrens * snm - Old entry name. 3372fa9e4066Sahrens * tdvp - Target directory to contain the "new entry". 3373fa9e4066Sahrens * tnm - New entry name. 3374fa9e4066Sahrens * cr - credentials of caller. 3375da6c28aaSamw * ct - caller context 3376da6c28aaSamw * flags - case flags 3377fa9e4066Sahrens * 3378f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
3379fa9e4066Sahrens * 3380fa9e4066Sahrens * Timestamps: 3381fa9e4066Sahrens * sdvp,tdvp - ctime|mtime updated 3382fa9e4066Sahrens */ 3383da6c28aaSamw /*ARGSUSED*/ 3384fa9e4066Sahrens static int 3385da6c28aaSamw zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr, 3386da6c28aaSamw caller_context_t *ct, int flags) 3387fa9e4066Sahrens { 3388fa9e4066Sahrens znode_t *tdzp, *szp, *tzp; 3389fa9e4066Sahrens znode_t *sdzp = VTOZ(sdvp); 3390fa9e4066Sahrens zfsvfs_t *zfsvfs = sdzp->z_zfsvfs; 3391f18faf3fSek zilog_t *zilog; 3392fa9e4066Sahrens vnode_t *realvp; 3393fa9e4066Sahrens zfs_dirlock_t *sdl, *tdl; 3394fa9e4066Sahrens dmu_tx_t *tx; 3395fa9e4066Sahrens zfs_zlock_t *zl; 3396da6c28aaSamw int cmp, serr, terr; 3397da6c28aaSamw int error = 0; 3398da6c28aaSamw int zflg = 0; 339969962b56SMatthew Ahrens boolean_t waited = B_FALSE; 3400fa9e4066Sahrens 34013cb34c60Sahrens ZFS_ENTER(zfsvfs); 34023cb34c60Sahrens ZFS_VERIFY_ZP(sdzp); 3403f18faf3fSek zilog = zfsvfs->z_log; 3404fa9e4066Sahrens 3405fa9e4066Sahrens /* 3406fa9e4066Sahrens * Make sure we have the real vp for the target directory. 3407fa9e4066Sahrens */ 3408da6c28aaSamw if (VOP_REALVP(tdvp, &realvp, ct) == 0) 3409fa9e4066Sahrens tdvp = realvp; 3410fa9e4066Sahrens 341118e64978SMarcel Telka tdzp = VTOZ(tdvp); 341218e64978SMarcel Telka ZFS_VERIFY_ZP(tdzp); 341318e64978SMarcel Telka 341418e64978SMarcel Telka /* 341518e64978SMarcel Telka * We check z_zfsvfs rather than v_vfsp here, because snapshots and the 341618e64978SMarcel Telka * ctldir appear to have the same v_vfsp. 
341718e64978SMarcel Telka */ 341818e64978SMarcel Telka if (tdzp->z_zfsvfs != zfsvfs || zfsctl_is_node(tdvp)) { 3419fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3420be6fd75aSMatthew Ahrens return (SET_ERROR(EXDEV)); 3421fa9e4066Sahrens } 3422fa9e4066Sahrens 3423de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(tnm, 3424da6c28aaSamw strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3425da6c28aaSamw ZFS_EXIT(zfsvfs); 3426be6fd75aSMatthew Ahrens return (SET_ERROR(EILSEQ)); 3427da6c28aaSamw } 3428da6c28aaSamw 3429da6c28aaSamw if (flags & FIGNORECASE) 3430da6c28aaSamw zflg |= ZCILOOK; 3431da6c28aaSamw 3432fa9e4066Sahrens top: 3433fa9e4066Sahrens szp = NULL; 3434fa9e4066Sahrens tzp = NULL; 3435fa9e4066Sahrens zl = NULL; 3436fa9e4066Sahrens 3437fa9e4066Sahrens /* 3438fa9e4066Sahrens * This is to prevent the creation of links into attribute space 3439fa9e4066Sahrens * by renaming a linked file into/outof an attribute directory. 3440fa9e4066Sahrens * See the comment in zfs_link() for why this is considered bad. 3441fa9e4066Sahrens */ 34420a586ceaSMark Shellenbaum if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { 3443fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3444be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 3445fa9e4066Sahrens } 3446fa9e4066Sahrens 3447fa9e4066Sahrens /* 3448fa9e4066Sahrens * Lock source and target directory entries. To prevent deadlock, 3449fa9e4066Sahrens * a lock ordering must be defined. We lock the directory with 3450fa9e4066Sahrens * the smallest object id first, or if it's a tie, the one with 3451fa9e4066Sahrens * the lexically first name. 3452fa9e4066Sahrens */ 3453fa9e4066Sahrens if (sdzp->z_id < tdzp->z_id) { 3454fa9e4066Sahrens cmp = -1; 3455fa9e4066Sahrens } else if (sdzp->z_id > tdzp->z_id) { 3456fa9e4066Sahrens cmp = 1; 3457fa9e4066Sahrens } else { 3458da6c28aaSamw /* 3459da6c28aaSamw * First compare the two name arguments without 3460da6c28aaSamw * considering any case folding. 
3461da6c28aaSamw */ 3462da6c28aaSamw int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER); 3463da6c28aaSamw 3464da6c28aaSamw cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error); 3465de8267e0Stimh ASSERT(error == 0 || !zfsvfs->z_utf8); 3466fa9e4066Sahrens if (cmp == 0) { 3467fa9e4066Sahrens /* 3468fa9e4066Sahrens * POSIX: "If the old argument and the new argument 3469fa9e4066Sahrens * both refer to links to the same existing file, 3470fa9e4066Sahrens * the rename() function shall return successfully 3471fa9e4066Sahrens * and perform no other action." 3472fa9e4066Sahrens */ 3473fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3474fa9e4066Sahrens return (0); 3475fa9e4066Sahrens } 3476da6c28aaSamw /* 3477da6c28aaSamw * If the file system is case-folding, then we may 3478da6c28aaSamw * have some more checking to do. A case-folding file 3479da6c28aaSamw * system is either supporting mixed case sensitivity 3480da6c28aaSamw * access or is completely case-insensitive. Note 3481da6c28aaSamw * that the file system is always case preserving. 3482da6c28aaSamw * 3483da6c28aaSamw * In mixed sensitivity mode case sensitive behavior 3484da6c28aaSamw * is the default. FIGNORECASE must be used to 3485da6c28aaSamw * explicitly request case insensitive behavior. 3486da6c28aaSamw * 3487da6c28aaSamw * If the source and target names provided differ only 3488da6c28aaSamw * by case (e.g., a request to rename 'tim' to 'Tim'), 3489da6c28aaSamw * we will treat this as a special case in the 3490da6c28aaSamw * case-insensitive mode: as long as the source name 3491da6c28aaSamw * is an exact match, we will allow this to proceed as 3492da6c28aaSamw * a name-change request. 
3493da6c28aaSamw */ 3494de8267e0Stimh if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 3495de8267e0Stimh (zfsvfs->z_case == ZFS_CASE_MIXED && 3496de8267e0Stimh flags & FIGNORECASE)) && 3497da6c28aaSamw u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST, 3498da6c28aaSamw &error) == 0) { 3499da6c28aaSamw /* 3500da6c28aaSamw * case preserving rename request, require exact 3501da6c28aaSamw * name matches 3502da6c28aaSamw */ 3503da6c28aaSamw zflg |= ZCIEXACT; 3504da6c28aaSamw zflg &= ~ZCILOOK; 3505da6c28aaSamw } 3506fa9e4066Sahrens } 3507da6c28aaSamw 3508afefc7e4SSanjeev Bagewadi /* 3509afefc7e4SSanjeev Bagewadi * If the source and destination directories are the same, we should 3510afefc7e4SSanjeev Bagewadi * grab the z_name_lock of that directory only once. 3511afefc7e4SSanjeev Bagewadi */ 3512afefc7e4SSanjeev Bagewadi if (sdzp == tdzp) { 3513afefc7e4SSanjeev Bagewadi zflg |= ZHAVELOCK; 3514afefc7e4SSanjeev Bagewadi rw_enter(&sdzp->z_name_lock, RW_READER); 3515afefc7e4SSanjeev Bagewadi } 3516afefc7e4SSanjeev Bagewadi 3517fa9e4066Sahrens if (cmp < 0) { 3518da6c28aaSamw serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, 3519da6c28aaSamw ZEXISTS | zflg, NULL, NULL); 3520da6c28aaSamw terr = zfs_dirent_lock(&tdl, 3521da6c28aaSamw tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL); 3522fa9e4066Sahrens } else { 3523da6c28aaSamw terr = zfs_dirent_lock(&tdl, 3524da6c28aaSamw tdzp, tnm, &tzp, zflg, NULL, NULL); 3525da6c28aaSamw serr = zfs_dirent_lock(&sdl, 3526da6c28aaSamw sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg, 3527da6c28aaSamw NULL, NULL); 3528fa9e4066Sahrens } 3529fa9e4066Sahrens 3530fa9e4066Sahrens if (serr) { 3531fa9e4066Sahrens /* 3532fa9e4066Sahrens * Source entry invalid or not there. 
3533fa9e4066Sahrens */ 3534fa9e4066Sahrens if (!terr) { 3535fa9e4066Sahrens zfs_dirent_unlock(tdl); 3536fa9e4066Sahrens if (tzp) 3537fa9e4066Sahrens VN_RELE(ZTOV(tzp)); 3538fa9e4066Sahrens } 3539afefc7e4SSanjeev Bagewadi 3540afefc7e4SSanjeev Bagewadi if (sdzp == tdzp) 3541afefc7e4SSanjeev Bagewadi rw_exit(&sdzp->z_name_lock); 3542afefc7e4SSanjeev Bagewadi 3543fa9e4066Sahrens if (strcmp(snm, "..") == 0) 3544be6fd75aSMatthew Ahrens serr = SET_ERROR(EINVAL); 3545fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3546fa9e4066Sahrens return (serr); 3547fa9e4066Sahrens } 3548fa9e4066Sahrens if (terr) { 3549fa9e4066Sahrens zfs_dirent_unlock(sdl); 3550fa9e4066Sahrens VN_RELE(ZTOV(szp)); 3551afefc7e4SSanjeev Bagewadi 3552afefc7e4SSanjeev Bagewadi if (sdzp == tdzp) 3553afefc7e4SSanjeev Bagewadi rw_exit(&sdzp->z_name_lock); 3554afefc7e4SSanjeev Bagewadi 3555fa9e4066Sahrens if (strcmp(tnm, "..") == 0) 3556be6fd75aSMatthew Ahrens terr = SET_ERROR(EINVAL); 3557fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3558fa9e4066Sahrens return (terr); 3559fa9e4066Sahrens } 3560fa9e4066Sahrens 3561fa9e4066Sahrens /* 3562fa9e4066Sahrens * Must have write access at the source to remove the old entry 3563fa9e4066Sahrens * and write access at the target to create the new entry. 3564fa9e4066Sahrens * Note that if target and source are the same, this can be 3565fa9e4066Sahrens * done in a single check. 3566fa9e4066Sahrens */ 3567fa9e4066Sahrens 3568fa9e4066Sahrens if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) 3569fa9e4066Sahrens goto out; 3570fa9e4066Sahrens 3571fa9e4066Sahrens if (ZTOV(szp)->v_type == VDIR) { 3572fa9e4066Sahrens /* 3573fa9e4066Sahrens * Check to make sure rename is valid. 3574fa9e4066Sahrens * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 3575fa9e4066Sahrens */ 3576fa9e4066Sahrens if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl)) 3577fa9e4066Sahrens goto out; 3578fa9e4066Sahrens } 3579fa9e4066Sahrens 3580fa9e4066Sahrens /* 3581fa9e4066Sahrens * Does target exist? 
3582fa9e4066Sahrens */ 3583fa9e4066Sahrens if (tzp) { 3584fa9e4066Sahrens /* 3585fa9e4066Sahrens * Source and target must be the same type. 3586fa9e4066Sahrens */ 3587fa9e4066Sahrens if (ZTOV(szp)->v_type == VDIR) { 3588fa9e4066Sahrens if (ZTOV(tzp)->v_type != VDIR) { 3589be6fd75aSMatthew Ahrens error = SET_ERROR(ENOTDIR); 3590fa9e4066Sahrens goto out; 3591fa9e4066Sahrens } 3592fa9e4066Sahrens } else { 3593fa9e4066Sahrens if (ZTOV(tzp)->v_type == VDIR) { 3594be6fd75aSMatthew Ahrens error = SET_ERROR(EISDIR); 3595fa9e4066Sahrens goto out; 3596fa9e4066Sahrens } 3597fa9e4066Sahrens } 3598fa9e4066Sahrens /* 3599fa9e4066Sahrens * POSIX dictates that when the source and target 3600fa9e4066Sahrens * entries refer to the same file object, rename 3601fa9e4066Sahrens * must do nothing and exit without error. 3602fa9e4066Sahrens */ 3603fa9e4066Sahrens if (szp->z_id == tzp->z_id) { 3604fa9e4066Sahrens error = 0; 3605fa9e4066Sahrens goto out; 3606fa9e4066Sahrens } 3607fa9e4066Sahrens } 3608fa9e4066Sahrens 3609da6c28aaSamw vnevent_rename_src(ZTOV(szp), sdvp, snm, ct); 3610fa9e4066Sahrens if (tzp) 3611da6c28aaSamw vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct); 3612df2381bfSpraks 3613df2381bfSpraks /* 3614df2381bfSpraks * notify the target directory if it is not the same 3615df2381bfSpraks * as source directory. 
3616df2381bfSpraks */ 3617df2381bfSpraks if (tdvp != sdvp) { 3618da6c28aaSamw vnevent_rename_dest_dir(tdvp, ct); 3619df2381bfSpraks } 3620fa9e4066Sahrens 3621fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 36220a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 36230a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); 3624ea8dc4b6Seschrock dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 3625ea8dc4b6Seschrock dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 36260a586ceaSMark Shellenbaum if (sdzp != tdzp) { 36270a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); 36280a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, tdzp); 36290a586ceaSMark Shellenbaum } 36300a586ceaSMark Shellenbaum if (tzp) { 36310a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); 36320a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, tzp); 36330a586ceaSMark Shellenbaum } 36340a586ceaSMark Shellenbaum 36350a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, szp); 3636893a6d32Sahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 363769962b56SMatthew Ahrens error = dmu_tx_assign(tx, waited ? 
TXG_WAITED : TXG_NOWAIT); 3638fa9e4066Sahrens if (error) { 3639fa9e4066Sahrens if (zl != NULL) 3640fa9e4066Sahrens zfs_rename_unlock(&zl); 3641fa9e4066Sahrens zfs_dirent_unlock(sdl); 3642fa9e4066Sahrens zfs_dirent_unlock(tdl); 3643afefc7e4SSanjeev Bagewadi 3644afefc7e4SSanjeev Bagewadi if (sdzp == tdzp) 3645afefc7e4SSanjeev Bagewadi rw_exit(&sdzp->z_name_lock); 3646afefc7e4SSanjeev Bagewadi 3647fa9e4066Sahrens VN_RELE(ZTOV(szp)); 3648fa9e4066Sahrens if (tzp) 3649fa9e4066Sahrens VN_RELE(ZTOV(tzp)); 36501209a471SNeil Perrin if (error == ERESTART) { 365169962b56SMatthew Ahrens waited = B_TRUE; 36528a2f1b91Sahrens dmu_tx_wait(tx); 36538a2f1b91Sahrens dmu_tx_abort(tx); 3654fa9e4066Sahrens goto top; 3655fa9e4066Sahrens } 36568a2f1b91Sahrens dmu_tx_abort(tx); 3657fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3658fa9e4066Sahrens return (error); 3659fa9e4066Sahrens } 3660fa9e4066Sahrens 3661fa9e4066Sahrens if (tzp) /* Attempt to remove the existing target */ 3662da6c28aaSamw error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); 3663fa9e4066Sahrens 3664fa9e4066Sahrens if (error == 0) { 3665fa9e4066Sahrens error = zfs_link_create(tdl, szp, tx, ZRENAMING); 3666fa9e4066Sahrens if (error == 0) { 36670a586ceaSMark Shellenbaum szp->z_pflags |= ZFS_AV_MODIFIED; 36680a586ceaSMark Shellenbaum 36690a586ceaSMark Shellenbaum error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), 36700a586ceaSMark Shellenbaum (void *)&szp->z_pflags, sizeof (uint64_t), tx); 3671fb09f5aaSMadhav Suresh ASSERT0(error); 3672da6c28aaSamw 3673fa9e4066Sahrens error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); 36746ed5e6abSSam Falkner if (error == 0) { 36756ed5e6abSSam Falkner zfs_log_rename(zilog, tx, TX_RENAME | 367691de656bSNeil Perrin (flags & FIGNORECASE ? 
TX_CI : 0), sdzp, 367791de656bSNeil Perrin sdl->dl_name, tdzp, tdl->dl_name, szp); 367851ece835Seschrock 36796ed5e6abSSam Falkner /* 36806ed5e6abSSam Falkner * Update path information for the target vnode 36816ed5e6abSSam Falkner */ 36826ed5e6abSSam Falkner vn_renamepath(tdvp, ZTOV(szp), tnm, 36836ed5e6abSSam Falkner strlen(tnm)); 36846ed5e6abSSam Falkner } else { 36856ed5e6abSSam Falkner /* 36866ed5e6abSSam Falkner * At this point, we have successfully created 36876ed5e6abSSam Falkner * the target name, but have failed to remove 36886ed5e6abSSam Falkner * the source name. Since the create was done 36896ed5e6abSSam Falkner * with the ZRENAMING flag, there are 36906ed5e6abSSam Falkner * complications; for one, the link count is 36916ed5e6abSSam Falkner * wrong. The easiest way to deal with this 36926ed5e6abSSam Falkner * is to remove the newly created target, and 36936ed5e6abSSam Falkner * return the original error. This must 36946ed5e6abSSam Falkner * succeed; fortunately, it is very unlikely to 36956ed5e6abSSam Falkner * fail, since we just created it. 
36966ed5e6abSSam Falkner */ 36976ed5e6abSSam Falkner VERIFY3U(zfs_link_destroy(tdl, szp, tx, 36986ed5e6abSSam Falkner ZRENAMING, NULL), ==, 0); 36996ed5e6abSSam Falkner } 3700fa9e4066Sahrens } 3701fa9e4066Sahrens } 3702fa9e4066Sahrens 3703fa9e4066Sahrens dmu_tx_commit(tx); 3704fa9e4066Sahrens out: 3705fa9e4066Sahrens if (zl != NULL) 3706fa9e4066Sahrens zfs_rename_unlock(&zl); 3707fa9e4066Sahrens 3708fa9e4066Sahrens zfs_dirent_unlock(sdl); 3709fa9e4066Sahrens zfs_dirent_unlock(tdl); 3710fa9e4066Sahrens 3711afefc7e4SSanjeev Bagewadi if (sdzp == tdzp) 3712afefc7e4SSanjeev Bagewadi rw_exit(&sdzp->z_name_lock); 3713afefc7e4SSanjeev Bagewadi 3714afefc7e4SSanjeev Bagewadi 3715fa9e4066Sahrens VN_RELE(ZTOV(szp)); 3716fa9e4066Sahrens if (tzp) 3717fa9e4066Sahrens VN_RELE(ZTOV(tzp)); 3718fa9e4066Sahrens 371955da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 37205002558fSNeil Perrin zil_commit(zilog, 0); 372155da60b9SMark J Musante 3722fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3723fa9e4066Sahrens return (error); 3724fa9e4066Sahrens } 3725fa9e4066Sahrens 3726fa9e4066Sahrens /* 3727fa9e4066Sahrens * Insert the indicated symbolic reference entry into the directory. 3728fa9e4066Sahrens * 3729fa9e4066Sahrens * IN: dvp - Directory to contain new symbolic link. 3730fa9e4066Sahrens * link - Name for new symlink entry. 3731fa9e4066Sahrens * vap - Attributes of new entry. 3732fa9e4066Sahrens * cr - credentials of caller. 3733da6c28aaSamw * ct - caller context 3734da6c28aaSamw * flags - case flags 3735fa9e4066Sahrens * 3736f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
3737fa9e4066Sahrens * 3738fa9e4066Sahrens * Timestamps: 3739fa9e4066Sahrens * dvp - ctime|mtime updated 3740fa9e4066Sahrens */ 3741da6c28aaSamw /*ARGSUSED*/ 3742fa9e4066Sahrens static int 3743da6c28aaSamw zfs_symlink(vnode_t *dvp, char *name, vattr_t *vap, char *link, cred_t *cr, 3744da6c28aaSamw caller_context_t *ct, int flags) 3745fa9e4066Sahrens { 3746fa9e4066Sahrens znode_t *zp, *dzp = VTOZ(dvp); 3747fa9e4066Sahrens zfs_dirlock_t *dl; 3748fa9e4066Sahrens dmu_tx_t *tx; 3749fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 3750f18faf3fSek zilog_t *zilog; 37510a586ceaSMark Shellenbaum uint64_t len = strlen(link); 3752fa9e4066Sahrens int error; 3753da6c28aaSamw int zflg = ZNEW; 375489459e17SMark Shellenbaum zfs_acl_ids_t acl_ids; 375589459e17SMark Shellenbaum boolean_t fuid_dirtied; 37560a586ceaSMark Shellenbaum uint64_t txtype = TX_SYMLINK; 375769962b56SMatthew Ahrens boolean_t waited = B_FALSE; 3758fa9e4066Sahrens 3759fa9e4066Sahrens ASSERT(vap->va_type == VLNK); 3760fa9e4066Sahrens 37613cb34c60Sahrens ZFS_ENTER(zfsvfs); 37623cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 3763f18faf3fSek zilog = zfsvfs->z_log; 3764da6c28aaSamw 3765de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 3766da6c28aaSamw NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3767da6c28aaSamw ZFS_EXIT(zfsvfs); 3768be6fd75aSMatthew Ahrens return (SET_ERROR(EILSEQ)); 3769da6c28aaSamw } 3770da6c28aaSamw if (flags & FIGNORECASE) 3771da6c28aaSamw zflg |= ZCILOOK; 3772fa9e4066Sahrens 3773fa9e4066Sahrens if (len > MAXPATHLEN) { 3774fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3775be6fd75aSMatthew Ahrens return (SET_ERROR(ENAMETOOLONG)); 3776fa9e4066Sahrens } 3777fa9e4066Sahrens 3778c8c24165SMark Shellenbaum if ((error = zfs_acl_ids_create(dzp, 0, 3779c8c24165SMark Shellenbaum vap, cr, NULL, &acl_ids)) != 0) { 3780c8c24165SMark Shellenbaum ZFS_EXIT(zfsvfs); 3781c8c24165SMark Shellenbaum return (error); 3782c8c24165SMark Shellenbaum } 3783c8c24165SMark Shellenbaum top: 3784fa9e4066Sahrens /* 3785fa9e4066Sahrens * 
Attempt to lock directory; fail if entry already exists. 3786fa9e4066Sahrens */ 3787da6c28aaSamw error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); 3788da6c28aaSamw if (error) { 3789c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 3790c8c24165SMark Shellenbaum ZFS_EXIT(zfsvfs); 3791c8c24165SMark Shellenbaum return (error); 3792c8c24165SMark Shellenbaum } 3793c8c24165SMark Shellenbaum 3794c8c24165SMark Shellenbaum if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 3795c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 37968e303ae0SMark Shellenbaum zfs_dirent_unlock(dl); 3797fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3798fa9e4066Sahrens return (error); 3799fa9e4066Sahrens } 3800fa9e4066Sahrens 380114843421SMatthew Ahrens if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 380214843421SMatthew Ahrens zfs_acl_ids_free(&acl_ids); 380314843421SMatthew Ahrens zfs_dirent_unlock(dl); 380414843421SMatthew Ahrens ZFS_EXIT(zfsvfs); 3805be6fd75aSMatthew Ahrens return (SET_ERROR(EDQUOT)); 380614843421SMatthew Ahrens } 3807fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 380889459e17SMark Shellenbaum fuid_dirtied = zfsvfs->z_fuid_dirty; 3809fa9e4066Sahrens dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 3810ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 38110a586ceaSMark Shellenbaum dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 38120a586ceaSMark Shellenbaum ZFS_SA_BASE_ATTR_SIZE + len); 38130a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 38140a586ceaSMark Shellenbaum if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 38150a586ceaSMark Shellenbaum dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 38160a586ceaSMark Shellenbaum acl_ids.z_aclp->z_acl_bytes); 38170a586ceaSMark Shellenbaum } 381814843421SMatthew Ahrens if (fuid_dirtied) 381914843421SMatthew Ahrens zfs_fuid_txhold(zfsvfs, tx); 382069962b56SMatthew Ahrens error = dmu_tx_assign(tx, waited ? 
TXG_WAITED : TXG_NOWAIT); 3821fa9e4066Sahrens if (error) { 3822fa9e4066Sahrens zfs_dirent_unlock(dl); 38231209a471SNeil Perrin if (error == ERESTART) { 382469962b56SMatthew Ahrens waited = B_TRUE; 38258a2f1b91Sahrens dmu_tx_wait(tx); 38268a2f1b91Sahrens dmu_tx_abort(tx); 3827fa9e4066Sahrens goto top; 3828fa9e4066Sahrens } 3829c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 38308a2f1b91Sahrens dmu_tx_abort(tx); 3831fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3832fa9e4066Sahrens return (error); 3833fa9e4066Sahrens } 3834fa9e4066Sahrens 3835fa9e4066Sahrens /* 3836fa9e4066Sahrens * Create a new object for the symlink. 38370a586ceaSMark Shellenbaum * for version 4 ZPL datsets the symlink will be an SA attribute 3838fa9e4066Sahrens */ 38390a586ceaSMark Shellenbaum zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 3840fa9e4066Sahrens 38410a586ceaSMark Shellenbaum if (fuid_dirtied) 38420a586ceaSMark Shellenbaum zfs_fuid_sync(zfsvfs, tx); 3843fa9e4066Sahrens 38441412a1a2SMark Shellenbaum mutex_enter(&zp->z_lock); 38450a586ceaSMark Shellenbaum if (zp->z_is_sa) 38460a586ceaSMark Shellenbaum error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), 38470a586ceaSMark Shellenbaum link, len, tx); 38480a586ceaSMark Shellenbaum else 38490a586ceaSMark Shellenbaum zfs_sa_symlink(zp, link, len, tx); 38501412a1a2SMark Shellenbaum mutex_exit(&zp->z_lock); 3851fa9e4066Sahrens 38520a586ceaSMark Shellenbaum zp->z_size = len; 38530a586ceaSMark Shellenbaum (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 38540a586ceaSMark Shellenbaum &zp->z_size, sizeof (zp->z_size), tx); 3855fa9e4066Sahrens /* 3856fa9e4066Sahrens * Insert the new object into the directory. 
3857fa9e4066Sahrens */ 3858fa9e4066Sahrens (void) zfs_link_create(dl, zp, tx, ZNEW); 38590a586ceaSMark Shellenbaum 38600a586ceaSMark Shellenbaum if (flags & FIGNORECASE) 38610a586ceaSMark Shellenbaum txtype |= TX_CI; 38620a586ceaSMark Shellenbaum zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 386389459e17SMark Shellenbaum 386489459e17SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 3865fa9e4066Sahrens 3866fa9e4066Sahrens dmu_tx_commit(tx); 3867fa9e4066Sahrens 3868fa9e4066Sahrens zfs_dirent_unlock(dl); 3869fa9e4066Sahrens 3870fa9e4066Sahrens VN_RELE(ZTOV(zp)); 3871fa9e4066Sahrens 387255da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 38735002558fSNeil Perrin zil_commit(zilog, 0); 387455da60b9SMark J Musante 3875fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3876fa9e4066Sahrens return (error); 3877fa9e4066Sahrens } 3878fa9e4066Sahrens 3879fa9e4066Sahrens /* 3880fa9e4066Sahrens * Return, in the buffer contained in the provided uio structure, 3881fa9e4066Sahrens * the symbolic path referred to by vp. 3882fa9e4066Sahrens * 3883fa9e4066Sahrens * IN: vp - vnode of symbolic link. 3884f7170741SWill Andrews * uio - structure to contain the link path. 3885fa9e4066Sahrens * cr - credentials of caller. 3886da6c28aaSamw * ct - caller context 3887fa9e4066Sahrens * 3888f7170741SWill Andrews * OUT: uio - structure containing the link path. 3889fa9e4066Sahrens * 3890f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
3891fa9e4066Sahrens * 3892fa9e4066Sahrens * Timestamps: 3893fa9e4066Sahrens * vp - atime updated 3894fa9e4066Sahrens */ 3895fa9e4066Sahrens /* ARGSUSED */ 3896fa9e4066Sahrens static int 3897da6c28aaSamw zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 3898fa9e4066Sahrens { 3899fa9e4066Sahrens znode_t *zp = VTOZ(vp); 3900fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3901fa9e4066Sahrens int error; 3902fa9e4066Sahrens 39033cb34c60Sahrens ZFS_ENTER(zfsvfs); 39043cb34c60Sahrens ZFS_VERIFY_ZP(zp); 3905fa9e4066Sahrens 39061412a1a2SMark Shellenbaum mutex_enter(&zp->z_lock); 39070a586ceaSMark Shellenbaum if (zp->z_is_sa) 39080a586ceaSMark Shellenbaum error = sa_lookup_uio(zp->z_sa_hdl, 39090a586ceaSMark Shellenbaum SA_ZPL_SYMLINK(zfsvfs), uio); 39100a586ceaSMark Shellenbaum else 39110a586ceaSMark Shellenbaum error = zfs_sa_readlink(zp, uio); 39121412a1a2SMark Shellenbaum mutex_exit(&zp->z_lock); 3913fa9e4066Sahrens 3914fa9e4066Sahrens ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 39150a586ceaSMark Shellenbaum 3916fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3917fa9e4066Sahrens return (error); 3918fa9e4066Sahrens } 3919fa9e4066Sahrens 3920fa9e4066Sahrens /* 3921fa9e4066Sahrens * Insert a new entry into directory tdvp referencing svp. 3922fa9e4066Sahrens * 3923fa9e4066Sahrens * IN: tdvp - Directory to contain new entry. 3924fa9e4066Sahrens * svp - vnode of new entry. 3925fa9e4066Sahrens * name - name of new entry. 3926fa9e4066Sahrens * cr - credentials of caller. 3927da6c28aaSamw * ct - caller context 3928fa9e4066Sahrens * 3929f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
3930fa9e4066Sahrens * 3931fa9e4066Sahrens * Timestamps: 3932fa9e4066Sahrens * tdvp - ctime|mtime updated 3933fa9e4066Sahrens * svp - ctime updated 3934fa9e4066Sahrens */ 3935fa9e4066Sahrens /* ARGSUSED */ 3936fa9e4066Sahrens static int 3937da6c28aaSamw zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 3938da6c28aaSamw caller_context_t *ct, int flags) 3939fa9e4066Sahrens { 3940fa9e4066Sahrens znode_t *dzp = VTOZ(tdvp); 3941fa9e4066Sahrens znode_t *tzp, *szp; 3942fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 3943f18faf3fSek zilog_t *zilog; 3944fa9e4066Sahrens zfs_dirlock_t *dl; 3945fa9e4066Sahrens dmu_tx_t *tx; 3946fa9e4066Sahrens vnode_t *realvp; 3947fa9e4066Sahrens int error; 3948da6c28aaSamw int zf = ZNEW; 3949d39ee142SMark Shellenbaum uint64_t parent; 3950f1696b23SMark Shellenbaum uid_t owner; 395169962b56SMatthew Ahrens boolean_t waited = B_FALSE; 3952fa9e4066Sahrens 3953fa9e4066Sahrens ASSERT(tdvp->v_type == VDIR); 3954fa9e4066Sahrens 39553cb34c60Sahrens ZFS_ENTER(zfsvfs); 39563cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 3957f18faf3fSek zilog = zfsvfs->z_log; 3958fa9e4066Sahrens 3959da6c28aaSamw if (VOP_REALVP(svp, &realvp, ct) == 0) 3960fa9e4066Sahrens svp = realvp; 3961fa9e4066Sahrens 3962d39ee142SMark Shellenbaum /* 3963d39ee142SMark Shellenbaum * POSIX dictates that we return EPERM here. 3964d39ee142SMark Shellenbaum * Better choices include ENOTSUP or EISDIR. 3965d39ee142SMark Shellenbaum */ 3966d39ee142SMark Shellenbaum if (svp->v_type == VDIR) { 3967d39ee142SMark Shellenbaum ZFS_EXIT(zfsvfs); 3968be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 3969d39ee142SMark Shellenbaum } 3970d39ee142SMark Shellenbaum 397118e64978SMarcel Telka szp = VTOZ(svp); 397218e64978SMarcel Telka ZFS_VERIFY_ZP(szp); 397318e64978SMarcel Telka 397418e64978SMarcel Telka /* 397518e64978SMarcel Telka * We check z_zfsvfs rather than v_vfsp here, because snapshots and the 397618e64978SMarcel Telka * ctldir appear to have the same v_vfsp. 
397718e64978SMarcel Telka */ 397818e64978SMarcel Telka if (szp->z_zfsvfs != zfsvfs || zfsctl_is_node(svp)) { 3979fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3980be6fd75aSMatthew Ahrens return (SET_ERROR(EXDEV)); 3981fa9e4066Sahrens } 3982d39ee142SMark Shellenbaum 3983d39ee142SMark Shellenbaum /* Prevent links to .zfs/shares files */ 3984d39ee142SMark Shellenbaum 3985d39ee142SMark Shellenbaum if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 3986d39ee142SMark Shellenbaum &parent, sizeof (uint64_t))) != 0) { 3987d39ee142SMark Shellenbaum ZFS_EXIT(zfsvfs); 3988d39ee142SMark Shellenbaum return (error); 3989d39ee142SMark Shellenbaum } 3990d39ee142SMark Shellenbaum if (parent == zfsvfs->z_shares_dir) { 3991d39ee142SMark Shellenbaum ZFS_EXIT(zfsvfs); 3992be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 3993d39ee142SMark Shellenbaum } 3994d39ee142SMark Shellenbaum 3995de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(name, 3996da6c28aaSamw strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3997da6c28aaSamw ZFS_EXIT(zfsvfs); 3998be6fd75aSMatthew Ahrens return (SET_ERROR(EILSEQ)); 3999da6c28aaSamw } 4000da6c28aaSamw if (flags & FIGNORECASE) 4001da6c28aaSamw zf |= ZCILOOK; 4002da6c28aaSamw 4003fa9e4066Sahrens /* 4004fa9e4066Sahrens * We do not support links between attributes and non-attributes 4005fa9e4066Sahrens * because of the potential security risk of creating links 4006fa9e4066Sahrens * into "normal" file space in order to circumvent restrictions 4007fa9e4066Sahrens * imposed in attribute space. 
4008fa9e4066Sahrens */ 40090a586ceaSMark Shellenbaum if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { 4010fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4011be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 4012fa9e4066Sahrens } 4013fa9e4066Sahrens 4014fa9e4066Sahrens 4015f1696b23SMark Shellenbaum owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER); 4016f1696b23SMark Shellenbaum if (owner != crgetuid(cr) && secpolicy_basic_link(cr) != 0) { 4017fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4018be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 4019fa9e4066Sahrens } 4020fa9e4066Sahrens 4021da6c28aaSamw if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4022fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4023fa9e4066Sahrens return (error); 4024fa9e4066Sahrens } 4025fa9e4066Sahrens 4026d39ee142SMark Shellenbaum top: 4027fa9e4066Sahrens /* 4028fa9e4066Sahrens * Attempt to lock directory; fail if entry already exists. 4029fa9e4066Sahrens */ 4030da6c28aaSamw error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL); 4031da6c28aaSamw if (error) { 4032fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4033fa9e4066Sahrens return (error); 4034fa9e4066Sahrens } 4035fa9e4066Sahrens 4036fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 40370a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 4038ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 40390a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, szp); 40400a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, dzp); 404169962b56SMatthew Ahrens error = dmu_tx_assign(tx, waited ? 
TXG_WAITED : TXG_NOWAIT); 4042fa9e4066Sahrens if (error) { 4043fa9e4066Sahrens zfs_dirent_unlock(dl); 40441209a471SNeil Perrin if (error == ERESTART) { 404569962b56SMatthew Ahrens waited = B_TRUE; 40468a2f1b91Sahrens dmu_tx_wait(tx); 40478a2f1b91Sahrens dmu_tx_abort(tx); 4048fa9e4066Sahrens goto top; 4049fa9e4066Sahrens } 40508a2f1b91Sahrens dmu_tx_abort(tx); 4051fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4052fa9e4066Sahrens return (error); 4053fa9e4066Sahrens } 4054fa9e4066Sahrens 4055fa9e4066Sahrens error = zfs_link_create(dl, szp, tx, 0); 4056fa9e4066Sahrens 4057da6c28aaSamw if (error == 0) { 4058da6c28aaSamw uint64_t txtype = TX_LINK; 4059da6c28aaSamw if (flags & FIGNORECASE) 4060da6c28aaSamw txtype |= TX_CI; 4061da6c28aaSamw zfs_log_link(zilog, tx, txtype, dzp, szp, name); 4062da6c28aaSamw } 4063fa9e4066Sahrens 4064fa9e4066Sahrens dmu_tx_commit(tx); 4065fa9e4066Sahrens 4066fa9e4066Sahrens zfs_dirent_unlock(dl); 4067fa9e4066Sahrens 4068df2381bfSpraks if (error == 0) { 4069da6c28aaSamw vnevent_link(svp, ct); 4070df2381bfSpraks } 4071df2381bfSpraks 407255da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 40735002558fSNeil Perrin zil_commit(zilog, 0); 407455da60b9SMark J Musante 4075fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4076fa9e4066Sahrens return (error); 4077fa9e4066Sahrens } 4078fa9e4066Sahrens 4079fa9e4066Sahrens /* 4080fa9e4066Sahrens * zfs_null_putapage() is used when the file system has been force 4081fa9e4066Sahrens * unmounted. It just drops the pages. 4082fa9e4066Sahrens */ 4083fa9e4066Sahrens /* ARGSUSED */ 4084fa9e4066Sahrens static int 4085fa9e4066Sahrens zfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 4086fa9e4066Sahrens size_t *lenp, int flags, cred_t *cr) 4087fa9e4066Sahrens { 4088fa9e4066Sahrens pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR); 4089fa9e4066Sahrens return (0); 4090fa9e4066Sahrens } 4091fa9e4066Sahrens 409244eda4d7Smaybee /* 409344eda4d7Smaybee * Push a page out to disk, klustering if possible. 
409444eda4d7Smaybee * 409544eda4d7Smaybee * IN: vp - file to push page to. 409644eda4d7Smaybee * pp - page to push. 409744eda4d7Smaybee * flags - additional flags. 409844eda4d7Smaybee * cr - credentials of caller. 409944eda4d7Smaybee * 410044eda4d7Smaybee * OUT: offp - start of range pushed. 410144eda4d7Smaybee * lenp - len of range pushed. 410244eda4d7Smaybee * 4103f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 410444eda4d7Smaybee * 410544eda4d7Smaybee * NOTE: callers must have locked the page to be pushed. On 410644eda4d7Smaybee * exit, the page (and all other pages in the kluster) must be 410744eda4d7Smaybee * unlocked. 410844eda4d7Smaybee */ 4109fa9e4066Sahrens /* ARGSUSED */ 4110fa9e4066Sahrens static int 4111fa9e4066Sahrens zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 4112fa9e4066Sahrens size_t *lenp, int flags, cred_t *cr) 4113fa9e4066Sahrens { 4114fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4115fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4116fa9e4066Sahrens dmu_tx_t *tx; 411744eda4d7Smaybee u_offset_t off, koff; 411844eda4d7Smaybee size_t len, klen; 4119fa9e4066Sahrens int err; 4120fa9e4066Sahrens 4121fa9e4066Sahrens off = pp->p_offset; 412244eda4d7Smaybee len = PAGESIZE; 412344eda4d7Smaybee /* 412444eda4d7Smaybee * If our blocksize is bigger than the page size, try to kluster 41251209a471SNeil Perrin * multiple pages so that we write a full block (thus avoiding 412644eda4d7Smaybee * a read-modify-write). 412744eda4d7Smaybee */ 41280a586ceaSMark Shellenbaum if (off < zp->z_size && zp->z_blksz > PAGESIZE) { 4129ac05c741SMark Maybee klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE); 4130ac05c741SMark Maybee koff = ISP2(klen) ? 
P2ALIGN(off, (u_offset_t)klen) : 0; 41310a586ceaSMark Shellenbaum ASSERT(koff <= zp->z_size); 41320a586ceaSMark Shellenbaum if (koff + klen > zp->z_size) 41330a586ceaSMark Shellenbaum klen = P2ROUNDUP(zp->z_size - koff, (uint64_t)PAGESIZE); 413444eda4d7Smaybee pp = pvn_write_kluster(vp, pp, &off, &len, koff, klen, flags); 413544eda4d7Smaybee } 413644eda4d7Smaybee ASSERT3U(btop(len), ==, btopr(len)); 4137ac05c741SMark Maybee 4138dd6ef538Smaybee /* 4139dd6ef538Smaybee * Can't push pages past end-of-file. 4140dd6ef538Smaybee */ 41410a586ceaSMark Shellenbaum if (off >= zp->z_size) { 4142f4d2e9e6Smaybee /* ignore all pages */ 414344eda4d7Smaybee err = 0; 414444eda4d7Smaybee goto out; 41450a586ceaSMark Shellenbaum } else if (off + len > zp->z_size) { 41460a586ceaSMark Shellenbaum int npages = btopr(zp->z_size - off); 414744eda4d7Smaybee page_t *trunc; 414844eda4d7Smaybee 414944eda4d7Smaybee page_list_break(&pp, &trunc, npages); 4150f4d2e9e6Smaybee /* ignore pages past end of file */ 415144eda4d7Smaybee if (trunc) 4152f4d2e9e6Smaybee pvn_write_done(trunc, flags); 41530a586ceaSMark Shellenbaum len = zp->z_size - off; 4154dd6ef538Smaybee } 415514843421SMatthew Ahrens 41560a586ceaSMark Shellenbaum if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 41570a586ceaSMark Shellenbaum zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 4158be6fd75aSMatthew Ahrens err = SET_ERROR(EDQUOT); 415914843421SMatthew Ahrens goto out; 416014843421SMatthew Ahrens } 4161fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 4162fa9e4066Sahrens dmu_tx_hold_write(tx, zp->z_id, off, len); 41630a586ceaSMark Shellenbaum 41640a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 41650a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 4166e722410cSMatthew Ahrens err = dmu_tx_assign(tx, TXG_WAIT); 4167fa9e4066Sahrens if (err != 0) { 41688a2f1b91Sahrens dmu_tx_abort(tx); 4169fa9e4066Sahrens goto out; 4170fa9e4066Sahrens } 4171fa9e4066Sahrens 417244eda4d7Smaybee if (zp->z_blksz <= PAGESIZE) { 
41730fab61baSJonathan W Adams caddr_t va = zfs_map_page(pp, S_READ); 417444eda4d7Smaybee ASSERT3U(len, <=, PAGESIZE); 417544eda4d7Smaybee dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx); 41760fab61baSJonathan W Adams zfs_unmap_page(pp, va); 417744eda4d7Smaybee } else { 417844eda4d7Smaybee err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx); 417944eda4d7Smaybee } 4180fa9e4066Sahrens 418144eda4d7Smaybee if (err == 0) { 41820a586ceaSMark Shellenbaum uint64_t mtime[2], ctime[2]; 4183db9986c7SMark Shellenbaum sa_bulk_attr_t bulk[3]; 41840a586ceaSMark Shellenbaum int count = 0; 41850a586ceaSMark Shellenbaum 41860a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 41870a586ceaSMark Shellenbaum &mtime, 16); 41880a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 41890a586ceaSMark Shellenbaum &ctime, 16); 4190db9986c7SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 4191db9986c7SMark Shellenbaum &zp->z_pflags, 8); 41920a586ceaSMark Shellenbaum zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 41930a586ceaSMark Shellenbaum B_TRUE); 4194ac05c741SMark Maybee zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 419544eda4d7Smaybee } 419668857716SLin Ling dmu_tx_commit(tx); 4197fa9e4066Sahrens 419844eda4d7Smaybee out: 4199f4d2e9e6Smaybee pvn_write_done(pp, (err ? B_ERROR : 0) | flags); 4200fa9e4066Sahrens if (offp) 4201fa9e4066Sahrens *offp = off; 4202fa9e4066Sahrens if (lenp) 4203fa9e4066Sahrens *lenp = len; 4204fa9e4066Sahrens 4205fa9e4066Sahrens return (err); 4206fa9e4066Sahrens } 4207fa9e4066Sahrens 4208fa9e4066Sahrens /* 4209fa9e4066Sahrens * Copy the portion of the file indicated from pages into the file. 4210fa9e4066Sahrens * The pages are stored in a page list attached to the files vnode. 4211fa9e4066Sahrens * 4212fa9e4066Sahrens * IN: vp - vnode of file to push page data to. 4213fa9e4066Sahrens * off - position in file to put data. 
4214fa9e4066Sahrens * len - amount of data to write. 4215fa9e4066Sahrens * flags - flags to control the operation. 4216fa9e4066Sahrens * cr - credentials of caller. 4217da6c28aaSamw * ct - caller context. 4218fa9e4066Sahrens * 4219f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 4220fa9e4066Sahrens * 4221fa9e4066Sahrens * Timestamps: 4222fa9e4066Sahrens * vp - ctime|mtime updated 4223fa9e4066Sahrens */ 4224da6c28aaSamw /*ARGSUSED*/ 4225fa9e4066Sahrens static int 4226da6c28aaSamw zfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr, 4227da6c28aaSamw caller_context_t *ct) 4228fa9e4066Sahrens { 4229fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4230fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4231fa9e4066Sahrens page_t *pp; 4232fa9e4066Sahrens size_t io_len; 4233fa9e4066Sahrens u_offset_t io_off; 4234ac05c741SMark Maybee uint_t blksz; 4235ac05c741SMark Maybee rl_t *rl; 4236fa9e4066Sahrens int error = 0; 4237fa9e4066Sahrens 42383cb34c60Sahrens ZFS_ENTER(zfsvfs); 42393cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4240fa9e4066Sahrens 4241c4fc6b21SGarrett D'Amore /* 4242c4fc6b21SGarrett D'Amore * There's nothing to do if no data is cached. 4243c4fc6b21SGarrett D'Amore */ 4244c4fc6b21SGarrett D'Amore if (!vn_has_cached_data(vp)) { 4245c4fc6b21SGarrett D'Amore ZFS_EXIT(zfsvfs); 4246c4fc6b21SGarrett D'Amore return (0); 4247c4fc6b21SGarrett D'Amore } 4248c4fc6b21SGarrett D'Amore 4249ac05c741SMark Maybee /* 4250ac05c741SMark Maybee * Align this request to the file block size in case we kluster. 4251ac05c741SMark Maybee * XXX - this can result in pretty aggresive locking, which can 4252ac05c741SMark Maybee * impact simultanious read/write access. One option might be 4253ac05c741SMark Maybee * to break up long requests (len == 0) into block-by-block 4254ac05c741SMark Maybee * operations to get narrower locking. 
4255ac05c741SMark Maybee */ 4256ac05c741SMark Maybee blksz = zp->z_blksz; 4257ac05c741SMark Maybee if (ISP2(blksz)) 4258ac05c741SMark Maybee io_off = P2ALIGN_TYPED(off, blksz, u_offset_t); 4259ac05c741SMark Maybee else 4260ac05c741SMark Maybee io_off = 0; 4261ac05c741SMark Maybee if (len > 0 && ISP2(blksz)) 42625a6f5619SMark Maybee io_len = P2ROUNDUP_TYPED(len + (off - io_off), blksz, size_t); 4263ac05c741SMark Maybee else 4264ac05c741SMark Maybee io_len = 0; 4265ac05c741SMark Maybee 4266ac05c741SMark Maybee if (io_len == 0) { 4267fa9e4066Sahrens /* 4268ac05c741SMark Maybee * Search the entire vp list for pages >= io_off. 4269fa9e4066Sahrens */ 4270ac05c741SMark Maybee rl = zfs_range_lock(zp, io_off, UINT64_MAX, RL_WRITER); 4271ac05c741SMark Maybee error = pvn_vplist_dirty(vp, io_off, zfs_putapage, flags, cr); 4272fe9cf88cSperrin goto out; 4273fa9e4066Sahrens } 4274ac05c741SMark Maybee rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER); 4275fa9e4066Sahrens 42760a586ceaSMark Shellenbaum if (off > zp->z_size) { 4277fa9e4066Sahrens /* past end of file */ 4278ac05c741SMark Maybee zfs_range_unlock(rl); 4279fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4280fa9e4066Sahrens return (0); 4281fa9e4066Sahrens } 4282fa9e4066Sahrens 42830a586ceaSMark Shellenbaum len = MIN(io_len, P2ROUNDUP(zp->z_size, PAGESIZE) - io_off); 4284fa9e4066Sahrens 4285ac05c741SMark Maybee for (off = io_off; io_off < off + len; io_off += io_len) { 4286fa9e4066Sahrens if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) { 4287104e2ed7Sperrin pp = page_lookup(vp, io_off, 4288ecb72030Sperrin (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED); 4289fa9e4066Sahrens } else { 4290fa9e4066Sahrens pp = page_lookup_nowait(vp, io_off, 4291ecb72030Sperrin (flags & B_FREE) ? 
SE_EXCL : SE_SHARED); 4292fa9e4066Sahrens } 4293fa9e4066Sahrens 4294fa9e4066Sahrens if (pp != NULL && pvn_getdirty(pp, flags)) { 4295fa9e4066Sahrens int err; 4296fa9e4066Sahrens 4297fa9e4066Sahrens /* 4298fa9e4066Sahrens * Found a dirty page to push 4299fa9e4066Sahrens */ 4300104e2ed7Sperrin err = zfs_putapage(vp, pp, &io_off, &io_len, flags, cr); 4301104e2ed7Sperrin if (err) 4302fa9e4066Sahrens error = err; 4303fa9e4066Sahrens } else { 4304fa9e4066Sahrens io_len = PAGESIZE; 4305fa9e4066Sahrens } 4306fa9e4066Sahrens } 4307fe9cf88cSperrin out: 4308ac05c741SMark Maybee zfs_range_unlock(rl); 430955da60b9SMark J Musante if ((flags & B_ASYNC) == 0 || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 43105002558fSNeil Perrin zil_commit(zfsvfs->z_log, zp->z_id); 4311fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4312fa9e4066Sahrens return (error); 4313fa9e4066Sahrens } 4314fa9e4066Sahrens 4315da6c28aaSamw /*ARGSUSED*/ 4316fa9e4066Sahrens void 4317da6c28aaSamw zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 4318fa9e4066Sahrens { 4319fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4320fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4321fa9e4066Sahrens int error; 4322fa9e4066Sahrens 4323f18faf3fSek rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 43240a586ceaSMark Shellenbaum if (zp->z_sa_hdl == NULL) { 43254ccbb6e7Sahrens /* 4326874395d5Smaybee * The fs has been unmounted, or we did a 4327874395d5Smaybee * suspend/resume and this file no longer exists. 
43284ccbb6e7Sahrens */ 4329fa9e4066Sahrens if (vn_has_cached_data(vp)) { 4330fa9e4066Sahrens (void) pvn_vplist_dirty(vp, 0, zfs_null_putapage, 4331fa9e4066Sahrens B_INVAL, cr); 4332fa9e4066Sahrens } 4333fa9e4066Sahrens 4334ea8dc4b6Seschrock mutex_enter(&zp->z_lock); 4335cd2adeceSChris Kirby mutex_enter(&vp->v_lock); 4336cd2adeceSChris Kirby ASSERT(vp->v_count == 1); 4337cd2adeceSChris Kirby vp->v_count = 0; 4338cd2adeceSChris Kirby mutex_exit(&vp->v_lock); 43394ccbb6e7Sahrens mutex_exit(&zp->z_lock); 4340f18faf3fSek rw_exit(&zfsvfs->z_teardown_inactive_lock); 4341874395d5Smaybee zfs_znode_free(zp); 4342fa9e4066Sahrens return; 4343fa9e4066Sahrens } 4344fa9e4066Sahrens 4345fa9e4066Sahrens /* 4346fa9e4066Sahrens * Attempt to push any data in the page cache. If this fails 4347fa9e4066Sahrens * we will get kicked out later in zfs_zinactive(). 4348fa9e4066Sahrens */ 43498afd4dd6Sperrin if (vn_has_cached_data(vp)) { 43508afd4dd6Sperrin (void) pvn_vplist_dirty(vp, 0, zfs_putapage, B_INVAL|B_ASYNC, 43518afd4dd6Sperrin cr); 43528afd4dd6Sperrin } 4353fa9e4066Sahrens 4354893a6d32Sahrens if (zp->z_atime_dirty && zp->z_unlinked == 0) { 4355fa9e4066Sahrens dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 4356fa9e4066Sahrens 43570a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 43580a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 4359fa9e4066Sahrens error = dmu_tx_assign(tx, TXG_WAIT); 4360fa9e4066Sahrens if (error) { 4361fa9e4066Sahrens dmu_tx_abort(tx); 4362fa9e4066Sahrens } else { 4363fa9e4066Sahrens mutex_enter(&zp->z_lock); 43640a586ceaSMark Shellenbaum (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 43650a586ceaSMark Shellenbaum (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 4366fa9e4066Sahrens zp->z_atime_dirty = 0; 4367fa9e4066Sahrens mutex_exit(&zp->z_lock); 4368fa9e4066Sahrens dmu_tx_commit(tx); 4369fa9e4066Sahrens } 4370fa9e4066Sahrens } 4371fa9e4066Sahrens 4372fa9e4066Sahrens zfs_zinactive(zp); 4373f18faf3fSek 
rw_exit(&zfsvfs->z_teardown_inactive_lock); 4374fa9e4066Sahrens } 4375fa9e4066Sahrens 4376fa9e4066Sahrens /* 4377fa9e4066Sahrens * Bounds-check the seek operation. 4378fa9e4066Sahrens * 4379fa9e4066Sahrens * IN: vp - vnode seeking within 4380fa9e4066Sahrens * ooff - old file offset 4381fa9e4066Sahrens * noffp - pointer to new file offset 4382da6c28aaSamw * ct - caller context 4383fa9e4066Sahrens * 4384f7170741SWill Andrews * RETURN: 0 on success, EINVAL if new offset invalid. 4385fa9e4066Sahrens */ 4386fa9e4066Sahrens /* ARGSUSED */ 4387fa9e4066Sahrens static int 4388da6c28aaSamw zfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, 4389da6c28aaSamw caller_context_t *ct) 4390fa9e4066Sahrens { 4391fa9e4066Sahrens if (vp->v_type == VDIR) 4392fa9e4066Sahrens return (0); 4393fa9e4066Sahrens return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 4394fa9e4066Sahrens } 4395fa9e4066Sahrens 4396fa9e4066Sahrens /* 4397fa9e4066Sahrens * Pre-filter the generic locking function to trap attempts to place 4398fa9e4066Sahrens * a mandatory lock on a memory mapped file. 4399fa9e4066Sahrens */ 4400fa9e4066Sahrens static int 4401fa9e4066Sahrens zfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset, 4402da6c28aaSamw flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct) 4403fa9e4066Sahrens { 4404fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4405fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4406fa9e4066Sahrens 44073cb34c60Sahrens ZFS_ENTER(zfsvfs); 44083cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4409fa9e4066Sahrens 4410fa9e4066Sahrens /* 4411ea8dc4b6Seschrock * We are following the UFS semantics with respect to mapcnt 4412ea8dc4b6Seschrock * here: If we see that the file is mapped already, then we will 4413ea8dc4b6Seschrock * return an error, but we don't worry about races between this 4414ea8dc4b6Seschrock * function and zfs_map(). 
4415fa9e4066Sahrens */ 44160a586ceaSMark Shellenbaum if (zp->z_mapcnt > 0 && MANDMODE(zp->z_mode)) { 4417fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4418be6fd75aSMatthew Ahrens return (SET_ERROR(EAGAIN)); 4419fa9e4066Sahrens } 4420fa9e4066Sahrens ZFS_EXIT(zfsvfs); 442104ce3d0bSMark Shellenbaum return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); 4422fa9e4066Sahrens } 4423fa9e4066Sahrens 4424fa9e4066Sahrens /* 4425fa9e4066Sahrens * If we can't find a page in the cache, we will create a new page 4426fa9e4066Sahrens * and fill it with file data. For efficiency, we may try to fill 4427ac05c741SMark Maybee * multiple pages at once (klustering) to fill up the supplied page 4428ed886187SMark Maybee * list. Note that the pages to be filled are held with an exclusive 4429ed886187SMark Maybee * lock to prevent access by other threads while they are being filled. 4430fa9e4066Sahrens */ 4431fa9e4066Sahrens static int 4432fa9e4066Sahrens zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg, 4433fa9e4066Sahrens caddr_t addr, page_t *pl[], size_t plsz, enum seg_rw rw) 4434fa9e4066Sahrens { 4435fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4436fa9e4066Sahrens page_t *pp, *cur_pp; 4437fa9e4066Sahrens objset_t *os = zp->z_zfsvfs->z_os; 4438fa9e4066Sahrens u_offset_t io_off, total; 4439fa9e4066Sahrens size_t io_len; 4440fa9e4066Sahrens int err; 4441fa9e4066Sahrens 444244eda4d7Smaybee if (plsz == PAGESIZE || zp->z_blksz <= PAGESIZE) { 4443ac05c741SMark Maybee /* 4444ac05c741SMark Maybee * We only have a single page, don't bother klustering 4445ac05c741SMark Maybee */ 4446fa9e4066Sahrens io_off = off; 4447fa9e4066Sahrens io_len = PAGESIZE; 4448ed886187SMark Maybee pp = page_create_va(vp, io_off, io_len, 4449ed886187SMark Maybee PG_EXCL | PG_WAIT, seg, addr); 4450fa9e4066Sahrens } else { 4451fa9e4066Sahrens /* 4452ac05c741SMark Maybee * Try to find enough pages to fill the page list 4453fa9e4066Sahrens */ 4454fa9e4066Sahrens pp = pvn_read_kluster(vp, off, seg, addr, &io_off, 
4455ac05c741SMark Maybee &io_len, off, plsz, 0); 4456fa9e4066Sahrens } 4457fa9e4066Sahrens if (pp == NULL) { 4458fa9e4066Sahrens /* 4459ac05c741SMark Maybee * The page already exists, nothing to do here. 4460fa9e4066Sahrens */ 4461fa9e4066Sahrens *pl = NULL; 4462fa9e4066Sahrens return (0); 4463fa9e4066Sahrens } 4464fa9e4066Sahrens 4465fa9e4066Sahrens /* 4466fa9e4066Sahrens * Fill the pages in the kluster. 4467fa9e4066Sahrens */ 4468fa9e4066Sahrens cur_pp = pp; 4469fa9e4066Sahrens for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) { 4470ac05c741SMark Maybee caddr_t va; 4471ac05c741SMark Maybee 447244eda4d7Smaybee ASSERT3U(io_off, ==, cur_pp->p_offset); 44730fab61baSJonathan W Adams va = zfs_map_page(cur_pp, S_WRITE); 44747bfdf011SNeil Perrin err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va, 44757bfdf011SNeil Perrin DMU_READ_PREFETCH); 44760fab61baSJonathan W Adams zfs_unmap_page(cur_pp, va); 4477fa9e4066Sahrens if (err) { 4478fa9e4066Sahrens /* On error, toss the entire kluster */ 4479fa9e4066Sahrens pvn_read_done(pp, B_ERROR); 4480b87f3af3Sperrin /* convert checksum errors into IO errors */ 4481b87f3af3Sperrin if (err == ECKSUM) 4482be6fd75aSMatthew Ahrens err = SET_ERROR(EIO); 4483fa9e4066Sahrens return (err); 4484fa9e4066Sahrens } 4485fa9e4066Sahrens cur_pp = cur_pp->p_next; 4486fa9e4066Sahrens } 4487ac05c741SMark Maybee 4488fa9e4066Sahrens /* 4489ac05c741SMark Maybee * Fill in the page list array from the kluster starting 4490ac05c741SMark Maybee * from the desired offset `off'. 4491fa9e4066Sahrens * NOTE: the page list will always be null terminated. 
4492fa9e4066Sahrens */ 4493fa9e4066Sahrens pvn_plist_init(pp, pl, plsz, off, io_len, rw); 4494ac05c741SMark Maybee ASSERT(pl == NULL || (*pl)->p_offset == off); 4495fa9e4066Sahrens 4496fa9e4066Sahrens return (0); 4497fa9e4066Sahrens } 4498fa9e4066Sahrens 4499fa9e4066Sahrens /* 4500fa9e4066Sahrens * Return pointers to the pages for the file region [off, off + len] 4501fa9e4066Sahrens * in the pl array. If plsz is greater than len, this function may 4502ac05c741SMark Maybee * also return page pointers from after the specified region 4503ac05c741SMark Maybee * (i.e. the region [off, off + plsz]). These additional pages are 4504ac05c741SMark Maybee * only returned if they are already in the cache, or were created as 4505ac05c741SMark Maybee * part of a klustered read. 4506fa9e4066Sahrens * 4507fa9e4066Sahrens * IN: vp - vnode of file to get data from. 4508fa9e4066Sahrens * off - position in file to get data from. 4509fa9e4066Sahrens * len - amount of data to retrieve. 4510fa9e4066Sahrens * plsz - length of provided page list. 4511fa9e4066Sahrens * seg - segment to obtain pages for. 4512fa9e4066Sahrens * addr - virtual address of fault. 4513fa9e4066Sahrens * rw - mode of created pages. 4514fa9e4066Sahrens * cr - credentials of caller. 4515da6c28aaSamw * ct - caller context. 4516fa9e4066Sahrens * 4517fa9e4066Sahrens * OUT: protp - protection mode of created pages. 4518fa9e4066Sahrens * pl - list of pages created. 4519fa9e4066Sahrens * 4520f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
4521fa9e4066Sahrens * 4522fa9e4066Sahrens * Timestamps: 4523fa9e4066Sahrens * vp - atime updated 4524fa9e4066Sahrens */ 4525fa9e4066Sahrens /* ARGSUSED */ 4526fa9e4066Sahrens static int 4527fa9e4066Sahrens zfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp, 4528f7170741SWill Andrews page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, 4529f7170741SWill Andrews enum seg_rw rw, cred_t *cr, caller_context_t *ct) 4530fa9e4066Sahrens { 4531fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4532fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4533ac05c741SMark Maybee page_t **pl0 = pl; 4534ac05c741SMark Maybee int err = 0; 4535ac05c741SMark Maybee 4536ac05c741SMark Maybee /* we do our own caching, faultahead is unnecessary */ 4537ac05c741SMark Maybee if (pl == NULL) 4538ac05c741SMark Maybee return (0); 4539ac05c741SMark Maybee else if (len > plsz) 4540ac05c741SMark Maybee len = plsz; 454127bd165aSMark Maybee else 454227bd165aSMark Maybee len = P2ROUNDUP(len, PAGESIZE); 4543ac05c741SMark Maybee ASSERT(plsz >= len); 4544fa9e4066Sahrens 45453cb34c60Sahrens ZFS_ENTER(zfsvfs); 45463cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4547fa9e4066Sahrens 4548fa9e4066Sahrens if (protp) 4549fa9e4066Sahrens *protp = PROT_ALL; 4550fa9e4066Sahrens 4551fa9e4066Sahrens /* 4552ed886187SMark Maybee * Loop through the requested range [off, off + len) looking 4553fa9e4066Sahrens * for pages. If we don't find a page, we will need to create 4554fa9e4066Sahrens * a new page and fill it with data from the file. 
4555fa9e4066Sahrens */ 4556fa9e4066Sahrens while (len > 0) { 4557ac05c741SMark Maybee if (*pl = page_lookup(vp, off, SE_SHARED)) 4558ac05c741SMark Maybee *(pl+1) = NULL; 4559ac05c741SMark Maybee else if (err = zfs_fillpage(vp, off, seg, addr, pl, plsz, rw)) 4560ac05c741SMark Maybee goto out; 4561ac05c741SMark Maybee while (*pl) { 4562ac05c741SMark Maybee ASSERT3U((*pl)->p_offset, ==, off); 4563fa9e4066Sahrens off += PAGESIZE; 4564fa9e4066Sahrens addr += PAGESIZE; 456527bd165aSMark Maybee if (len > 0) { 456627bd165aSMark Maybee ASSERT3U(len, >=, PAGESIZE); 4567ac05c741SMark Maybee len -= PAGESIZE; 456827bd165aSMark Maybee } 4569ac05c741SMark Maybee ASSERT3U(plsz, >=, PAGESIZE); 4570fa9e4066Sahrens plsz -= PAGESIZE; 4571ac05c741SMark Maybee pl++; 4572fa9e4066Sahrens } 4573fa9e4066Sahrens } 4574fa9e4066Sahrens 4575fa9e4066Sahrens /* 4576fa9e4066Sahrens * Fill out the page array with any pages already in the cache. 4577fa9e4066Sahrens */ 4578ac05c741SMark Maybee while (plsz > 0 && 4579ac05c741SMark Maybee (*pl++ = page_lookup_nowait(vp, off, SE_SHARED))) { 4580ac05c741SMark Maybee off += PAGESIZE; 4581ac05c741SMark Maybee plsz -= PAGESIZE; 4582fa9e4066Sahrens } 4583fa9e4066Sahrens out: 4584fe2f476aSperrin if (err) { 4585fe2f476aSperrin /* 4586fe2f476aSperrin * Release any pages we have previously locked. 4587fe2f476aSperrin */ 4588fe2f476aSperrin while (pl > pl0) 4589fe2f476aSperrin page_unlock(*--pl); 4590ac05c741SMark Maybee } else { 4591ac05c741SMark Maybee ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4592fe2f476aSperrin } 4593fe2f476aSperrin 4594fa9e4066Sahrens *pl = NULL; 4595fa9e4066Sahrens 4596fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4597fa9e4066Sahrens return (err); 4598fa9e4066Sahrens } 4599fa9e4066Sahrens 4600ea8dc4b6Seschrock /* 4601ea8dc4b6Seschrock * Request a memory map for a section of a file. 
This code interacts 4602ea8dc4b6Seschrock * with common code and the VM system as follows: 4603ea8dc4b6Seschrock * 4604f7170741SWill Andrews * - common code calls mmap(), which ends up in smmap_common() 4605f7170741SWill Andrews * - this calls VOP_MAP(), which takes you into (say) zfs 4606f7170741SWill Andrews * - zfs_map() calls as_map(), passing segvn_create() as the callback 4607f7170741SWill Andrews * - segvn_create() creates the new segment and calls VOP_ADDMAP() 4608f7170741SWill Andrews * - zfs_addmap() updates z_mapcnt 4609ea8dc4b6Seschrock */ 4610da6c28aaSamw /*ARGSUSED*/ 4611fa9e4066Sahrens static int 4612fa9e4066Sahrens zfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, 4613da6c28aaSamw size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 4614da6c28aaSamw caller_context_t *ct) 4615fa9e4066Sahrens { 4616fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4617fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4618fa9e4066Sahrens segvn_crargs_t vn_a; 4619fa9e4066Sahrens int error; 4620fa9e4066Sahrens 46210616c50eSmarks ZFS_ENTER(zfsvfs); 46220616c50eSmarks ZFS_VERIFY_ZP(zp); 46230616c50eSmarks 46240a586ceaSMark Shellenbaum if ((prot & PROT_WRITE) && (zp->z_pflags & 46250a586ceaSMark Shellenbaum (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) { 46260616c50eSmarks ZFS_EXIT(zfsvfs); 4627be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 46280616c50eSmarks } 4629da6c28aaSamw 46300616c50eSmarks if ((prot & (PROT_READ | PROT_EXEC)) && 46310a586ceaSMark Shellenbaum (zp->z_pflags & ZFS_AV_QUARANTINED)) { 46320616c50eSmarks ZFS_EXIT(zfsvfs); 4633be6fd75aSMatthew Ahrens return (SET_ERROR(EACCES)); 46340616c50eSmarks } 4635fa9e4066Sahrens 4636fa9e4066Sahrens if (vp->v_flag & VNOMAP) { 4637fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4638be6fd75aSMatthew Ahrens return (SET_ERROR(ENOSYS)); 4639fa9e4066Sahrens } 4640fa9e4066Sahrens 4641fa9e4066Sahrens if (off < 0 || len > MAXOFFSET_T - off) { 4642fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4643be6fd75aSMatthew Ahrens 
return (SET_ERROR(ENXIO)); 4644fa9e4066Sahrens } 4645fa9e4066Sahrens 4646fa9e4066Sahrens if (vp->v_type != VREG) { 4647fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4648be6fd75aSMatthew Ahrens return (SET_ERROR(ENODEV)); 4649fa9e4066Sahrens } 4650fa9e4066Sahrens 4651fa9e4066Sahrens /* 4652fa9e4066Sahrens * If file is locked, disallow mapping. 4653fa9e4066Sahrens */ 46540a586ceaSMark Shellenbaum if (MANDMODE(zp->z_mode) && vn_has_flocks(vp)) { 4655ea8dc4b6Seschrock ZFS_EXIT(zfsvfs); 4656be6fd75aSMatthew Ahrens return (SET_ERROR(EAGAIN)); 4657fa9e4066Sahrens } 4658fa9e4066Sahrens 4659fa9e4066Sahrens as_rangelock(as); 466060946fe0Smec error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 466160946fe0Smec if (error != 0) { 466260946fe0Smec as_rangeunlock(as); 466360946fe0Smec ZFS_EXIT(zfsvfs); 466460946fe0Smec return (error); 4665fa9e4066Sahrens } 4666fa9e4066Sahrens 4667fa9e4066Sahrens vn_a.vp = vp; 4668fa9e4066Sahrens vn_a.offset = (u_offset_t)off; 4669fa9e4066Sahrens vn_a.type = flags & MAP_TYPE; 4670fa9e4066Sahrens vn_a.prot = prot; 4671fa9e4066Sahrens vn_a.maxprot = maxprot; 4672fa9e4066Sahrens vn_a.cred = cr; 4673fa9e4066Sahrens vn_a.amp = NULL; 4674fa9e4066Sahrens vn_a.flags = flags & ~MAP_TYPE; 46754944b02eSkchow vn_a.szc = 0; 46764944b02eSkchow vn_a.lgrp_mem_policy_flags = 0; 4677fa9e4066Sahrens 4678fa9e4066Sahrens error = as_map(as, *addrp, len, segvn_create, &vn_a); 4679fa9e4066Sahrens 4680fa9e4066Sahrens as_rangeunlock(as); 4681fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4682fa9e4066Sahrens return (error); 4683fa9e4066Sahrens } 4684fa9e4066Sahrens 4685fa9e4066Sahrens /* ARGSUSED */ 4686fa9e4066Sahrens static int 4687fa9e4066Sahrens zfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 4688da6c28aaSamw size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 4689da6c28aaSamw caller_context_t *ct) 4690fa9e4066Sahrens { 4691ea8dc4b6Seschrock uint64_t pages = btopr(len); 4692ea8dc4b6Seschrock 4693ea8dc4b6Seschrock atomic_add_64(&VTOZ(vp)->z_mapcnt, 
pages); 4694fa9e4066Sahrens return (0); 4695fa9e4066Sahrens } 4696fa9e4066Sahrens 4697b468a217Seschrock /* 4698b468a217Seschrock * The reason we push dirty pages as part of zfs_delmap() is so that we get a 4699b468a217Seschrock * more accurate mtime for the associated file. Since we don't have a way of 4700b468a217Seschrock * detecting when the data was actually modified, we have to resort to 4701b468a217Seschrock * heuristics. If an explicit msync() is done, then we mark the mtime when the 4702b468a217Seschrock * last page is pushed. The problem occurs when the msync() call is omitted, 4703b468a217Seschrock * which by far the most common case: 4704b468a217Seschrock * 47054bb73804SMatthew Ahrens * open() 47064bb73804SMatthew Ahrens * mmap() 47074bb73804SMatthew Ahrens * <modify memory> 47084bb73804SMatthew Ahrens * munmap() 47094bb73804SMatthew Ahrens * close() 47104bb73804SMatthew Ahrens * <time lapse> 47114bb73804SMatthew Ahrens * putpage() via fsflush 4712b468a217Seschrock * 4713b468a217Seschrock * If we wait until fsflush to come along, we can have a modification time that 4714b468a217Seschrock * is some arbitrary point in the future. In order to prevent this in the 4715b468a217Seschrock * common case, we flush pages whenever a (MAP_SHARED, PROT_WRITE) mapping is 4716b468a217Seschrock * torn down. 
4717b468a217Seschrock */ 4718fa9e4066Sahrens /* ARGSUSED */ 4719fa9e4066Sahrens static int 4720fa9e4066Sahrens zfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 4721da6c28aaSamw size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr, 4722da6c28aaSamw caller_context_t *ct) 4723fa9e4066Sahrens { 4724ea8dc4b6Seschrock uint64_t pages = btopr(len); 4725ea8dc4b6Seschrock 4726ea8dc4b6Seschrock ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages); 4727ea8dc4b6Seschrock atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages); 4728b468a217Seschrock 4729b468a217Seschrock if ((flags & MAP_SHARED) && (prot & PROT_WRITE) && 4730b468a217Seschrock vn_has_cached_data(vp)) 4731da6c28aaSamw (void) VOP_PUTPAGE(vp, off, len, B_ASYNC, cr, ct); 4732b468a217Seschrock 4733fa9e4066Sahrens return (0); 4734fa9e4066Sahrens } 4735fa9e4066Sahrens 4736fa9e4066Sahrens /* 4737fa9e4066Sahrens * Free or allocate space in a file. Currently, this function only 4738fa9e4066Sahrens * supports the `F_FREESP' command. However, this command is somewhat 4739fa9e4066Sahrens * misnamed, as its functionality includes the ability to allocate as 4740fa9e4066Sahrens * well as free space. 4741fa9e4066Sahrens * 4742fa9e4066Sahrens * IN: vp - vnode of file to free data in. 4743fa9e4066Sahrens * cmd - action to take (only F_FREESP supported). 4744fa9e4066Sahrens * bfp - section of file to free/alloc. 4745fa9e4066Sahrens * flag - current file open mode flags. 4746fa9e4066Sahrens * offset - current file offset. 4747fa9e4066Sahrens * cr - credentials of caller [UNUSED]. 4748da6c28aaSamw * ct - caller context. 4749fa9e4066Sahrens * 4750f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
4751fa9e4066Sahrens * 4752fa9e4066Sahrens * Timestamps: 4753fa9e4066Sahrens * vp - ctime|mtime updated 4754fa9e4066Sahrens */ 4755fa9e4066Sahrens /* ARGSUSED */ 4756fa9e4066Sahrens static int 4757fa9e4066Sahrens zfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag, 4758fa9e4066Sahrens offset_t offset, cred_t *cr, caller_context_t *ct) 4759fa9e4066Sahrens { 4760fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4761fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4762fa9e4066Sahrens uint64_t off, len; 4763fa9e4066Sahrens int error; 4764fa9e4066Sahrens 47653cb34c60Sahrens ZFS_ENTER(zfsvfs); 47663cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4767fa9e4066Sahrens 4768fa9e4066Sahrens if (cmd != F_FREESP) { 4769fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4770be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 4771fa9e4066Sahrens } 4772fa9e4066Sahrens 47732144b121SMarcel Telka /* 47742144b121SMarcel Telka * In a case vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g. snapshots) our 47752144b121SMarcel Telka * callers might not be able to detect properly that we are read-only, 47762144b121SMarcel Telka * so check it explicitly here. 
47772144b121SMarcel Telka */ 47782144b121SMarcel Telka if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 47792144b121SMarcel Telka ZFS_EXIT(zfsvfs); 47802144b121SMarcel Telka return (SET_ERROR(EROFS)); 47812144b121SMarcel Telka } 47822144b121SMarcel Telka 4783fa9e4066Sahrens if (error = convoff(vp, bfp, 0, offset)) { 4784fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4785fa9e4066Sahrens return (error); 4786fa9e4066Sahrens } 4787fa9e4066Sahrens 4788fa9e4066Sahrens if (bfp->l_len < 0) { 4789fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4790be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 4791fa9e4066Sahrens } 4792fa9e4066Sahrens 4793fa9e4066Sahrens off = bfp->l_start; 4794104e2ed7Sperrin len = bfp->l_len; /* 0 means from off to end of file */ 4795104e2ed7Sperrin 4796cdb0ab79Smaybee error = zfs_freesp(zp, off, len, flag, TRUE); 4797fa9e4066Sahrens 479872102e74SBryan Cantrill if (error == 0 && off == 0 && len == 0) 479972102e74SBryan Cantrill vnevent_truncate(ZTOV(zp), ct); 480072102e74SBryan Cantrill 4801fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4802fa9e4066Sahrens return (error); 4803fa9e4066Sahrens } 4804fa9e4066Sahrens 4805da6c28aaSamw /*ARGSUSED*/ 4806fa9e4066Sahrens static int 4807da6c28aaSamw zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 4808fa9e4066Sahrens { 4809fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4810fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4811f18faf3fSek uint32_t gen; 48120a586ceaSMark Shellenbaum uint64_t gen64; 4813fa9e4066Sahrens uint64_t object = zp->z_id; 4814fa9e4066Sahrens zfid_short_t *zfid; 48150a586ceaSMark Shellenbaum int size, i, error; 4816fa9e4066Sahrens 48173cb34c60Sahrens ZFS_ENTER(zfsvfs); 48183cb34c60Sahrens ZFS_VERIFY_ZP(zp); 48190a586ceaSMark Shellenbaum 48200a586ceaSMark Shellenbaum if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), 4821f3e6fb2fSMark Shellenbaum &gen64, sizeof (uint64_t))) != 0) { 4822f3e6fb2fSMark Shellenbaum ZFS_EXIT(zfsvfs); 48230a586ceaSMark Shellenbaum return (error); 4824f3e6fb2fSMark Shellenbaum } 48250a586ceaSMark 
Shellenbaum 48260a586ceaSMark Shellenbaum gen = (uint32_t)gen64; 4827fa9e4066Sahrens 4828fa9e4066Sahrens size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN; 4829fa9e4066Sahrens if (fidp->fid_len < size) { 4830fa9e4066Sahrens fidp->fid_len = size; 48310f2dc02eSek ZFS_EXIT(zfsvfs); 4832be6fd75aSMatthew Ahrens return (SET_ERROR(ENOSPC)); 4833fa9e4066Sahrens } 4834fa9e4066Sahrens 4835fa9e4066Sahrens zfid = (zfid_short_t *)fidp; 4836fa9e4066Sahrens 4837fa9e4066Sahrens zfid->zf_len = size; 4838fa9e4066Sahrens 4839fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_object); i++) 4840fa9e4066Sahrens zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 4841fa9e4066Sahrens 4842fa9e4066Sahrens /* Must have a non-zero generation number to distinguish from .zfs */ 4843fa9e4066Sahrens if (gen == 0) 4844fa9e4066Sahrens gen = 1; 4845fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_gen); i++) 4846fa9e4066Sahrens zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 4847fa9e4066Sahrens 4848fa9e4066Sahrens if (size == LONG_FID_LEN) { 4849fa9e4066Sahrens uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 4850fa9e4066Sahrens zfid_long_t *zlfid; 4851fa9e4066Sahrens 4852fa9e4066Sahrens zlfid = (zfid_long_t *)fidp; 4853fa9e4066Sahrens 4854fa9e4066Sahrens for (i = 0; i < sizeof (zlfid->zf_setid); i++) 4855fa9e4066Sahrens zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 4856fa9e4066Sahrens 4857fa9e4066Sahrens /* XXX - this should be the generation number for the objset */ 4858fa9e4066Sahrens for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 4859fa9e4066Sahrens zlfid->zf_setgen[i] = 0; 4860fa9e4066Sahrens } 4861fa9e4066Sahrens 4862fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4863fa9e4066Sahrens return (0); 4864fa9e4066Sahrens } 4865fa9e4066Sahrens 4866fa9e4066Sahrens static int 4867da6c28aaSamw zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 4868da6c28aaSamw caller_context_t *ct) 4869fa9e4066Sahrens { 4870fa9e4066Sahrens znode_t *zp, *xzp; 4871fa9e4066Sahrens zfsvfs_t *zfsvfs; 
4872fa9e4066Sahrens zfs_dirlock_t *dl; 4873fa9e4066Sahrens int error; 4874fa9e4066Sahrens 4875fa9e4066Sahrens switch (cmd) { 4876fa9e4066Sahrens case _PC_LINK_MAX: 4877fa9e4066Sahrens *valp = ULONG_MAX; 4878fa9e4066Sahrens return (0); 4879fa9e4066Sahrens 4880fa9e4066Sahrens case _PC_FILESIZEBITS: 4881fa9e4066Sahrens *valp = 64; 4882fa9e4066Sahrens return (0); 4883fa9e4066Sahrens 4884fa9e4066Sahrens case _PC_XATTR_EXISTS: 4885fa9e4066Sahrens zp = VTOZ(vp); 4886fa9e4066Sahrens zfsvfs = zp->z_zfsvfs; 48873cb34c60Sahrens ZFS_ENTER(zfsvfs); 48883cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4889fa9e4066Sahrens *valp = 0; 4890fa9e4066Sahrens error = zfs_dirent_lock(&dl, zp, "", &xzp, 4891da6c28aaSamw ZXATTR | ZEXISTS | ZSHARED, NULL, NULL); 4892fa9e4066Sahrens if (error == 0) { 4893fa9e4066Sahrens zfs_dirent_unlock(dl); 4894fa9e4066Sahrens if (!zfs_dirempty(xzp)) 4895fa9e4066Sahrens *valp = 1; 4896fa9e4066Sahrens VN_RELE(ZTOV(xzp)); 4897fa9e4066Sahrens } else if (error == ENOENT) { 4898fa9e4066Sahrens /* 4899fa9e4066Sahrens * If there aren't extended attributes, it's the 4900fa9e4066Sahrens * same as having zero of them. 
4901fa9e4066Sahrens */ 4902fa9e4066Sahrens error = 0; 4903fa9e4066Sahrens } 4904fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4905fa9e4066Sahrens return (error); 4906fa9e4066Sahrens 4907da6c28aaSamw case _PC_SATTR_ENABLED: 4908da6c28aaSamw case _PC_SATTR_EXISTS: 49099660e5cbSJanice Chang *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 4910da6c28aaSamw (vp->v_type == VREG || vp->v_type == VDIR); 4911da6c28aaSamw return (0); 4912da6c28aaSamw 4913e802abbdSTim Haley case _PC_ACCESS_FILTERING: 4914e802abbdSTim Haley *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && 4915e802abbdSTim Haley vp->v_type == VDIR; 4916e802abbdSTim Haley return (0); 4917e802abbdSTim Haley 4918fa9e4066Sahrens case _PC_ACL_ENABLED: 4919fa9e4066Sahrens *valp = _ACL_ACE_ENABLED; 4920fa9e4066Sahrens return (0); 4921fa9e4066Sahrens 4922fa9e4066Sahrens case _PC_MIN_HOLE_SIZE: 4923fa9e4066Sahrens *valp = (ulong_t)SPA_MINBLOCKSIZE; 4924fa9e4066Sahrens return (0); 4925fa9e4066Sahrens 49263b862e9aSRoger A. Faulkner case _PC_TIMESTAMP_RESOLUTION: 49273b862e9aSRoger A. Faulkner /* nanosecond timestamp resolution */ 49283b862e9aSRoger A. Faulkner *valp = 1L; 49293b862e9aSRoger A. Faulkner return (0); 49303b862e9aSRoger A. Faulkner 4931fa9e4066Sahrens default: 4932da6c28aaSamw return (fs_pathconf(vp, cmd, valp, cr, ct)); 4933fa9e4066Sahrens } 4934fa9e4066Sahrens } 4935fa9e4066Sahrens 4936fa9e4066Sahrens /*ARGSUSED*/ 4937fa9e4066Sahrens static int 4938da6c28aaSamw zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 4939da6c28aaSamw caller_context_t *ct) 4940fa9e4066Sahrens { 4941fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4942fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4943fa9e4066Sahrens int error; 4944da6c28aaSamw boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 4945fa9e4066Sahrens 49463cb34c60Sahrens ZFS_ENTER(zfsvfs); 49473cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4948da6c28aaSamw error = zfs_getacl(zp, vsecp, skipaclchk, cr); 4949fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4950fa9e4066Sahrens 4951fa9e4066Sahrens return (error); 4952fa9e4066Sahrens } 4953fa9e4066Sahrens 4954fa9e4066Sahrens /*ARGSUSED*/ 4955fa9e4066Sahrens static int 4956da6c28aaSamw zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 4957da6c28aaSamw caller_context_t *ct) 4958fa9e4066Sahrens { 4959fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4960fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4961fa9e4066Sahrens int error; 4962da6c28aaSamw boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 496355da60b9SMark J Musante zilog_t *zilog = zfsvfs->z_log; 4964fa9e4066Sahrens 49653cb34c60Sahrens ZFS_ENTER(zfsvfs); 49663cb34c60Sahrens ZFS_VERIFY_ZP(zp); 496755da60b9SMark J Musante 4968da6c28aaSamw error = zfs_setacl(zp, vsecp, skipaclchk, cr); 496955da60b9SMark J Musante 497055da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 49715002558fSNeil Perrin zil_commit(zilog, 0); 497255da60b9SMark J Musante 4973fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4974fa9e4066Sahrens return (error); 4975fa9e4066Sahrens } 4976fa9e4066Sahrens 4977c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 4978f7170741SWill Andrews * The smallest read we may consider to loan out an arcbuf. 4979f7170741SWill Andrews * This must be a power of 2. 4980c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 4981c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int zcr_blksz_min = (1 << 10); /* 1K */ 4982f7170741SWill Andrews /* 4983f7170741SWill Andrews * If set to less than the file block size, allow loaning out of an 4984f7170741SWill Andrews * arcbuf for a partial block read. This must be a power of 2. 
4985f7170741SWill Andrews */ 4986c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int zcr_blksz_max = (1 << 17); /* 128K */ 4987c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 4988c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /*ARGSUSED*/ 4989c242f9a0Schunli zhang - Sun Microsystems - Irvine United States static int 4990c242f9a0Schunli zhang - Sun Microsystems - Irvine United States zfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, 4991c242f9a0Schunli zhang - Sun Microsystems - Irvine United States caller_context_t *ct) 4992c242f9a0Schunli zhang - Sun Microsystems - Irvine United States { 4993c242f9a0Schunli zhang - Sun Microsystems - Irvine United States znode_t *zp = VTOZ(vp); 4994c242f9a0Schunli zhang - Sun Microsystems - Irvine United States zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4995c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int max_blksz = zfsvfs->z_max_blksz; 4996c242f9a0Schunli zhang - Sun Microsystems - Irvine United States uio_t *uio = &xuio->xu_uio; 4997c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ssize_t size = uio->uio_resid; 4998c242f9a0Schunli zhang - Sun Microsystems - Irvine United States offset_t offset = uio->uio_loffset; 4999c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int blksz; 5000c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int fullblk, i; 5001c242f9a0Schunli zhang - Sun Microsystems - Irvine United States arc_buf_t *abuf; 5002c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ssize_t maxsize; 5003c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int preamble, postamble; 5004c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5005c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (xuio->xu_type != UIOTYPE_ZEROCOPY) 5006be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5007c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 
5008c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_ENTER(zfsvfs); 5009c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_VERIFY_ZP(zp); 5010c242f9a0Schunli zhang - Sun Microsystems - Irvine United States switch (ioflag) { 5011c242f9a0Schunli zhang - Sun Microsystems - Irvine United States case UIO_WRITE: 5012c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 5013c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Loan out an arc_buf for write if write size is bigger than 5014c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * max_blksz, and the file's block size is also max_blksz. 5015c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 5016c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz = max_blksz; 5017c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (size < blksz || zp->z_blksz != blksz) { 5018c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_EXIT(zfsvfs); 5019be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5020c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5021c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 5022c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Caller requests buffers for write before knowing where the 5023c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * write offset might be (e.g. NFS TCP write). 
5024c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 5025c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (offset == -1) { 5026c242f9a0Schunli zhang - Sun Microsystems - Irvine United States preamble = 0; 5027c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } else { 5028c242f9a0Schunli zhang - Sun Microsystems - Irvine United States preamble = P2PHASE(offset, blksz); 5029c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (preamble) { 5030c242f9a0Schunli zhang - Sun Microsystems - Irvine United States preamble = blksz - preamble; 5031c242f9a0Schunli zhang - Sun Microsystems - Irvine United States size -= preamble; 5032c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5033c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5034c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5035c242f9a0Schunli zhang - Sun Microsystems - Irvine United States postamble = P2PHASE(size, blksz); 5036c242f9a0Schunli zhang - Sun Microsystems - Irvine United States size -= postamble; 5037c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5038c242f9a0Schunli zhang - Sun Microsystems - Irvine United States fullblk = size / blksz; 5039570de38fSSurya Prakki (void) dmu_xuio_init(xuio, 5040c242f9a0Schunli zhang - Sun Microsystems - Irvine United States (preamble != 0) + fullblk + (postamble != 0)); 5041c242f9a0Schunli zhang - Sun Microsystems - Irvine United States DTRACE_PROBE3(zfs_reqzcbuf_align, int, preamble, 5042c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int, postamble, int, 5043c242f9a0Schunli zhang - Sun Microsystems - Irvine United States (preamble != 0) + fullblk + (postamble != 0)); 5044c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5045c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 5046c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Have to fix iov base/len for partial buffers. 
They 5047c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * currently represent full arc_buf's. 5048c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 5049c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (preamble) { 5050c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* data begins in the middle of the arc_buf */ 50510a586ceaSMark Shellenbaum abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 50520a586ceaSMark Shellenbaum blksz); 5053c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(abuf); 5054570de38fSSurya Prakki (void) dmu_xuio_add(xuio, abuf, 5055570de38fSSurya Prakki blksz - preamble, preamble); 5056c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5057c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5058c242f9a0Schunli zhang - Sun Microsystems - Irvine United States for (i = 0; i < fullblk; i++) { 50590a586ceaSMark Shellenbaum abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 50600a586ceaSMark Shellenbaum blksz); 5061c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(abuf); 5062570de38fSSurya Prakki (void) dmu_xuio_add(xuio, abuf, 0, blksz); 5063c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5064c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5065c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (postamble) { 5066c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* data ends in the middle of the arc_buf */ 50670a586ceaSMark Shellenbaum abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 50680a586ceaSMark Shellenbaum blksz); 5069c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(abuf); 5070570de38fSSurya Prakki (void) dmu_xuio_add(xuio, abuf, 0, postamble); 5071c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5072c242f9a0Schunli zhang - Sun Microsystems - Irvine United States break; 5073c242f9a0Schunli zhang - Sun 
Microsystems - Irvine United States case UIO_READ: 5074c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 5075c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Loan out an arc_buf for read if the read size is larger than 5076c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * the current file block size. Block alignment is not 5077c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * considered. Partial arc_buf will be loaned out for read. 5078c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 5079c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz = zp->z_blksz; 5080c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (blksz < zcr_blksz_min) 5081c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz = zcr_blksz_min; 5082c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (blksz > zcr_blksz_max) 5083c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz = zcr_blksz_max; 5084c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* avoid potential complexity of dealing with it */ 5085c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (blksz > max_blksz) { 5086c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_EXIT(zfsvfs); 5087be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5088c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5089c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 50900a586ceaSMark Shellenbaum maxsize = zp->z_size - uio->uio_loffset; 5091c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (size > maxsize) 5092c242f9a0Schunli zhang - Sun Microsystems - Irvine United States size = maxsize; 5093c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5094c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (size < blksz || vn_has_cached_data(vp)) { 5095c242f9a0Schunli zhang - Sun Microsystems - 
Irvine United States ZFS_EXIT(zfsvfs); 5096be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5097c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5098c242f9a0Schunli zhang - Sun Microsystems - Irvine United States break; 5099c242f9a0Schunli zhang - Sun Microsystems - Irvine United States default: 5100c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_EXIT(zfsvfs); 5101be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5102c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5103c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5104c242f9a0Schunli zhang - Sun Microsystems - Irvine United States uio->uio_extflg = UIO_XUIO; 5105c242f9a0Schunli zhang - Sun Microsystems - Irvine United States XUIO_XUZC_RW(xuio) = ioflag; 5106c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_EXIT(zfsvfs); 5107c242f9a0Schunli zhang - Sun Microsystems - Irvine United States return (0); 5108c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5109c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5110c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /*ARGSUSED*/ 5111c242f9a0Schunli zhang - Sun Microsystems - Irvine United States static int 5112c242f9a0Schunli zhang - Sun Microsystems - Irvine United States zfs_retzcbuf(vnode_t *vp, xuio_t *xuio, cred_t *cr, caller_context_t *ct) 5113c242f9a0Schunli zhang - Sun Microsystems - Irvine United States { 5114c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int i; 5115c242f9a0Schunli zhang - Sun Microsystems - Irvine United States arc_buf_t *abuf; 5116c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int ioflag = XUIO_XUZC_RW(xuio); 5117c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5118c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(xuio->xu_type == UIOTYPE_ZEROCOPY); 5119c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5120c242f9a0Schunli zhang 
- Sun Microsystems - Irvine United States i = dmu_xuio_cnt(xuio); 5121c242f9a0Schunli zhang - Sun Microsystems - Irvine United States while (i-- > 0) { 5122c242f9a0Schunli zhang - Sun Microsystems - Irvine United States abuf = dmu_xuio_arcbuf(xuio, i); 5123c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 5124c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * if abuf == NULL, it must be a write buffer 5125c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * that has been returned in zfs_write(). 5126c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 5127c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (abuf) 5128c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_return_arcbuf(abuf); 5129c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(abuf || ioflag == UIO_WRITE); 5130c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5131c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5132c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_xuio_fini(xuio); 5133c242f9a0Schunli zhang - Sun Microsystems - Irvine United States return (0); 5134c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5135c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5136fa9e4066Sahrens /* 5137fa9e4066Sahrens * Predeclare these here so that the compiler assumes that 5138fa9e4066Sahrens * this is an "old style" function declaration that does 5139fa9e4066Sahrens * not include arguments => we won't get type mismatch errors 5140fa9e4066Sahrens * in the initializations that follow. 
5141fa9e4066Sahrens */ 5142fa9e4066Sahrens static int zfs_inval(); 5143fa9e4066Sahrens static int zfs_isdir(); 5144fa9e4066Sahrens 5145fa9e4066Sahrens static int 5146fa9e4066Sahrens zfs_inval() 5147fa9e4066Sahrens { 5148be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5149fa9e4066Sahrens } 5150fa9e4066Sahrens 5151fa9e4066Sahrens static int 5152fa9e4066Sahrens zfs_isdir() 5153fa9e4066Sahrens { 5154be6fd75aSMatthew Ahrens return (SET_ERROR(EISDIR)); 5155fa9e4066Sahrens } 5156fa9e4066Sahrens /* 5157fa9e4066Sahrens * Directory vnode operations template 5158fa9e4066Sahrens */ 5159fa9e4066Sahrens vnodeops_t *zfs_dvnodeops; 5160fa9e4066Sahrens const fs_operation_def_t zfs_dvnodeops_template[] = { 5161aa59c4cbSrsb VOPNAME_OPEN, { .vop_open = zfs_open }, 5162aa59c4cbSrsb VOPNAME_CLOSE, { .vop_close = zfs_close }, 5163aa59c4cbSrsb VOPNAME_READ, { .error = zfs_isdir }, 5164aa59c4cbSrsb VOPNAME_WRITE, { .error = zfs_isdir }, 5165aa59c4cbSrsb VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5166aa59c4cbSrsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5167aa59c4cbSrsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5168aa59c4cbSrsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 5169aa59c4cbSrsb VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5170aa59c4cbSrsb VOPNAME_CREATE, { .vop_create = zfs_create }, 5171aa59c4cbSrsb VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 5172aa59c4cbSrsb VOPNAME_LINK, { .vop_link = zfs_link }, 5173aa59c4cbSrsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5174aa59c4cbSrsb VOPNAME_MKDIR, { .vop_mkdir = zfs_mkdir }, 5175aa59c4cbSrsb VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 5176aa59c4cbSrsb VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 5177aa59c4cbSrsb VOPNAME_SYMLINK, { .vop_symlink = zfs_symlink }, 5178aa59c4cbSrsb VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5179aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5180aa59c4cbSrsb VOPNAME_FID, { .vop_fid = zfs_fid }, 5181aa59c4cbSrsb VOPNAME_SEEK, { .vop_seek = zfs_seek }, 
5182aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5183aa59c4cbSrsb VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5184aa59c4cbSrsb VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 51854bb73804SMatthew Ahrens VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5186aa59c4cbSrsb NULL, NULL 5187fa9e4066Sahrens }; 5188fa9e4066Sahrens 5189fa9e4066Sahrens /* 5190fa9e4066Sahrens * Regular file vnode operations template 5191fa9e4066Sahrens */ 5192fa9e4066Sahrens vnodeops_t *zfs_fvnodeops; 5193fa9e4066Sahrens const fs_operation_def_t zfs_fvnodeops_template[] = { 5194aa59c4cbSrsb VOPNAME_OPEN, { .vop_open = zfs_open }, 5195aa59c4cbSrsb VOPNAME_CLOSE, { .vop_close = zfs_close }, 5196aa59c4cbSrsb VOPNAME_READ, { .vop_read = zfs_read }, 5197aa59c4cbSrsb VOPNAME_WRITE, { .vop_write = zfs_write }, 5198aa59c4cbSrsb VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5199aa59c4cbSrsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5200aa59c4cbSrsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5201aa59c4cbSrsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 5202aa59c4cbSrsb VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5203aa59c4cbSrsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5204aa59c4cbSrsb VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5205aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5206aa59c4cbSrsb VOPNAME_FID, { .vop_fid = zfs_fid }, 5207aa59c4cbSrsb VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5208aa59c4cbSrsb VOPNAME_FRLOCK, { .vop_frlock = zfs_frlock }, 5209aa59c4cbSrsb VOPNAME_SPACE, { .vop_space = zfs_space }, 5210aa59c4cbSrsb VOPNAME_GETPAGE, { .vop_getpage = zfs_getpage }, 5211aa59c4cbSrsb VOPNAME_PUTPAGE, { .vop_putpage = zfs_putpage }, 5212aa59c4cbSrsb VOPNAME_MAP, { .vop_map = zfs_map }, 5213aa59c4cbSrsb VOPNAME_ADDMAP, { .vop_addmap = zfs_addmap }, 5214aa59c4cbSrsb VOPNAME_DELMAP, { .vop_delmap = zfs_delmap }, 5215aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5216aa59c4cbSrsb VOPNAME_GETSECATTR, { 
.vop_getsecattr = zfs_getsecattr }, 5217aa59c4cbSrsb VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5218aa59c4cbSrsb VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 52194bb73804SMatthew Ahrens VOPNAME_REQZCBUF, { .vop_reqzcbuf = zfs_reqzcbuf }, 52204bb73804SMatthew Ahrens VOPNAME_RETZCBUF, { .vop_retzcbuf = zfs_retzcbuf }, 5221aa59c4cbSrsb NULL, NULL 5222fa9e4066Sahrens }; 5223fa9e4066Sahrens 5224fa9e4066Sahrens /* 5225fa9e4066Sahrens * Symbolic link vnode operations template 5226fa9e4066Sahrens */ 5227fa9e4066Sahrens vnodeops_t *zfs_symvnodeops; 5228fa9e4066Sahrens const fs_operation_def_t zfs_symvnodeops_template[] = { 5229aa59c4cbSrsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5230aa59c4cbSrsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5231aa59c4cbSrsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 5232aa59c4cbSrsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5233aa59c4cbSrsb VOPNAME_READLINK, { .vop_readlink = zfs_readlink }, 5234aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5235aa59c4cbSrsb VOPNAME_FID, { .vop_fid = zfs_fid }, 5236aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5237aa59c4cbSrsb VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5238aa59c4cbSrsb NULL, NULL 5239fa9e4066Sahrens }; 5240fa9e4066Sahrens 5241743a77edSAlan Wright /* 5242743a77edSAlan Wright * special share hidden files vnode operations template 5243743a77edSAlan Wright */ 5244743a77edSAlan Wright vnodeops_t *zfs_sharevnodeops; 5245743a77edSAlan Wright const fs_operation_def_t zfs_sharevnodeops_template[] = { 5246743a77edSAlan Wright VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5247743a77edSAlan Wright VOPNAME_ACCESS, { .vop_access = zfs_access }, 5248743a77edSAlan Wright VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5249743a77edSAlan Wright VOPNAME_FID, { .vop_fid = zfs_fid }, 5250743a77edSAlan Wright VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5251743a77edSAlan Wright VOPNAME_GETSECATTR, { 
.vop_getsecattr = zfs_getsecattr }, 5252743a77edSAlan Wright VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5253743a77edSAlan Wright VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5254743a77edSAlan Wright NULL, NULL 5255743a77edSAlan Wright }; 5256743a77edSAlan Wright 5257fa9e4066Sahrens /* 5258fa9e4066Sahrens * Extended attribute directory vnode operations template 5259f7170741SWill Andrews * 5260f7170741SWill Andrews * This template is identical to the directory vnodes 5261f7170741SWill Andrews * operation template except for restricted operations: 5262f7170741SWill Andrews * VOP_MKDIR() 5263f7170741SWill Andrews * VOP_SYMLINK() 5264f7170741SWill Andrews * 5265fa9e4066Sahrens * Note that there are other restrictions embedded in: 5266fa9e4066Sahrens * zfs_create() - restrict type to VREG 5267fa9e4066Sahrens * zfs_link() - no links into/out of attribute space 5268fa9e4066Sahrens * zfs_rename() - no moves into/out of attribute space 5269fa9e4066Sahrens */ 5270fa9e4066Sahrens vnodeops_t *zfs_xdvnodeops; 5271fa9e4066Sahrens const fs_operation_def_t zfs_xdvnodeops_template[] = { 5272aa59c4cbSrsb VOPNAME_OPEN, { .vop_open = zfs_open }, 5273aa59c4cbSrsb VOPNAME_CLOSE, { .vop_close = zfs_close }, 5274aa59c4cbSrsb VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5275aa59c4cbSrsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5276aa59c4cbSrsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5277aa59c4cbSrsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 5278aa59c4cbSrsb VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5279aa59c4cbSrsb VOPNAME_CREATE, { .vop_create = zfs_create }, 5280aa59c4cbSrsb VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 5281aa59c4cbSrsb VOPNAME_LINK, { .vop_link = zfs_link }, 5282aa59c4cbSrsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5283aa59c4cbSrsb VOPNAME_MKDIR, { .error = zfs_inval }, 5284aa59c4cbSrsb VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 5285aa59c4cbSrsb VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 5286aa59c4cbSrsb 
VOPNAME_SYMLINK, { .error = zfs_inval }, 5287aa59c4cbSrsb VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5288aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5289aa59c4cbSrsb VOPNAME_FID, { .vop_fid = zfs_fid }, 5290aa59c4cbSrsb VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5291aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5292aa59c4cbSrsb VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5293aa59c4cbSrsb VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5294aa59c4cbSrsb VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5295aa59c4cbSrsb NULL, NULL 5296fa9e4066Sahrens }; 5297fa9e4066Sahrens 5298fa9e4066Sahrens /* 5299fa9e4066Sahrens * Error vnode operations template 5300fa9e4066Sahrens */ 5301fa9e4066Sahrens vnodeops_t *zfs_evnodeops; 5302fa9e4066Sahrens const fs_operation_def_t zfs_evnodeops_template[] = { 5303aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5304aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5305aa59c4cbSrsb NULL, NULL 5306fa9e4066Sahrens }; 5307