1*7c478bd9Sstevel@tonic-gate /* 2*7c478bd9Sstevel@tonic-gate * CDDL HEADER START 3*7c478bd9Sstevel@tonic-gate * 4*7c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*7c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*7c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*7c478bd9Sstevel@tonic-gate * with the License. 8*7c478bd9Sstevel@tonic-gate * 9*7c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*7c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*7c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 12*7c478bd9Sstevel@tonic-gate * and limitations under the License. 13*7c478bd9Sstevel@tonic-gate * 14*7c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*7c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*7c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*7c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*7c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*7c478bd9Sstevel@tonic-gate * 20*7c478bd9Sstevel@tonic-gate * CDDL HEADER END 21*7c478bd9Sstevel@tonic-gate */ 22*7c478bd9Sstevel@tonic-gate /* 23*7c478bd9Sstevel@tonic-gate * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24*7c478bd9Sstevel@tonic-gate * Use is subject to license terms. 25*7c478bd9Sstevel@tonic-gate */ 26*7c478bd9Sstevel@tonic-gate 27*7c478bd9Sstevel@tonic-gate /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 28*7c478bd9Sstevel@tonic-gate /* All Rights Reserved */ 29*7c478bd9Sstevel@tonic-gate 30*7c478bd9Sstevel@tonic-gate /* 31*7c478bd9Sstevel@tonic-gate * Portions of this source code were derived from Berkeley 4.3 BSD 32*7c478bd9Sstevel@tonic-gate * under license from the Regents of the University of California. 33*7c478bd9Sstevel@tonic-gate */ 34*7c478bd9Sstevel@tonic-gate 35*7c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 36*7c478bd9Sstevel@tonic-gate 37*7c478bd9Sstevel@tonic-gate #include <sys/types.h> 38*7c478bd9Sstevel@tonic-gate #include <sys/t_lock.h> 39*7c478bd9Sstevel@tonic-gate #include <sys/ksynch.h> 40*7c478bd9Sstevel@tonic-gate #include <sys/param.h> 41*7c478bd9Sstevel@tonic-gate #include <sys/time.h> 42*7c478bd9Sstevel@tonic-gate #include <sys/systm.h> 43*7c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 44*7c478bd9Sstevel@tonic-gate #include <sys/resource.h> 45*7c478bd9Sstevel@tonic-gate #include <sys/signal.h> 46*7c478bd9Sstevel@tonic-gate #include <sys/cred.h> 47*7c478bd9Sstevel@tonic-gate #include <sys/user.h> 48*7c478bd9Sstevel@tonic-gate #include <sys/buf.h> 49*7c478bd9Sstevel@tonic-gate #include <sys/vfs.h> 50*7c478bd9Sstevel@tonic-gate #include <sys/vnode.h> 51*7c478bd9Sstevel@tonic-gate #include <sys/proc.h> 52*7c478bd9Sstevel@tonic-gate #include <sys/disp.h> 53*7c478bd9Sstevel@tonic-gate #include <sys/file.h> 54*7c478bd9Sstevel@tonic-gate #include <sys/fcntl.h> 55*7c478bd9Sstevel@tonic-gate #include <sys/flock.h> 56*7c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 57*7c478bd9Sstevel@tonic-gate #include <sys/uio.h> 58*7c478bd9Sstevel@tonic-gate #include <sys/dnlc.h> 59*7c478bd9Sstevel@tonic-gate #include <sys/conf.h> 60*7c478bd9Sstevel@tonic-gate #include <sys/mman.h> 61*7c478bd9Sstevel@tonic-gate #include <sys/pathname.h> 62*7c478bd9Sstevel@tonic-gate #include <sys/debug.h> 63*7c478bd9Sstevel@tonic-gate #include <sys/vmsystm.h> 64*7c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 65*7c478bd9Sstevel@tonic-gate #include <sys/vtrace.h> 66*7c478bd9Sstevel@tonic-gate #include <sys/filio.h> 67*7c478bd9Sstevel@tonic-gate #include <sys/policy.h> 68*7c478bd9Sstevel@tonic-gate 69*7c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_fs.h> 70*7c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_lockfs.h> 71*7c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_filio.h> 72*7c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_inode.h> 73*7c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_fsdir.h> 74*7c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_quota.h> 75*7c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_log.h> 76*7c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_snap.h> 77*7c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_trans.h> 78*7c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_panic.h> 79*7c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_bio.h> 80*7c478bd9Sstevel@tonic-gate #include <sys/dirent.h> /* must be AFTER <sys/fs/fsdir.h>! */ 81*7c478bd9Sstevel@tonic-gate #include <sys/errno.h> 82*7c478bd9Sstevel@tonic-gate #include <sys/fssnap_if.h> 83*7c478bd9Sstevel@tonic-gate #include <sys/unistd.h> 84*7c478bd9Sstevel@tonic-gate #include <sys/sunddi.h> 85*7c478bd9Sstevel@tonic-gate 86*7c478bd9Sstevel@tonic-gate #include <sys/filio.h> /* _FIOIO */ 87*7c478bd9Sstevel@tonic-gate 88*7c478bd9Sstevel@tonic-gate #include <vm/hat.h> 89*7c478bd9Sstevel@tonic-gate #include <vm/page.h> 90*7c478bd9Sstevel@tonic-gate #include <vm/pvn.h> 91*7c478bd9Sstevel@tonic-gate #include <vm/as.h> 92*7c478bd9Sstevel@tonic-gate #include <vm/seg.h> 93*7c478bd9Sstevel@tonic-gate #include <vm/seg_map.h> 94*7c478bd9Sstevel@tonic-gate #include <vm/seg_vn.h> 95*7c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h> 96*7c478bd9Sstevel@tonic-gate #include <vm/rm.h> 97*7c478bd9Sstevel@tonic-gate #include <sys/swap.h> 98*7c478bd9Sstevel@tonic-gate 99*7c478bd9Sstevel@tonic-gate #include <fs/fs_subr.h> 100*7c478bd9Sstevel@tonic-gate 101*7c478bd9Sstevel@tonic-gate static struct instats ins; 102*7c478bd9Sstevel@tonic-gate 103*7c478bd9Sstevel@tonic-gate static int ufs_getpage_ra(struct vnode *, u_offset_t, struct seg *, caddr_t); 104*7c478bd9Sstevel@tonic-gate static int ufs_getpage_miss(struct vnode *, u_offset_t, size_t, struct seg *, 105*7c478bd9Sstevel@tonic-gate caddr_t, struct page **, size_t, enum seg_rw, int); 106*7c478bd9Sstevel@tonic-gate static int ufs_open(struct vnode **, int, struct cred *); 107*7c478bd9Sstevel@tonic-gate static int ufs_close(struct vnode *, int, int, offset_t, struct cred *); 108*7c478bd9Sstevel@tonic-gate static int ufs_read(struct vnode *, struct uio *, int, struct cred *, 109*7c478bd9Sstevel@tonic-gate struct caller_context *); 110*7c478bd9Sstevel@tonic-gate static int ufs_write(struct vnode *, struct uio *, int, struct cred *, 111*7c478bd9Sstevel@tonic-gate struct caller_context *); 112*7c478bd9Sstevel@tonic-gate static int ufs_ioctl(struct vnode *, int, intptr_t, int, struct cred *, int *); 113*7c478bd9Sstevel@tonic-gate static int ufs_getattr(struct vnode *, struct vattr *, int, struct cred *); 114*7c478bd9Sstevel@tonic-gate static int ufs_setattr(struct vnode *, struct vattr *, int, struct cred *, 115*7c478bd9Sstevel@tonic-gate caller_context_t *); 116*7c478bd9Sstevel@tonic-gate static int ufs_access(struct vnode *, int, int, struct cred *); 117*7c478bd9Sstevel@tonic-gate static int ufs_lookup(struct vnode *, char *, struct vnode **, 118*7c478bd9Sstevel@tonic-gate struct pathname *, int, struct vnode *, struct cred *); 119*7c478bd9Sstevel@tonic-gate static int ufs_create(struct vnode *, char *, struct vattr *, enum vcexcl, 120*7c478bd9Sstevel@tonic-gate int, struct vnode **, struct cred *, int); 121*7c478bd9Sstevel@tonic-gate static int ufs_remove(struct vnode *, char *, struct cred *); 122*7c478bd9Sstevel@tonic-gate static int ufs_link(struct vnode *, struct vnode *, char *, struct cred *); 123*7c478bd9Sstevel@tonic-gate static int ufs_rename(struct vnode *, char *, struct vnode *, char *, 124*7c478bd9Sstevel@tonic-gate struct cred *); 125*7c478bd9Sstevel@tonic-gate static int ufs_mkdir(struct vnode *, char *, struct vattr *, struct vnode **, 126*7c478bd9Sstevel@tonic-gate struct cred *); 127*7c478bd9Sstevel@tonic-gate static int ufs_rmdir(struct vnode *, char *, struct vnode *, struct cred *); 128*7c478bd9Sstevel@tonic-gate static int ufs_readdir(struct vnode *, struct uio *, struct cred *, int *); 129*7c478bd9Sstevel@tonic-gate static int ufs_symlink(struct vnode *, char *, struct vattr *, char *, 130*7c478bd9Sstevel@tonic-gate struct cred *); 131*7c478bd9Sstevel@tonic-gate static int ufs_readlink(struct vnode *, struct uio *, struct cred *); 132*7c478bd9Sstevel@tonic-gate static int ufs_fsync(struct vnode *, int, struct cred *); 133*7c478bd9Sstevel@tonic-gate static void ufs_inactive(struct vnode *, struct cred *); 134*7c478bd9Sstevel@tonic-gate static int ufs_fid(struct vnode *, struct fid *); 135*7c478bd9Sstevel@tonic-gate static int ufs_rwlock(struct vnode *, int, caller_context_t *); 136*7c478bd9Sstevel@tonic-gate static void ufs_rwunlock(struct vnode *, int, caller_context_t *); 137*7c478bd9Sstevel@tonic-gate static int ufs_seek(struct vnode *, offset_t, offset_t *); 138*7c478bd9Sstevel@tonic-gate static int ufs_frlock(struct vnode *, int, struct flock64 *, int, offset_t, 139*7c478bd9Sstevel@tonic-gate struct flk_callback *, struct cred *); 140*7c478bd9Sstevel@tonic-gate static int ufs_space(struct vnode *, int, struct flock64 *, int, offset_t, 141*7c478bd9Sstevel@tonic-gate cred_t *, caller_context_t *); 142*7c478bd9Sstevel@tonic-gate static int ufs_getpage(struct vnode *, offset_t, size_t, uint_t *, 143*7c478bd9Sstevel@tonic-gate struct page **, size_t, struct seg *, caddr_t, 144*7c478bd9Sstevel@tonic-gate enum seg_rw, struct cred *); 145*7c478bd9Sstevel@tonic-gate static int ufs_putpage(struct vnode *, offset_t, size_t, int, struct cred *); 146*7c478bd9Sstevel@tonic-gate static int ufs_putpages(struct vnode *, offset_t, size_t, int, struct cred *); 147*7c478bd9Sstevel@tonic-gate static int ufs_map(struct vnode *, offset_t, struct as *, caddr_t *, size_t, 148*7c478bd9Sstevel@tonic-gate uchar_t, uchar_t, uint_t, struct cred *); 149*7c478bd9Sstevel@tonic-gate static int ufs_addmap(struct vnode *, offset_t, struct as *, caddr_t, size_t, 150*7c478bd9Sstevel@tonic-gate uchar_t, uchar_t, uint_t, struct cred *); 151*7c478bd9Sstevel@tonic-gate static int ufs_delmap(struct vnode *, offset_t, struct as *, caddr_t, size_t, 152*7c478bd9Sstevel@tonic-gate uint_t, uint_t, uint_t, struct cred *); 153*7c478bd9Sstevel@tonic-gate static int ufs_poll(vnode_t *, short, int, short *, struct pollhead **); 154*7c478bd9Sstevel@tonic-gate static int ufs_dump(vnode_t *, caddr_t, int, int); 155*7c478bd9Sstevel@tonic-gate static int ufs_l_pathconf(struct vnode *, int, ulong_t *, struct cred *); 156*7c478bd9Sstevel@tonic-gate static int ufs_pageio(struct vnode *, struct page *, u_offset_t, size_t, int, 157*7c478bd9Sstevel@tonic-gate struct cred *); 158*7c478bd9Sstevel@tonic-gate static int ufs_dump(vnode_t *, caddr_t, int, int); 159*7c478bd9Sstevel@tonic-gate static int ufs_dumpctl(vnode_t *, int, int *); 160*7c478bd9Sstevel@tonic-gate static daddr32_t *save_dblks(struct inode *, struct ufsvfs *, daddr32_t *, 161*7c478bd9Sstevel@tonic-gate daddr32_t *, int, int); 162*7c478bd9Sstevel@tonic-gate static int ufs_getsecattr(struct vnode *, vsecattr_t *, int, struct cred *); 163*7c478bd9Sstevel@tonic-gate static int ufs_setsecattr(struct vnode *, vsecattr_t *, int, struct cred *); 164*7c478bd9Sstevel@tonic-gate 165*7c478bd9Sstevel@tonic-gate /* 166*7c478bd9Sstevel@tonic-gate * For lockfs: ulockfs begin/end is now inlined in the ufs_xxx functions. 167*7c478bd9Sstevel@tonic-gate * 168*7c478bd9Sstevel@tonic-gate * XXX - ULOCKFS in fs_pathconf and ufs_ioctl is not inlined yet. 169*7c478bd9Sstevel@tonic-gate */ 170*7c478bd9Sstevel@tonic-gate struct vnodeops *ufs_vnodeops; 171*7c478bd9Sstevel@tonic-gate 172*7c478bd9Sstevel@tonic-gate const fs_operation_def_t ufs_vnodeops_template[] = { 173*7c478bd9Sstevel@tonic-gate VOPNAME_OPEN, ufs_open, /* will not be blocked by lockfs */ 174*7c478bd9Sstevel@tonic-gate VOPNAME_CLOSE, ufs_close, /* will not be blocked by lockfs */ 175*7c478bd9Sstevel@tonic-gate VOPNAME_READ, ufs_read, 176*7c478bd9Sstevel@tonic-gate VOPNAME_WRITE, ufs_write, 177*7c478bd9Sstevel@tonic-gate VOPNAME_IOCTL, ufs_ioctl, 178*7c478bd9Sstevel@tonic-gate VOPNAME_GETATTR, ufs_getattr, 179*7c478bd9Sstevel@tonic-gate VOPNAME_SETATTR, ufs_setattr, 180*7c478bd9Sstevel@tonic-gate VOPNAME_ACCESS, ufs_access, 181*7c478bd9Sstevel@tonic-gate VOPNAME_LOOKUP, ufs_lookup, 182*7c478bd9Sstevel@tonic-gate VOPNAME_CREATE, ufs_create, 183*7c478bd9Sstevel@tonic-gate VOPNAME_REMOVE, ufs_remove, 184*7c478bd9Sstevel@tonic-gate VOPNAME_LINK, ufs_link, 185*7c478bd9Sstevel@tonic-gate VOPNAME_RENAME, ufs_rename, 186*7c478bd9Sstevel@tonic-gate VOPNAME_MKDIR, ufs_mkdir, 187*7c478bd9Sstevel@tonic-gate VOPNAME_RMDIR, ufs_rmdir, 188*7c478bd9Sstevel@tonic-gate VOPNAME_READDIR, ufs_readdir, 189*7c478bd9Sstevel@tonic-gate VOPNAME_SYMLINK, ufs_symlink, 190*7c478bd9Sstevel@tonic-gate VOPNAME_READLINK, ufs_readlink, 191*7c478bd9Sstevel@tonic-gate VOPNAME_FSYNC, ufs_fsync, 192*7c478bd9Sstevel@tonic-gate VOPNAME_INACTIVE, (fs_generic_func_p) ufs_inactive, /* not blocked */ 193*7c478bd9Sstevel@tonic-gate VOPNAME_FID, ufs_fid, 194*7c478bd9Sstevel@tonic-gate VOPNAME_RWLOCK, ufs_rwlock, /* not blocked */ 195*7c478bd9Sstevel@tonic-gate VOPNAME_RWUNLOCK, (fs_generic_func_p) ufs_rwunlock, /* not blocked */ 196*7c478bd9Sstevel@tonic-gate VOPNAME_SEEK, ufs_seek, 197*7c478bd9Sstevel@tonic-gate VOPNAME_FRLOCK, ufs_frlock, 198*7c478bd9Sstevel@tonic-gate VOPNAME_SPACE, ufs_space, 199*7c478bd9Sstevel@tonic-gate VOPNAME_GETPAGE, ufs_getpage, 200*7c478bd9Sstevel@tonic-gate VOPNAME_PUTPAGE, ufs_putpage, 201*7c478bd9Sstevel@tonic-gate VOPNAME_MAP, (fs_generic_func_p) ufs_map, 202*7c478bd9Sstevel@tonic-gate VOPNAME_ADDMAP, (fs_generic_func_p) ufs_addmap, /* not blocked */ 203*7c478bd9Sstevel@tonic-gate VOPNAME_DELMAP, ufs_delmap, /* will not be blocked by lockfs */ 204*7c478bd9Sstevel@tonic-gate VOPNAME_POLL, (fs_generic_func_p) ufs_poll, /* not blocked */ 205*7c478bd9Sstevel@tonic-gate VOPNAME_DUMP, ufs_dump, 206*7c478bd9Sstevel@tonic-gate VOPNAME_PATHCONF, ufs_l_pathconf, 207*7c478bd9Sstevel@tonic-gate VOPNAME_PAGEIO, ufs_pageio, 208*7c478bd9Sstevel@tonic-gate VOPNAME_DUMPCTL, ufs_dumpctl, 209*7c478bd9Sstevel@tonic-gate VOPNAME_GETSECATTR, ufs_getsecattr, 210*7c478bd9Sstevel@tonic-gate VOPNAME_SETSECATTR, ufs_setsecattr, 211*7c478bd9Sstevel@tonic-gate VOPNAME_VNEVENT, fs_vnevent_support, 212*7c478bd9Sstevel@tonic-gate NULL, NULL 213*7c478bd9Sstevel@tonic-gate }; 214*7c478bd9Sstevel@tonic-gate 215*7c478bd9Sstevel@tonic-gate #define MAX_BACKFILE_COUNT 9999 216*7c478bd9Sstevel@tonic-gate 217*7c478bd9Sstevel@tonic-gate /* 218*7c478bd9Sstevel@tonic-gate * Created by ufs_dumpctl() to store a file's disk block info into memory. 219*7c478bd9Sstevel@tonic-gate * Used by ufs_dump() to dump data to disk directly. 220*7c478bd9Sstevel@tonic-gate */ 221*7c478bd9Sstevel@tonic-gate struct dump { 222*7c478bd9Sstevel@tonic-gate struct inode *ip; /* the file we contain */ 223*7c478bd9Sstevel@tonic-gate daddr_t fsbs; /* number of blocks stored */ 224*7c478bd9Sstevel@tonic-gate struct timeval32 time; /* time stamp for the struct */ 225*7c478bd9Sstevel@tonic-gate daddr32_t dblk[1]; /* place holder for block info */ 226*7c478bd9Sstevel@tonic-gate }; 227*7c478bd9Sstevel@tonic-gate 228*7c478bd9Sstevel@tonic-gate static struct dump *dump_info = NULL; 229*7c478bd9Sstevel@tonic-gate 230*7c478bd9Sstevel@tonic-gate /* 231*7c478bd9Sstevel@tonic-gate * Previously there was no special action required for ordinary files. 232*7c478bd9Sstevel@tonic-gate * (Devices are handled through the device file system.) 233*7c478bd9Sstevel@tonic-gate * Now we support Large Files and Large File API requires open to 234*7c478bd9Sstevel@tonic-gate * fail if file is large. 235*7c478bd9Sstevel@tonic-gate * We could take care to prevent data corruption 236*7c478bd9Sstevel@tonic-gate * by doing an atomic check of size and truncate if file is opened with 237*7c478bd9Sstevel@tonic-gate * FTRUNC flag set but traditionally this is being done by the vfs/vnode 238*7c478bd9Sstevel@tonic-gate * layers. So taking care of truncation here is a change in the existing 239*7c478bd9Sstevel@tonic-gate * semantics of VOP_OPEN and therefore we chose not to implement any thing 240*7c478bd9Sstevel@tonic-gate * here. The check for the size of the file > 2GB is being done at the 241*7c478bd9Sstevel@tonic-gate * vfs layer in routine vn_open(). 242*7c478bd9Sstevel@tonic-gate */ 243*7c478bd9Sstevel@tonic-gate 244*7c478bd9Sstevel@tonic-gate /* ARGSUSED */ 245*7c478bd9Sstevel@tonic-gate static int 246*7c478bd9Sstevel@tonic-gate ufs_open(struct vnode **vpp, int flag, struct cred *cr) 247*7c478bd9Sstevel@tonic-gate { 248*7c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_UFS, TR_UFS_OPEN, "ufs_open:vpp %p", vpp); 249*7c478bd9Sstevel@tonic-gate return (0); 250*7c478bd9Sstevel@tonic-gate } 251*7c478bd9Sstevel@tonic-gate 252*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 253*7c478bd9Sstevel@tonic-gate static int 254*7c478bd9Sstevel@tonic-gate ufs_close(struct vnode *vp, int flag, int count, offset_t offset, 255*7c478bd9Sstevel@tonic-gate struct cred *cr) 256*7c478bd9Sstevel@tonic-gate { 257*7c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_UFS, TR_UFS_CLOSE, "ufs_close:vp %p", vp); 258*7c478bd9Sstevel@tonic-gate 259*7c478bd9Sstevel@tonic-gate cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 260*7c478bd9Sstevel@tonic-gate cleanshares(vp, ttoproc(curthread)->p_pid); 261*7c478bd9Sstevel@tonic-gate 262*7c478bd9Sstevel@tonic-gate /* 263*7c478bd9Sstevel@tonic-gate * Push partially filled cluster at last close. 264*7c478bd9Sstevel@tonic-gate * ``last close'' is approximated because the dnlc 265*7c478bd9Sstevel@tonic-gate * may have a hold on the vnode. 266*7c478bd9Sstevel@tonic-gate * Checking for VBAD here will also act as a forced umount check. 267*7c478bd9Sstevel@tonic-gate */ 268*7c478bd9Sstevel@tonic-gate if (vp->v_count <= 2 && vp->v_type != VBAD) { 269*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 270*7c478bd9Sstevel@tonic-gate if (ip->i_delaylen) { 271*7c478bd9Sstevel@tonic-gate ins.in_poc.value.ul++; 272*7c478bd9Sstevel@tonic-gate (void) ufs_putpages(vp, ip->i_delayoff, ip->i_delaylen, 273*7c478bd9Sstevel@tonic-gate B_ASYNC | B_FREE, cr); 274*7c478bd9Sstevel@tonic-gate ip->i_delaylen = 0; 275*7c478bd9Sstevel@tonic-gate } 276*7c478bd9Sstevel@tonic-gate } 277*7c478bd9Sstevel@tonic-gate 278*7c478bd9Sstevel@tonic-gate return (0); 279*7c478bd9Sstevel@tonic-gate } 280*7c478bd9Sstevel@tonic-gate 281*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 282*7c478bd9Sstevel@tonic-gate static int 283*7c478bd9Sstevel@tonic-gate ufs_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr, 284*7c478bd9Sstevel@tonic-gate struct caller_context *ct) 285*7c478bd9Sstevel@tonic-gate { 286*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 287*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp; 288*7c478bd9Sstevel@tonic-gate struct ulockfs *ulp = NULL; 289*7c478bd9Sstevel@tonic-gate int error = 0; 290*7c478bd9Sstevel@tonic-gate int intrans = 0; 291*7c478bd9Sstevel@tonic-gate 292*7c478bd9Sstevel@tonic-gate ASSERT(RW_READ_HELD(&ip->i_rwlock)); 293*7c478bd9Sstevel@tonic-gate TRACE_3(TR_FAC_UFS, TR_UFS_READ_START, 294*7c478bd9Sstevel@tonic-gate "ufs_read_start:vp %p uiop %p ioflag %x", 295*7c478bd9Sstevel@tonic-gate vp, uiop, ioflag); 296*7c478bd9Sstevel@tonic-gate 297*7c478bd9Sstevel@tonic-gate /* 298*7c478bd9Sstevel@tonic-gate * Mandatory locking needs to be done before ufs_lockfs_begin() 299*7c478bd9Sstevel@tonic-gate * and TRANS_BEGIN_SYNC() calls since mandatory locks can sleep. 300*7c478bd9Sstevel@tonic-gate */ 301*7c478bd9Sstevel@tonic-gate if (MANDLOCK(vp, ip->i_mode)) { 302*7c478bd9Sstevel@tonic-gate /* 303*7c478bd9Sstevel@tonic-gate * ufs_getattr ends up being called by chklock 304*7c478bd9Sstevel@tonic-gate */ 305*7c478bd9Sstevel@tonic-gate error = chklock(vp, FREAD, uiop->uio_loffset, 306*7c478bd9Sstevel@tonic-gate uiop->uio_resid, uiop->uio_fmode, ct); 307*7c478bd9Sstevel@tonic-gate if (error) 308*7c478bd9Sstevel@tonic-gate goto out; 309*7c478bd9Sstevel@tonic-gate } 310*7c478bd9Sstevel@tonic-gate 311*7c478bd9Sstevel@tonic-gate ufsvfsp = ip->i_ufsvfs; 312*7c478bd9Sstevel@tonic-gate error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_READ_MASK); 313*7c478bd9Sstevel@tonic-gate if (error) 314*7c478bd9Sstevel@tonic-gate goto out; 315*7c478bd9Sstevel@tonic-gate 316*7c478bd9Sstevel@tonic-gate /* 317*7c478bd9Sstevel@tonic-gate * In the case that a directory is opened for reading as a file 318*7c478bd9Sstevel@tonic-gate * (eg "cat .") with the O_RSYNC, O_SYNC and O_DSYNC flags set. 319*7c478bd9Sstevel@tonic-gate * The locking order had to be changed to avoid a deadlock with 320*7c478bd9Sstevel@tonic-gate * an update taking place on that directory at the same time. 321*7c478bd9Sstevel@tonic-gate */ 322*7c478bd9Sstevel@tonic-gate if ((ip->i_mode & IFMT) == IFDIR) { 323*7c478bd9Sstevel@tonic-gate 324*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER); 325*7c478bd9Sstevel@tonic-gate error = rdip(ip, uiop, ioflag, cr); 326*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 327*7c478bd9Sstevel@tonic-gate 328*7c478bd9Sstevel@tonic-gate if (error) { 329*7c478bd9Sstevel@tonic-gate if (ulp) 330*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 331*7c478bd9Sstevel@tonic-gate goto out; 332*7c478bd9Sstevel@tonic-gate } 333*7c478bd9Sstevel@tonic-gate 334*7c478bd9Sstevel@tonic-gate if (ulp && (ioflag & FRSYNC) && (ioflag & (FSYNC | FDSYNC)) && 335*7c478bd9Sstevel@tonic-gate TRANS_ISTRANS(ufsvfsp)) { 336*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_rwlock); 337*7c478bd9Sstevel@tonic-gate TRANS_BEGIN_SYNC(ufsvfsp, TOP_READ_SYNC, TOP_READ_SIZE, 338*7c478bd9Sstevel@tonic-gate error); 339*7c478bd9Sstevel@tonic-gate ASSERT(!error); 340*7c478bd9Sstevel@tonic-gate TRANS_END_SYNC(ufsvfsp, error, TOP_READ_SYNC, 341*7c478bd9Sstevel@tonic-gate TOP_READ_SIZE); 342*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_rwlock, RW_READER); 343*7c478bd9Sstevel@tonic-gate } 344*7c478bd9Sstevel@tonic-gate } else { 345*7c478bd9Sstevel@tonic-gate /* 346*7c478bd9Sstevel@tonic-gate * Only transact reads to files opened for sync-read and 347*7c478bd9Sstevel@tonic-gate * sync-write on a file system that is not write locked. 348*7c478bd9Sstevel@tonic-gate * 349*7c478bd9Sstevel@tonic-gate * The ``not write locked'' check prevents problems with 350*7c478bd9Sstevel@tonic-gate * enabling/disabling logging on a busy file system. E.g., 351*7c478bd9Sstevel@tonic-gate * logging exists at the beginning of the read but does not 352*7c478bd9Sstevel@tonic-gate * at the end. 353*7c478bd9Sstevel@tonic-gate * 354*7c478bd9Sstevel@tonic-gate */ 355*7c478bd9Sstevel@tonic-gate if (ulp && (ioflag & FRSYNC) && (ioflag & (FSYNC | FDSYNC)) && 356*7c478bd9Sstevel@tonic-gate TRANS_ISTRANS(ufsvfsp)) { 357*7c478bd9Sstevel@tonic-gate TRANS_BEGIN_SYNC(ufsvfsp, TOP_READ_SYNC, TOP_READ_SIZE, 358*7c478bd9Sstevel@tonic-gate error); 359*7c478bd9Sstevel@tonic-gate ASSERT(!error); 360*7c478bd9Sstevel@tonic-gate intrans = 1; 361*7c478bd9Sstevel@tonic-gate } 362*7c478bd9Sstevel@tonic-gate 363*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER); 364*7c478bd9Sstevel@tonic-gate error = rdip(ip, uiop, ioflag, cr); 365*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 366*7c478bd9Sstevel@tonic-gate 367*7c478bd9Sstevel@tonic-gate if (intrans) { 368*7c478bd9Sstevel@tonic-gate TRANS_END_SYNC(ufsvfsp, error, TOP_READ_SYNC, 369*7c478bd9Sstevel@tonic-gate TOP_READ_SIZE); 370*7c478bd9Sstevel@tonic-gate } 371*7c478bd9Sstevel@tonic-gate } 372*7c478bd9Sstevel@tonic-gate 373*7c478bd9Sstevel@tonic-gate if (ulp) { 374*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 375*7c478bd9Sstevel@tonic-gate } 376*7c478bd9Sstevel@tonic-gate out: 377*7c478bd9Sstevel@tonic-gate 378*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_READ_END, 379*7c478bd9Sstevel@tonic-gate "ufs_read_end:vp %p error %d", vp, error); 380*7c478bd9Sstevel@tonic-gate return (error); 381*7c478bd9Sstevel@tonic-gate } 382*7c478bd9Sstevel@tonic-gate 383*7c478bd9Sstevel@tonic-gate extern int ufs_HW; /* high water mark */ 384*7c478bd9Sstevel@tonic-gate extern int ufs_LW; /* low water mark */ 385*7c478bd9Sstevel@tonic-gate int ufs_WRITES = 1; /* XXX - enable/disable */ 386*7c478bd9Sstevel@tonic-gate int ufs_throttles = 0; /* throttling count */ 387*7c478bd9Sstevel@tonic-gate int ufs_allow_shared_writes = 1; /* directio shared writes */ 388*7c478bd9Sstevel@tonic-gate 389*7c478bd9Sstevel@tonic-gate static int 390*7c478bd9Sstevel@tonic-gate ufs_check_rewrite(struct inode *ip, struct uio *uiop, int ioflag) 391*7c478bd9Sstevel@tonic-gate { 392*7c478bd9Sstevel@tonic-gate 393*7c478bd9Sstevel@tonic-gate /* 394*7c478bd9Sstevel@tonic-gate * Filter to determine if this request is suitable as a 395*7c478bd9Sstevel@tonic-gate * concurrent rewrite. This write must not allocate blocks 396*7c478bd9Sstevel@tonic-gate * by extending the file or filling in holes. No use trying 397*7c478bd9Sstevel@tonic-gate * through FSYNC descriptors as the inode will be synchronously 398*7c478bd9Sstevel@tonic-gate * updated after the write. The uio structure has not yet been 399*7c478bd9Sstevel@tonic-gate * checked for sanity, so assume nothing. 400*7c478bd9Sstevel@tonic-gate */ 401*7c478bd9Sstevel@tonic-gate return (((ip->i_mode & IFMT) == IFREG) && !(ioflag & FAPPEND) && 402*7c478bd9Sstevel@tonic-gate (uiop->uio_loffset >= (offset_t)0) && 403*7c478bd9Sstevel@tonic-gate (uiop->uio_loffset < ip->i_size) && (uiop->uio_resid > 0) && 404*7c478bd9Sstevel@tonic-gate ((ip->i_size - uiop->uio_loffset) >= uiop->uio_resid) && 405*7c478bd9Sstevel@tonic-gate !(ioflag & FSYNC) && !bmap_has_holes(ip) && 406*7c478bd9Sstevel@tonic-gate ufs_allow_shared_writes); 407*7c478bd9Sstevel@tonic-gate } 408*7c478bd9Sstevel@tonic-gate 409*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 410*7c478bd9Sstevel@tonic-gate static int 411*7c478bd9Sstevel@tonic-gate ufs_write(struct vnode *vp, struct uio *uiop, int ioflag, cred_t *cr, 412*7c478bd9Sstevel@tonic-gate caller_context_t *ct) 413*7c478bd9Sstevel@tonic-gate { 414*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 415*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp; 416*7c478bd9Sstevel@tonic-gate struct ulockfs *ulp; 417*7c478bd9Sstevel@tonic-gate int retry = 1; 418*7c478bd9Sstevel@tonic-gate int error, resv, resid = 0; 419*7c478bd9Sstevel@tonic-gate int directio_status; 420*7c478bd9Sstevel@tonic-gate int exclusive; 421*7c478bd9Sstevel@tonic-gate long start_resid = uiop->uio_resid; 422*7c478bd9Sstevel@tonic-gate 423*7c478bd9Sstevel@tonic-gate TRACE_3(TR_FAC_UFS, TR_UFS_WRITE_START, 424*7c478bd9Sstevel@tonic-gate "ufs_write_start:vp %p uiop %p ioflag %x", 425*7c478bd9Sstevel@tonic-gate vp, uiop, ioflag); 426*7c478bd9Sstevel@tonic-gate 427*7c478bd9Sstevel@tonic-gate ASSERT(RW_LOCK_HELD(&ip->i_rwlock)); 428*7c478bd9Sstevel@tonic-gate 429*7c478bd9Sstevel@tonic-gate retry_mandlock: 430*7c478bd9Sstevel@tonic-gate /* 431*7c478bd9Sstevel@tonic-gate * Mandatory locking needs to be done before ufs_lockfs_begin() 432*7c478bd9Sstevel@tonic-gate * and TRANS_BEGIN_[A]SYNC() calls since mandatory locks can sleep. 433*7c478bd9Sstevel@tonic-gate * Check for forced unmounts normally done in ufs_lockfs_begin(). 434*7c478bd9Sstevel@tonic-gate */ 435*7c478bd9Sstevel@tonic-gate if ((ufsvfsp = ip->i_ufsvfs) == NULL) { 436*7c478bd9Sstevel@tonic-gate error = EIO; 437*7c478bd9Sstevel@tonic-gate goto out; 438*7c478bd9Sstevel@tonic-gate } 439*7c478bd9Sstevel@tonic-gate if (MANDLOCK(vp, ip->i_mode)) { 440*7c478bd9Sstevel@tonic-gate 441*7c478bd9Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&ip->i_rwlock)); 442*7c478bd9Sstevel@tonic-gate 443*7c478bd9Sstevel@tonic-gate /* 444*7c478bd9Sstevel@tonic-gate * ufs_getattr ends up being called by chklock 445*7c478bd9Sstevel@tonic-gate */ 446*7c478bd9Sstevel@tonic-gate error = chklock(vp, FWRITE, uiop->uio_loffset, 447*7c478bd9Sstevel@tonic-gate uiop->uio_resid, uiop->uio_fmode, ct); 448*7c478bd9Sstevel@tonic-gate if (error) 449*7c478bd9Sstevel@tonic-gate goto out; 450*7c478bd9Sstevel@tonic-gate } 451*7c478bd9Sstevel@tonic-gate 452*7c478bd9Sstevel@tonic-gate /* i_rwlock can change in chklock */ 453*7c478bd9Sstevel@tonic-gate exclusive = rw_write_held(&ip->i_rwlock); 454*7c478bd9Sstevel@tonic-gate 455*7c478bd9Sstevel@tonic-gate /* 456*7c478bd9Sstevel@tonic-gate * Check for fast-path special case of directio re-writes. 457*7c478bd9Sstevel@tonic-gate */ 458*7c478bd9Sstevel@tonic-gate if ((ip->i_flag & IDIRECTIO || ufsvfsp->vfs_forcedirectio) && 459*7c478bd9Sstevel@tonic-gate !exclusive && ufs_check_rewrite(ip, uiop, ioflag)) { 460*7c478bd9Sstevel@tonic-gate 461*7c478bd9Sstevel@tonic-gate error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_WRITE_MASK); 462*7c478bd9Sstevel@tonic-gate if (error) 463*7c478bd9Sstevel@tonic-gate goto out; 464*7c478bd9Sstevel@tonic-gate 465*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER); 466*7c478bd9Sstevel@tonic-gate error = ufs_directio_write(ip, uiop, ioflag, 1, cr, 467*7c478bd9Sstevel@tonic-gate &directio_status); 468*7c478bd9Sstevel@tonic-gate if (directio_status == DIRECTIO_SUCCESS) { 469*7c478bd9Sstevel@tonic-gate uint_t i_flag_save; 470*7c478bd9Sstevel@tonic-gate 471*7c478bd9Sstevel@tonic-gate if (start_resid != uiop->uio_resid) 472*7c478bd9Sstevel@tonic-gate error = 0; 473*7c478bd9Sstevel@tonic-gate /* 474*7c478bd9Sstevel@tonic-gate * Special treatment of access times for re-writes. 475*7c478bd9Sstevel@tonic-gate * If IMOD is not already set, then convert it 476*7c478bd9Sstevel@tonic-gate * to IMODACC for this operation. This defers 477*7c478bd9Sstevel@tonic-gate * entering a delta into the log until the inode 478*7c478bd9Sstevel@tonic-gate * is flushed. This mimics what is done for read 479*7c478bd9Sstevel@tonic-gate * operations and inode access time. 480*7c478bd9Sstevel@tonic-gate */ 481*7c478bd9Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 482*7c478bd9Sstevel@tonic-gate i_flag_save = ip->i_flag; 483*7c478bd9Sstevel@tonic-gate ip->i_flag |= IUPD | ICHG; 484*7c478bd9Sstevel@tonic-gate ip->i_seq++; 485*7c478bd9Sstevel@tonic-gate ITIMES_NOLOCK(ip); 486*7c478bd9Sstevel@tonic-gate if ((i_flag_save & IMOD) == 0) { 487*7c478bd9Sstevel@tonic-gate ip->i_flag &= ~IMOD; 488*7c478bd9Sstevel@tonic-gate ip->i_flag |= IMODACC; 489*7c478bd9Sstevel@tonic-gate } 490*7c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 491*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 492*7c478bd9Sstevel@tonic-gate if (ulp) 493*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 494*7c478bd9Sstevel@tonic-gate goto out; 495*7c478bd9Sstevel@tonic-gate } 496*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 497*7c478bd9Sstevel@tonic-gate if (ulp) 498*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 499*7c478bd9Sstevel@tonic-gate } 500*7c478bd9Sstevel@tonic-gate 501*7c478bd9Sstevel@tonic-gate if (!exclusive && !rw_tryupgrade(&ip->i_rwlock)) { 502*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_rwlock); 503*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_rwlock, RW_WRITER); 504*7c478bd9Sstevel@tonic-gate /* 505*7c478bd9Sstevel@tonic-gate * Mandatory locking could have been enabled 506*7c478bd9Sstevel@tonic-gate * after dropping the i_rwlock. 507*7c478bd9Sstevel@tonic-gate */ 508*7c478bd9Sstevel@tonic-gate if (MANDLOCK(vp, ip->i_mode)) 509*7c478bd9Sstevel@tonic-gate goto retry_mandlock; 510*7c478bd9Sstevel@tonic-gate } 511*7c478bd9Sstevel@tonic-gate 512*7c478bd9Sstevel@tonic-gate error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_WRITE_MASK); 513*7c478bd9Sstevel@tonic-gate if (error) 514*7c478bd9Sstevel@tonic-gate goto out; 515*7c478bd9Sstevel@tonic-gate 516*7c478bd9Sstevel@tonic-gate /* 517*7c478bd9Sstevel@tonic-gate * Amount of log space needed for this write 518*7c478bd9Sstevel@tonic-gate */ 519*7c478bd9Sstevel@tonic-gate TRANS_WRITE_RESV(ip, uiop, ulp, &resv, &resid); 520*7c478bd9Sstevel@tonic-gate 521*7c478bd9Sstevel@tonic-gate /* 522*7c478bd9Sstevel@tonic-gate * Throttle writes. 523*7c478bd9Sstevel@tonic-gate */ 524*7c478bd9Sstevel@tonic-gate if (ufs_WRITES && (ip->i_writes > ufs_HW)) { 525*7c478bd9Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 526*7c478bd9Sstevel@tonic-gate while (ip->i_writes > ufs_HW) { 527*7c478bd9Sstevel@tonic-gate ufs_throttles++; 528*7c478bd9Sstevel@tonic-gate cv_wait(&ip->i_wrcv, &ip->i_tlock); 529*7c478bd9Sstevel@tonic-gate } 530*7c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 531*7c478bd9Sstevel@tonic-gate } 532*7c478bd9Sstevel@tonic-gate 533*7c478bd9Sstevel@tonic-gate /* 534*7c478bd9Sstevel@tonic-gate * Enter Transaction 535*7c478bd9Sstevel@tonic-gate */ 536*7c478bd9Sstevel@tonic-gate if (ioflag & (FSYNC|FDSYNC)) { 537*7c478bd9Sstevel@tonic-gate if (ulp) { 538*7c478bd9Sstevel@tonic-gate int terr = 0; 539*7c478bd9Sstevel@tonic-gate TRANS_BEGIN_SYNC(ufsvfsp, TOP_WRITE_SYNC, resv, terr); 540*7c478bd9Sstevel@tonic-gate ASSERT(!terr); 541*7c478bd9Sstevel@tonic-gate } 542*7c478bd9Sstevel@tonic-gate } else { 543*7c478bd9Sstevel@tonic-gate if (ulp) 544*7c478bd9Sstevel@tonic-gate TRANS_BEGIN_ASYNC(ufsvfsp, TOP_WRITE, resv); 545*7c478bd9Sstevel@tonic-gate } 546*7c478bd9Sstevel@tonic-gate 547*7c478bd9Sstevel@tonic-gate /* 548*7c478bd9Sstevel@tonic-gate * Write the file 549*7c478bd9Sstevel@tonic-gate */ 550*7c478bd9Sstevel@tonic-gate rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 551*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 552*7c478bd9Sstevel@tonic-gate if ((ioflag & FAPPEND) != 0 && (ip->i_mode & IFMT) == IFREG) { 553*7c478bd9Sstevel@tonic-gate /* 554*7c478bd9Sstevel@tonic-gate * In append mode start at end of file. 555*7c478bd9Sstevel@tonic-gate */ 556*7c478bd9Sstevel@tonic-gate uiop->uio_loffset = ip->i_size; 557*7c478bd9Sstevel@tonic-gate } 558*7c478bd9Sstevel@tonic-gate 559*7c478bd9Sstevel@tonic-gate /* 560*7c478bd9Sstevel@tonic-gate * Mild optimisation, don't call ufs_trans_write() unless we have to 561*7c478bd9Sstevel@tonic-gate * Also, suppress file system full messages if we will retry. 562*7c478bd9Sstevel@tonic-gate */ 563*7c478bd9Sstevel@tonic-gate if (retry) 564*7c478bd9Sstevel@tonic-gate ip->i_flag |= IQUIET; 565*7c478bd9Sstevel@tonic-gate if (resid) { 566*7c478bd9Sstevel@tonic-gate TRANS_WRITE(ip, uiop, ioflag, error, ulp, cr, resv, resid); 567*7c478bd9Sstevel@tonic-gate } else { 568*7c478bd9Sstevel@tonic-gate error = wrip(ip, uiop, ioflag, cr); 569*7c478bd9Sstevel@tonic-gate } 570*7c478bd9Sstevel@tonic-gate ip->i_flag &= ~IQUIET; 571*7c478bd9Sstevel@tonic-gate 572*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 573*7c478bd9Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 574*7c478bd9Sstevel@tonic-gate 575*7c478bd9Sstevel@tonic-gate /* 576*7c478bd9Sstevel@tonic-gate * Leave Transaction 577*7c478bd9Sstevel@tonic-gate */ 578*7c478bd9Sstevel@tonic-gate if (ulp) { 579*7c478bd9Sstevel@tonic-gate if (ioflag & (FSYNC|FDSYNC)) { 580*7c478bd9Sstevel@tonic-gate int terr = 0; 581*7c478bd9Sstevel@tonic-gate TRANS_END_SYNC(ufsvfsp, terr, TOP_WRITE_SYNC, resv); 582*7c478bd9Sstevel@tonic-gate if (error == 0) 583*7c478bd9Sstevel@tonic-gate error = terr; 584*7c478bd9Sstevel@tonic-gate } else { 585*7c478bd9Sstevel@tonic-gate TRANS_END_ASYNC(ufsvfsp, TOP_WRITE, resv); 586*7c478bd9Sstevel@tonic-gate } 587*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 588*7c478bd9Sstevel@tonic-gate } 589*7c478bd9Sstevel@tonic-gate out: 590*7c478bd9Sstevel@tonic-gate if ((error == ENOSPC) && retry && TRANS_ISTRANS(ufsvfsp)) { 591*7c478bd9Sstevel@tonic-gate /* 592*7c478bd9Sstevel@tonic-gate * Any blocks tied up in pending deletes? 593*7c478bd9Sstevel@tonic-gate */ 594*7c478bd9Sstevel@tonic-gate ufs_delete_drain_wait(ufsvfsp, 1); 595*7c478bd9Sstevel@tonic-gate retry = 0; 596*7c478bd9Sstevel@tonic-gate goto retry_mandlock; 597*7c478bd9Sstevel@tonic-gate } 598*7c478bd9Sstevel@tonic-gate 599*7c478bd9Sstevel@tonic-gate if (error == ENOSPC && (start_resid != uiop->uio_resid)) 600*7c478bd9Sstevel@tonic-gate error = 0; 601*7c478bd9Sstevel@tonic-gate 602*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_WRITE_END, 603*7c478bd9Sstevel@tonic-gate "ufs_write_end:vp %p error %d", vp, error); 604*7c478bd9Sstevel@tonic-gate return (error); 605*7c478bd9Sstevel@tonic-gate } 606*7c478bd9Sstevel@tonic-gate 607*7c478bd9Sstevel@tonic-gate /* 608*7c478bd9Sstevel@tonic-gate * Don't cache write blocks to files with the sticky bit set. 609*7c478bd9Sstevel@tonic-gate * Used to keep swap files from blowing the page cache on a server. 610*7c478bd9Sstevel@tonic-gate */ 611*7c478bd9Sstevel@tonic-gate int stickyhack = 1; 612*7c478bd9Sstevel@tonic-gate 613*7c478bd9Sstevel@tonic-gate /* 614*7c478bd9Sstevel@tonic-gate * Free behind hacks. The pager is busted. 615*7c478bd9Sstevel@tonic-gate * XXX - need to pass the information down to writedone() in a flag like B_SEQ 616*7c478bd9Sstevel@tonic-gate * or B_FREE_IF_TIGHT_ON_MEMORY. 617*7c478bd9Sstevel@tonic-gate */ 618*7c478bd9Sstevel@tonic-gate int freebehind = 1; 619*7c478bd9Sstevel@tonic-gate int smallfile = 32 * 1024; 620*7c478bd9Sstevel@tonic-gate 621*7c478bd9Sstevel@tonic-gate /* 622*7c478bd9Sstevel@tonic-gate * While we should, in most cases, cache the pages for write, we 623*7c478bd9Sstevel@tonic-gate * may also want to cache the pages for read as long as they are 624*7c478bd9Sstevel@tonic-gate * frequently re-usable. 625*7c478bd9Sstevel@tonic-gate * 626*7c478bd9Sstevel@tonic-gate * If cache_read_ahead = 1, the pages for read will go to the tail 627*7c478bd9Sstevel@tonic-gate * of the cache list when they are released, otherwise go to the head. 628*7c478bd9Sstevel@tonic-gate */ 629*7c478bd9Sstevel@tonic-gate int cache_read_ahead = 0; 630*7c478bd9Sstevel@tonic-gate 631*7c478bd9Sstevel@tonic-gate /* 632*7c478bd9Sstevel@tonic-gate * wrip does the real work of write requests for ufs. 633*7c478bd9Sstevel@tonic-gate */ 634*7c478bd9Sstevel@tonic-gate int 635*7c478bd9Sstevel@tonic-gate wrip(struct inode *ip, struct uio *uio, int ioflag, struct cred *cr) 636*7c478bd9Sstevel@tonic-gate { 637*7c478bd9Sstevel@tonic-gate rlim64_t limit = uio->uio_llimit; 638*7c478bd9Sstevel@tonic-gate u_offset_t off; 639*7c478bd9Sstevel@tonic-gate u_offset_t old_i_size; 640*7c478bd9Sstevel@tonic-gate struct fs *fs; 641*7c478bd9Sstevel@tonic-gate struct vnode *vp; 642*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp; 643*7c478bd9Sstevel@tonic-gate caddr_t base; 644*7c478bd9Sstevel@tonic-gate long start_resid = uio->uio_resid; /* save starting resid */ 645*7c478bd9Sstevel@tonic-gate long premove_resid; /* resid before uiomove() */ 646*7c478bd9Sstevel@tonic-gate uint_t flags; 647*7c478bd9Sstevel@tonic-gate int newpage; 648*7c478bd9Sstevel@tonic-gate int iupdat_flag, directio_status; 649*7c478bd9Sstevel@tonic-gate int n, on, mapon; 650*7c478bd9Sstevel@tonic-gate int error, pagecreate; 651*7c478bd9Sstevel@tonic-gate int do_dqrwlock; /* drop/reacquire vfs_dqrwlock */ 652*7c478bd9Sstevel@tonic-gate int32_t iblocks; 653*7c478bd9Sstevel@tonic-gate int new_iblocks; 654*7c478bd9Sstevel@tonic-gate 655*7c478bd9Sstevel@tonic-gate /* 656*7c478bd9Sstevel@tonic-gate * ip->i_size is incremented before the uiomove 657*7c478bd9Sstevel@tonic-gate * is done on a write. If the move fails (bad user 658*7c478bd9Sstevel@tonic-gate * address) reset ip->i_size. 659*7c478bd9Sstevel@tonic-gate * The better way would be to increment ip->i_size 660*7c478bd9Sstevel@tonic-gate * only if the uiomove succeeds. 661*7c478bd9Sstevel@tonic-gate */ 662*7c478bd9Sstevel@tonic-gate int i_size_changed = 0; 663*7c478bd9Sstevel@tonic-gate o_mode_t type; 664*7c478bd9Sstevel@tonic-gate int i_seq_needed = 0; 665*7c478bd9Sstevel@tonic-gate 666*7c478bd9Sstevel@tonic-gate vp = ITOV(ip); 667*7c478bd9Sstevel@tonic-gate 668*7c478bd9Sstevel@tonic-gate /* 669*7c478bd9Sstevel@tonic-gate * check for forced unmount - should not happen as 670*7c478bd9Sstevel@tonic-gate * the request passed the lockfs checks. 671*7c478bd9Sstevel@tonic-gate */ 672*7c478bd9Sstevel@tonic-gate if ((ufsvfsp = ip->i_ufsvfs) == NULL) 673*7c478bd9Sstevel@tonic-gate return (EIO); 674*7c478bd9Sstevel@tonic-gate 675*7c478bd9Sstevel@tonic-gate fs = ip->i_fs; 676*7c478bd9Sstevel@tonic-gate 677*7c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_UFS, TR_UFS_RWIP_START, 678*7c478bd9Sstevel@tonic-gate "ufs_wrip_start:vp %p", vp); 679*7c478bd9Sstevel@tonic-gate 680*7c478bd9Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&ip->i_contents)); 681*7c478bd9Sstevel@tonic-gate 682*7c478bd9Sstevel@tonic-gate /* check for valid filetype */ 683*7c478bd9Sstevel@tonic-gate type = ip->i_mode & IFMT; 684*7c478bd9Sstevel@tonic-gate if ((type != IFREG) && (type != IFDIR) && (type != IFATTRDIR) && 685*7c478bd9Sstevel@tonic-gate (type != IFLNK) && (type != IFSHAD)) { 686*7c478bd9Sstevel@tonic-gate return (EIO); 687*7c478bd9Sstevel@tonic-gate } 688*7c478bd9Sstevel@tonic-gate 689*7c478bd9Sstevel@tonic-gate /* 690*7c478bd9Sstevel@tonic-gate * the actual limit of UFS file size 691*7c478bd9Sstevel@tonic-gate * is UFS_MAXOFFSET_T 692*7c478bd9Sstevel@tonic-gate */ 693*7c478bd9Sstevel@tonic-gate if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 694*7c478bd9Sstevel@tonic-gate limit = MAXOFFSET_T; 695*7c478bd9Sstevel@tonic-gate 696*7c478bd9Sstevel@tonic-gate if (uio->uio_loffset >= limit) { 697*7c478bd9Sstevel@tonic-gate proc_t *p = ttoproc(curthread); 698*7c478bd9Sstevel@tonic-gate 699*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_RWIP_END, 700*7c478bd9Sstevel@tonic-gate "ufs_wrip_end:vp %p error %d", vp, EINVAL); 701*7c478bd9Sstevel@tonic-gate 702*7c478bd9Sstevel@tonic-gate mutex_enter(&p->p_lock); 703*7c478bd9Sstevel@tonic-gate (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls, 704*7c478bd9Sstevel@tonic-gate p, RCA_UNSAFE_SIGINFO); 705*7c478bd9Sstevel@tonic-gate mutex_exit(&p->p_lock); 706*7c478bd9Sstevel@tonic-gate return (EFBIG); 707*7c478bd9Sstevel@tonic-gate } 708*7c478bd9Sstevel@tonic-gate 709*7c478bd9Sstevel@tonic-gate /* 710*7c478bd9Sstevel@tonic-gate * if largefiles are disallowed, the limit is 711*7c478bd9Sstevel@tonic-gate * the pre-largefiles value of 2GB 712*7c478bd9Sstevel@tonic-gate */ 713*7c478bd9Sstevel@tonic-gate if (ufsvfsp->vfs_lfflags & UFS_LARGEFILES) 714*7c478bd9Sstevel@tonic-gate limit = MIN(UFS_MAXOFFSET_T, limit); 715*7c478bd9Sstevel@tonic-gate else 716*7c478bd9Sstevel@tonic-gate limit = MIN(MAXOFF32_T, limit); 717*7c478bd9Sstevel@tonic-gate 718*7c478bd9Sstevel@tonic-gate if (uio->uio_loffset < (offset_t)0) { 719*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_RWIP_END, 720*7c478bd9Sstevel@tonic-gate "ufs_wrip_end:vp %p error %d", vp, EINVAL); 721*7c478bd9Sstevel@tonic-gate return (EINVAL); 722*7c478bd9Sstevel@tonic-gate } 723*7c478bd9Sstevel@tonic-gate if (uio->uio_resid == 0) { 724*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_RWIP_END, 725*7c478bd9Sstevel@tonic-gate "ufs_wrip_end:vp %p error %d", vp, 0); 726*7c478bd9Sstevel@tonic-gate return (0); 727*7c478bd9Sstevel@tonic-gate } 728*7c478bd9Sstevel@tonic-gate 729*7c478bd9Sstevel@tonic-gate if (uio->uio_loffset >= limit) 730*7c478bd9Sstevel@tonic-gate return (EFBIG); 731*7c478bd9Sstevel@tonic-gate 732*7c478bd9Sstevel@tonic-gate ip->i_flag |= INOACC; /* don't update ref time in getpage */ 733*7c478bd9Sstevel@tonic-gate 734*7c478bd9Sstevel@tonic-gate if (ioflag & (FSYNC|FDSYNC)) { 735*7c478bd9Sstevel@tonic-gate ip->i_flag |= ISYNC; 736*7c478bd9Sstevel@tonic-gate iupdat_flag = 1; 737*7c478bd9Sstevel@tonic-gate } 738*7c478bd9Sstevel@tonic-gate /* 739*7c478bd9Sstevel@tonic-gate * Try to go direct 740*7c478bd9Sstevel@tonic-gate */ 741*7c478bd9Sstevel@tonic-gate if (ip->i_flag & IDIRECTIO || ufsvfsp->vfs_forcedirectio) { 742*7c478bd9Sstevel@tonic-gate uio->uio_llimit = limit; 743*7c478bd9Sstevel@tonic-gate error = ufs_directio_write(ip, uio, ioflag, 0, cr, 744*7c478bd9Sstevel@tonic-gate &directio_status); 745*7c478bd9Sstevel@tonic-gate /* 746*7c478bd9Sstevel@tonic-gate * If ufs_directio wrote to the file or set the flags, 747*7c478bd9Sstevel@tonic-gate * we need to update i_seq, but it may be deferred. 748*7c478bd9Sstevel@tonic-gate */ 749*7c478bd9Sstevel@tonic-gate if (start_resid != uio->uio_resid || 750*7c478bd9Sstevel@tonic-gate (ip->i_flag & (ICHG|IUPD))) { 751*7c478bd9Sstevel@tonic-gate i_seq_needed = 1; 752*7c478bd9Sstevel@tonic-gate ip->i_flag |= ISEQ; 753*7c478bd9Sstevel@tonic-gate } 754*7c478bd9Sstevel@tonic-gate if (directio_status == DIRECTIO_SUCCESS) 755*7c478bd9Sstevel@tonic-gate goto out; 756*7c478bd9Sstevel@tonic-gate } 757*7c478bd9Sstevel@tonic-gate 758*7c478bd9Sstevel@tonic-gate /* 759*7c478bd9Sstevel@tonic-gate * Behavior with respect to dropping/reacquiring vfs_dqrwlock: 760*7c478bd9Sstevel@tonic-gate * 761*7c478bd9Sstevel@tonic-gate * o shadow inodes: vfs_dqrwlock is not held at all 762*7c478bd9Sstevel@tonic-gate * o quota updates: vfs_dqrwlock is read or write held 763*7c478bd9Sstevel@tonic-gate * o other updates: vfs_dqrwlock is read held 764*7c478bd9Sstevel@tonic-gate * 765*7c478bd9Sstevel@tonic-gate * The first case is the only one where we do not hold 766*7c478bd9Sstevel@tonic-gate * vfs_dqrwlock at all while entering wrip(). 767*7c478bd9Sstevel@tonic-gate * We must make sure not to downgrade/drop vfs_dqrwlock if we 768*7c478bd9Sstevel@tonic-gate * have it as writer, i.e. if we are updating the quota inode. 769*7c478bd9Sstevel@tonic-gate * There is no potential deadlock scenario in this case as 770*7c478bd9Sstevel@tonic-gate * ufs_getpage() takes care of this and avoids reacquiring 771*7c478bd9Sstevel@tonic-gate * vfs_dqrwlock in that case. 772*7c478bd9Sstevel@tonic-gate * 773*7c478bd9Sstevel@tonic-gate * This check is done here since the above conditions do not change 774*7c478bd9Sstevel@tonic-gate * and we possibly loop below, so save a few cycles. 775*7c478bd9Sstevel@tonic-gate */ 776*7c478bd9Sstevel@tonic-gate if ((type == IFSHAD) || 777*7c478bd9Sstevel@tonic-gate (rw_owner(&ufsvfsp->vfs_dqrwlock) == curthread)) { 778*7c478bd9Sstevel@tonic-gate do_dqrwlock = 0; 779*7c478bd9Sstevel@tonic-gate } else { 780*7c478bd9Sstevel@tonic-gate do_dqrwlock = 1; 781*7c478bd9Sstevel@tonic-gate } 782*7c478bd9Sstevel@tonic-gate 783*7c478bd9Sstevel@tonic-gate /* 784*7c478bd9Sstevel@tonic-gate * Large Files: We cast MAXBMASK to offset_t 785*7c478bd9Sstevel@tonic-gate * inorder to mask out the higher bits. Since offset_t 786*7c478bd9Sstevel@tonic-gate * is a signed value, the high order bit set in MAXBMASK 787*7c478bd9Sstevel@tonic-gate * value makes it do the right thing by having all bits 1 788*7c478bd9Sstevel@tonic-gate * in the higher word. May be removed for _SOLARIS64_. 789*7c478bd9Sstevel@tonic-gate */ 790*7c478bd9Sstevel@tonic-gate 791*7c478bd9Sstevel@tonic-gate fs = ip->i_fs; 792*7c478bd9Sstevel@tonic-gate do { 793*7c478bd9Sstevel@tonic-gate u_offset_t uoff = uio->uio_loffset; 794*7c478bd9Sstevel@tonic-gate off = uoff & (offset_t)MAXBMASK; 795*7c478bd9Sstevel@tonic-gate mapon = (int)(uoff & (offset_t)MAXBOFFSET); 796*7c478bd9Sstevel@tonic-gate on = (int)blkoff(fs, uoff); 797*7c478bd9Sstevel@tonic-gate n = (int)MIN(fs->fs_bsize - on, uio->uio_resid); 798*7c478bd9Sstevel@tonic-gate new_iblocks = 1; 799*7c478bd9Sstevel@tonic-gate 800*7c478bd9Sstevel@tonic-gate if (type == IFREG && uoff + n >= limit) { 801*7c478bd9Sstevel@tonic-gate if (uoff >= limit) { 802*7c478bd9Sstevel@tonic-gate error = EFBIG; 803*7c478bd9Sstevel@tonic-gate goto out; 804*7c478bd9Sstevel@tonic-gate } 805*7c478bd9Sstevel@tonic-gate /* 806*7c478bd9Sstevel@tonic-gate * since uoff + n >= limit, 807*7c478bd9Sstevel@tonic-gate * therefore n >= limit - uoff, and n is an int 808*7c478bd9Sstevel@tonic-gate * so it is safe to cast it to an int 809*7c478bd9Sstevel@tonic-gate */ 810*7c478bd9Sstevel@tonic-gate n = (int)(limit - (rlim64_t)uoff); 811*7c478bd9Sstevel@tonic-gate } 812*7c478bd9Sstevel@tonic-gate if (uoff + n > ip->i_size) { 813*7c478bd9Sstevel@tonic-gate /* 814*7c478bd9Sstevel@tonic-gate * We are extending the length of the file. 815*7c478bd9Sstevel@tonic-gate * bmap is used so that we are sure that 816*7c478bd9Sstevel@tonic-gate * if we need to allocate new blocks, that it 817*7c478bd9Sstevel@tonic-gate * is done here before we up the file size. 818*7c478bd9Sstevel@tonic-gate */ 819*7c478bd9Sstevel@tonic-gate error = bmap_write(ip, uoff, (int)(on + n), 820*7c478bd9Sstevel@tonic-gate mapon == 0, cr); 821*7c478bd9Sstevel@tonic-gate /* 822*7c478bd9Sstevel@tonic-gate * bmap_write never drops i_contents so if 823*7c478bd9Sstevel@tonic-gate * the flags are set it changed the file. 824*7c478bd9Sstevel@tonic-gate */ 825*7c478bd9Sstevel@tonic-gate if (ip->i_flag & (ICHG|IUPD)) { 826*7c478bd9Sstevel@tonic-gate i_seq_needed = 1; 827*7c478bd9Sstevel@tonic-gate ip->i_flag |= ISEQ; 828*7c478bd9Sstevel@tonic-gate } 829*7c478bd9Sstevel@tonic-gate if (error) 830*7c478bd9Sstevel@tonic-gate break; 831*7c478bd9Sstevel@tonic-gate /* 832*7c478bd9Sstevel@tonic-gate * There is a window of vulnerability here. 833*7c478bd9Sstevel@tonic-gate * The sequence of operations: allocate file 834*7c478bd9Sstevel@tonic-gate * system blocks, uiomove the data into pages, 835*7c478bd9Sstevel@tonic-gate * and then update the size of the file in the 836*7c478bd9Sstevel@tonic-gate * inode, must happen atomically. However, due 837*7c478bd9Sstevel@tonic-gate * to current locking constraints, this can not 838*7c478bd9Sstevel@tonic-gate * be done. 839*7c478bd9Sstevel@tonic-gate */ 840*7c478bd9Sstevel@tonic-gate ASSERT(ip->i_writer == NULL); 841*7c478bd9Sstevel@tonic-gate ip->i_writer = curthread; 842*7c478bd9Sstevel@tonic-gate i_size_changed = 1; 843*7c478bd9Sstevel@tonic-gate /* 844*7c478bd9Sstevel@tonic-gate * If we are writing from the beginning of 845*7c478bd9Sstevel@tonic-gate * the mapping, we can just create the 846*7c478bd9Sstevel@tonic-gate * pages without having to read them. 847*7c478bd9Sstevel@tonic-gate */ 848*7c478bd9Sstevel@tonic-gate pagecreate = (mapon == 0); 849*7c478bd9Sstevel@tonic-gate } else if (n == MAXBSIZE) { 850*7c478bd9Sstevel@tonic-gate /* 851*7c478bd9Sstevel@tonic-gate * Going to do a whole mappings worth, 852*7c478bd9Sstevel@tonic-gate * so we can just create the pages w/o 853*7c478bd9Sstevel@tonic-gate * having to read them in. But before 854*7c478bd9Sstevel@tonic-gate * we do that, we need to make sure any 855*7c478bd9Sstevel@tonic-gate * needed blocks are allocated first. 856*7c478bd9Sstevel@tonic-gate */ 857*7c478bd9Sstevel@tonic-gate iblocks = ip->i_blocks; 858*7c478bd9Sstevel@tonic-gate error = bmap_write(ip, uoff, (int)(on + n), 1, cr); 859*7c478bd9Sstevel@tonic-gate /* 860*7c478bd9Sstevel@tonic-gate * bmap_write never drops i_contents so if 861*7c478bd9Sstevel@tonic-gate * the flags are set it changed the file. 862*7c478bd9Sstevel@tonic-gate */ 863*7c478bd9Sstevel@tonic-gate if (ip->i_flag & (ICHG|IUPD)) { 864*7c478bd9Sstevel@tonic-gate i_seq_needed = 1; 865*7c478bd9Sstevel@tonic-gate ip->i_flag |= ISEQ; 866*7c478bd9Sstevel@tonic-gate } 867*7c478bd9Sstevel@tonic-gate if (error) 868*7c478bd9Sstevel@tonic-gate break; 869*7c478bd9Sstevel@tonic-gate pagecreate = 1; 870*7c478bd9Sstevel@tonic-gate /* 871*7c478bd9Sstevel@tonic-gate * check if the new created page needed the 872*7c478bd9Sstevel@tonic-gate * allocation of new disk blocks. 873*7c478bd9Sstevel@tonic-gate */ 874*7c478bd9Sstevel@tonic-gate if (iblocks == ip->i_blocks) 875*7c478bd9Sstevel@tonic-gate new_iblocks = 0; /* no new blocks allocated */ 876*7c478bd9Sstevel@tonic-gate } else { 877*7c478bd9Sstevel@tonic-gate pagecreate = 0; 878*7c478bd9Sstevel@tonic-gate /* 879*7c478bd9Sstevel@tonic-gate * In sync mode flush the indirect blocks which 880*7c478bd9Sstevel@tonic-gate * may have been allocated and not written on 881*7c478bd9Sstevel@tonic-gate * disk. In above cases bmap_write will allocate 882*7c478bd9Sstevel@tonic-gate * in sync mode. 883*7c478bd9Sstevel@tonic-gate */ 884*7c478bd9Sstevel@tonic-gate if (ioflag & (FSYNC|FDSYNC)) { 885*7c478bd9Sstevel@tonic-gate error = ufs_indirblk_sync(ip, uoff); 886*7c478bd9Sstevel@tonic-gate if (error) 887*7c478bd9Sstevel@tonic-gate break; 888*7c478bd9Sstevel@tonic-gate } 889*7c478bd9Sstevel@tonic-gate } 890*7c478bd9Sstevel@tonic-gate 891*7c478bd9Sstevel@tonic-gate /* 892*7c478bd9Sstevel@tonic-gate * At this point we can enter ufs_getpage() in one 893*7c478bd9Sstevel@tonic-gate * of two ways: 894*7c478bd9Sstevel@tonic-gate * 1) segmap_getmapflt() calls ufs_getpage() when the 895*7c478bd9Sstevel@tonic-gate * forcefault parameter is true (pagecreate == 0) 896*7c478bd9Sstevel@tonic-gate * 2) uiomove() causes a page fault. 897*7c478bd9Sstevel@tonic-gate * 898*7c478bd9Sstevel@tonic-gate * We have to drop the contents lock to prevent the VM 899*7c478bd9Sstevel@tonic-gate * system from trying to reaquire it in ufs_getpage() 900*7c478bd9Sstevel@tonic-gate * should the uiomove cause a pagefault. 901*7c478bd9Sstevel@tonic-gate * 902*7c478bd9Sstevel@tonic-gate * We have to drop the reader vfs_dqrwlock here as well. 903*7c478bd9Sstevel@tonic-gate */ 904*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 905*7c478bd9Sstevel@tonic-gate if (do_dqrwlock) { 906*7c478bd9Sstevel@tonic-gate ASSERT(RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock)); 907*7c478bd9Sstevel@tonic-gate ASSERT(!(RW_WRITE_HELD(&ufsvfsp->vfs_dqrwlock))); 908*7c478bd9Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 909*7c478bd9Sstevel@tonic-gate } 910*7c478bd9Sstevel@tonic-gate 911*7c478bd9Sstevel@tonic-gate base = segmap_getmapflt(segkmap, vp, (off + mapon), 912*7c478bd9Sstevel@tonic-gate (uint_t)n, !pagecreate, S_WRITE); 913*7c478bd9Sstevel@tonic-gate 914*7c478bd9Sstevel@tonic-gate /* 915*7c478bd9Sstevel@tonic-gate * segmap_pagecreate() returns 1 if it calls 916*7c478bd9Sstevel@tonic-gate * page_create_va() to allocate any pages. 917*7c478bd9Sstevel@tonic-gate */ 918*7c478bd9Sstevel@tonic-gate newpage = 0; 919*7c478bd9Sstevel@tonic-gate 920*7c478bd9Sstevel@tonic-gate if (pagecreate) 921*7c478bd9Sstevel@tonic-gate newpage = segmap_pagecreate(segkmap, base, 922*7c478bd9Sstevel@tonic-gate (size_t)n, 0); 923*7c478bd9Sstevel@tonic-gate 924*7c478bd9Sstevel@tonic-gate premove_resid = uio->uio_resid; 925*7c478bd9Sstevel@tonic-gate error = uiomove(base + mapon, (long)n, UIO_WRITE, uio); 926*7c478bd9Sstevel@tonic-gate 927*7c478bd9Sstevel@tonic-gate /* 928*7c478bd9Sstevel@tonic-gate * If "newpage" is set, then a new page was created and it 929*7c478bd9Sstevel@tonic-gate * does not contain valid data, so it needs to be initialized 930*7c478bd9Sstevel@tonic-gate * at this point. 931*7c478bd9Sstevel@tonic-gate * Otherwise the page contains old data, which was overwritten 932*7c478bd9Sstevel@tonic-gate * partially or as a whole in uiomove. 933*7c478bd9Sstevel@tonic-gate * If there is only one iovec structure within uio, then 934*7c478bd9Sstevel@tonic-gate * on error uiomove will not be able to update uio->uio_loffset 935*7c478bd9Sstevel@tonic-gate * and we would zero the whole page here! 936*7c478bd9Sstevel@tonic-gate * 937*7c478bd9Sstevel@tonic-gate * If uiomove fails because of an error, the old valid data 938*7c478bd9Sstevel@tonic-gate * is kept instead of filling the rest of the page with zero's. 939*7c478bd9Sstevel@tonic-gate */ 940*7c478bd9Sstevel@tonic-gate if (newpage && 941*7c478bd9Sstevel@tonic-gate uio->uio_loffset < roundup(off + mapon + n, PAGESIZE)) { 942*7c478bd9Sstevel@tonic-gate /* 943*7c478bd9Sstevel@tonic-gate * We created pages w/o initializing them completely, 944*7c478bd9Sstevel@tonic-gate * thus we need to zero the part that wasn't set up. 945*7c478bd9Sstevel@tonic-gate * This happens on most EOF write cases and if 946*7c478bd9Sstevel@tonic-gate * we had some sort of error during the uiomove. 947*7c478bd9Sstevel@tonic-gate */ 948*7c478bd9Sstevel@tonic-gate int nzero, nmoved; 949*7c478bd9Sstevel@tonic-gate 950*7c478bd9Sstevel@tonic-gate nmoved = (int)(uio->uio_loffset - (off + mapon)); 951*7c478bd9Sstevel@tonic-gate ASSERT(nmoved >= 0 && nmoved <= n); 952*7c478bd9Sstevel@tonic-gate nzero = roundup(on + n, PAGESIZE) - nmoved; 953*7c478bd9Sstevel@tonic-gate ASSERT(nzero > 0 && mapon + nmoved + nzero <= MAXBSIZE); 954*7c478bd9Sstevel@tonic-gate (void) kzero(base + mapon + nmoved, (uint_t)nzero); 955*7c478bd9Sstevel@tonic-gate } 956*7c478bd9Sstevel@tonic-gate 957*7c478bd9Sstevel@tonic-gate /* 958*7c478bd9Sstevel@tonic-gate * Unlock the pages allocated by page_create_va() 959*7c478bd9Sstevel@tonic-gate * in segmap_pagecreate() 960*7c478bd9Sstevel@tonic-gate */ 961*7c478bd9Sstevel@tonic-gate if (newpage) 962*7c478bd9Sstevel@tonic-gate segmap_pageunlock(segkmap, base, (size_t)n, S_WRITE); 963*7c478bd9Sstevel@tonic-gate 964*7c478bd9Sstevel@tonic-gate /* 965*7c478bd9Sstevel@tonic-gate * If the size of the file changed, then update the 966*7c478bd9Sstevel@tonic-gate * size field in the inode now. This can't be done 967*7c478bd9Sstevel@tonic-gate * before the call to segmap_pageunlock or there is 968*7c478bd9Sstevel@tonic-gate * a potential deadlock with callers to ufs_putpage(). 969*7c478bd9Sstevel@tonic-gate * They will be holding i_contents and trying to lock 970*7c478bd9Sstevel@tonic-gate * a page, while this thread is holding a page locked 971*7c478bd9Sstevel@tonic-gate * and trying to acquire i_contents. 972*7c478bd9Sstevel@tonic-gate */ 973*7c478bd9Sstevel@tonic-gate if (i_size_changed) { 974*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 975*7c478bd9Sstevel@tonic-gate old_i_size = ip->i_size; 976*7c478bd9Sstevel@tonic-gate UFS_SET_ISIZE(uoff + n, ip); 977*7c478bd9Sstevel@tonic-gate TRANS_INODE(ufsvfsp, ip); 978*7c478bd9Sstevel@tonic-gate /* 979*7c478bd9Sstevel@tonic-gate * file has grown larger than 2GB. Set flag 980*7c478bd9Sstevel@tonic-gate * in superblock to indicate this, if it 981*7c478bd9Sstevel@tonic-gate * is not already set. 982*7c478bd9Sstevel@tonic-gate */ 983*7c478bd9Sstevel@tonic-gate if ((ip->i_size > MAXOFF32_T) && 984*7c478bd9Sstevel@tonic-gate !(fs->fs_flags & FSLARGEFILES)) { 985*7c478bd9Sstevel@tonic-gate ASSERT(ufsvfsp->vfs_lfflags & UFS_LARGEFILES); 986*7c478bd9Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 987*7c478bd9Sstevel@tonic-gate fs->fs_flags |= FSLARGEFILES; 988*7c478bd9Sstevel@tonic-gate ufs_sbwrite(ufsvfsp); 989*7c478bd9Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 990*7c478bd9Sstevel@tonic-gate } 991*7c478bd9Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 992*7c478bd9Sstevel@tonic-gate ip->i_writer = NULL; 993*7c478bd9Sstevel@tonic-gate cv_broadcast(&ip->i_wrcv); 994*7c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 995*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 996*7c478bd9Sstevel@tonic-gate } 997*7c478bd9Sstevel@tonic-gate 998*7c478bd9Sstevel@tonic-gate if (error) { 999*7c478bd9Sstevel@tonic-gate /* 1000*7c478bd9Sstevel@tonic-gate * If we failed on a write, we may have already 1001*7c478bd9Sstevel@tonic-gate * allocated file blocks as well as pages. It's 1002*7c478bd9Sstevel@tonic-gate * hard to undo the block allocation, but we must 1003*7c478bd9Sstevel@tonic-gate * be sure to invalidate any pages that may have 1004*7c478bd9Sstevel@tonic-gate * been allocated. 1005*7c478bd9Sstevel@tonic-gate * 1006*7c478bd9Sstevel@tonic-gate * If the page was created without initialization 1007*7c478bd9Sstevel@tonic-gate * then we must check if it should be possible 1008*7c478bd9Sstevel@tonic-gate * to destroy the new page and to keep the old data 1009*7c478bd9Sstevel@tonic-gate * on the disk. 1010*7c478bd9Sstevel@tonic-gate * 1011*7c478bd9Sstevel@tonic-gate * It is possible to destroy the page without 1012*7c478bd9Sstevel@tonic-gate * having to write back its contents only when 1013*7c478bd9Sstevel@tonic-gate * - the size of the file keeps unchanged 1014*7c478bd9Sstevel@tonic-gate * - bmap_write() did not allocate new disk blocks 1015*7c478bd9Sstevel@tonic-gate * it is possible to create big files using "seek" and 1016*7c478bd9Sstevel@tonic-gate * write to the end of the file. A "write" to a 1017*7c478bd9Sstevel@tonic-gate * position before the end of the file would not 1018*7c478bd9Sstevel@tonic-gate * change the size of the file but it would allocate 1019*7c478bd9Sstevel@tonic-gate * new disk blocks. 1020*7c478bd9Sstevel@tonic-gate * - uiomove intended to overwrite the whole page. 1021*7c478bd9Sstevel@tonic-gate * - a new page was created (newpage == 1). 1022*7c478bd9Sstevel@tonic-gate */ 1023*7c478bd9Sstevel@tonic-gate 1024*7c478bd9Sstevel@tonic-gate if (i_size_changed == 0 && new_iblocks == 0 && 1025*7c478bd9Sstevel@tonic-gate newpage) { 1026*7c478bd9Sstevel@tonic-gate 1027*7c478bd9Sstevel@tonic-gate /* unwind what uiomove eventually last did */ 1028*7c478bd9Sstevel@tonic-gate uio->uio_resid = premove_resid; 1029*7c478bd9Sstevel@tonic-gate 1030*7c478bd9Sstevel@tonic-gate /* 1031*7c478bd9Sstevel@tonic-gate * destroy the page, do not write ambiguous 1032*7c478bd9Sstevel@tonic-gate * data to the disk. 1033*7c478bd9Sstevel@tonic-gate */ 1034*7c478bd9Sstevel@tonic-gate flags = SM_DESTROY; 1035*7c478bd9Sstevel@tonic-gate } else { 1036*7c478bd9Sstevel@tonic-gate /* 1037*7c478bd9Sstevel@tonic-gate * write the page back to the disk, if dirty, 1038*7c478bd9Sstevel@tonic-gate * and remove the page from the cache. 1039*7c478bd9Sstevel@tonic-gate */ 1040*7c478bd9Sstevel@tonic-gate flags = SM_INVAL; 1041*7c478bd9Sstevel@tonic-gate } 1042*7c478bd9Sstevel@tonic-gate (void) segmap_release(segkmap, base, flags); 1043*7c478bd9Sstevel@tonic-gate } else { 1044*7c478bd9Sstevel@tonic-gate flags = 0; 1045*7c478bd9Sstevel@tonic-gate /* 1046*7c478bd9Sstevel@tonic-gate * Force write back for synchronous write cases. 1047*7c478bd9Sstevel@tonic-gate */ 1048*7c478bd9Sstevel@tonic-gate if ((ioflag & (FSYNC|FDSYNC)) || type == IFDIR) { 1049*7c478bd9Sstevel@tonic-gate /* 1050*7c478bd9Sstevel@tonic-gate * If the sticky bit is set but the 1051*7c478bd9Sstevel@tonic-gate * execute bit is not set, we do a 1052*7c478bd9Sstevel@tonic-gate * synchronous write back and free 1053*7c478bd9Sstevel@tonic-gate * the page when done. We set up swap 1054*7c478bd9Sstevel@tonic-gate * files to be handled this way to 1055*7c478bd9Sstevel@tonic-gate * prevent servers from keeping around 1056*7c478bd9Sstevel@tonic-gate * the client's swap pages too long. 1057*7c478bd9Sstevel@tonic-gate * XXX - there ought to be a better way. 1058*7c478bd9Sstevel@tonic-gate */ 1059*7c478bd9Sstevel@tonic-gate if (IS_SWAPVP(vp)) { 1060*7c478bd9Sstevel@tonic-gate flags = SM_WRITE | SM_FREE | 1061*7c478bd9Sstevel@tonic-gate SM_DONTNEED; 1062*7c478bd9Sstevel@tonic-gate iupdat_flag = 0; 1063*7c478bd9Sstevel@tonic-gate } else { 1064*7c478bd9Sstevel@tonic-gate flags = SM_WRITE; 1065*7c478bd9Sstevel@tonic-gate } 1066*7c478bd9Sstevel@tonic-gate } else if (n + on == MAXBSIZE || IS_SWAPVP(vp)) { 1067*7c478bd9Sstevel@tonic-gate /* 1068*7c478bd9Sstevel@tonic-gate * Have written a whole block. 1069*7c478bd9Sstevel@tonic-gate * Start an asynchronous write and 1070*7c478bd9Sstevel@tonic-gate * mark the buffer to indicate that 1071*7c478bd9Sstevel@tonic-gate * it won't be needed again soon. 1072*7c478bd9Sstevel@tonic-gate */ 1073*7c478bd9Sstevel@tonic-gate flags = SM_WRITE | SM_ASYNC | SM_DONTNEED; 1074*7c478bd9Sstevel@tonic-gate } 1075*7c478bd9Sstevel@tonic-gate error = segmap_release(segkmap, base, flags); 1076*7c478bd9Sstevel@tonic-gate /* 1077*7c478bd9Sstevel@tonic-gate * If the operation failed and is synchronous, 1078*7c478bd9Sstevel@tonic-gate * then we need to unwind what uiomove() last 1079*7c478bd9Sstevel@tonic-gate * did so we can potentially return an error to 1080*7c478bd9Sstevel@tonic-gate * the caller. If this write operation was 1081*7c478bd9Sstevel@tonic-gate * done in two pieces and the first succeeded, 1082*7c478bd9Sstevel@tonic-gate * then we won't return an error for the second 1083*7c478bd9Sstevel@tonic-gate * piece that failed. However, we only want to 1084*7c478bd9Sstevel@tonic-gate * return a resid value that reflects what was 1085*7c478bd9Sstevel@tonic-gate * really done. 1086*7c478bd9Sstevel@tonic-gate * 1087*7c478bd9Sstevel@tonic-gate * Failures for non-synchronous operations can 1088*7c478bd9Sstevel@tonic-gate * be ignored since the page subsystem will 1089*7c478bd9Sstevel@tonic-gate * retry the operation until it succeeds or the 1090*7c478bd9Sstevel@tonic-gate * file system is unmounted. 1091*7c478bd9Sstevel@tonic-gate */ 1092*7c478bd9Sstevel@tonic-gate if (error) { 1093*7c478bd9Sstevel@tonic-gate if ((ioflag & (FSYNC | FDSYNC)) || 1094*7c478bd9Sstevel@tonic-gate type == IFDIR) { 1095*7c478bd9Sstevel@tonic-gate uio->uio_resid = premove_resid; 1096*7c478bd9Sstevel@tonic-gate } else { 1097*7c478bd9Sstevel@tonic-gate error = 0; 1098*7c478bd9Sstevel@tonic-gate } 1099*7c478bd9Sstevel@tonic-gate } 1100*7c478bd9Sstevel@tonic-gate } 1101*7c478bd9Sstevel@tonic-gate 1102*7c478bd9Sstevel@tonic-gate /* 1103*7c478bd9Sstevel@tonic-gate * Re-acquire contents lock. 1104*7c478bd9Sstevel@tonic-gate * If it was dropped, reacquire reader vfs_dqrwlock as well. 1105*7c478bd9Sstevel@tonic-gate */ 1106*7c478bd9Sstevel@tonic-gate if (do_dqrwlock) 1107*7c478bd9Sstevel@tonic-gate rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 1108*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 1109*7c478bd9Sstevel@tonic-gate 1110*7c478bd9Sstevel@tonic-gate /* 1111*7c478bd9Sstevel@tonic-gate * If the uiomove() failed or if a synchronous 1112*7c478bd9Sstevel@tonic-gate * page push failed, fix up i_size. 1113*7c478bd9Sstevel@tonic-gate */ 1114*7c478bd9Sstevel@tonic-gate if (error) { 1115*7c478bd9Sstevel@tonic-gate if (i_size_changed) { 1116*7c478bd9Sstevel@tonic-gate /* 1117*7c478bd9Sstevel@tonic-gate * The uiomove failed, and we 1118*7c478bd9Sstevel@tonic-gate * allocated blocks,so get rid 1119*7c478bd9Sstevel@tonic-gate * of them. 1120*7c478bd9Sstevel@tonic-gate */ 1121*7c478bd9Sstevel@tonic-gate (void) ufs_itrunc(ip, old_i_size, 0, cr); 1122*7c478bd9Sstevel@tonic-gate } 1123*7c478bd9Sstevel@tonic-gate } else { 1124*7c478bd9Sstevel@tonic-gate /* 1125*7c478bd9Sstevel@tonic-gate * XXX - Can this be out of the loop? 1126*7c478bd9Sstevel@tonic-gate */ 1127*7c478bd9Sstevel@tonic-gate ip->i_flag |= IUPD | ICHG; 1128*7c478bd9Sstevel@tonic-gate /* 1129*7c478bd9Sstevel@tonic-gate * Only do one increase of i_seq for multiple 1130*7c478bd9Sstevel@tonic-gate * pieces. Because we drop locks, record 1131*7c478bd9Sstevel@tonic-gate * the fact that we changed the timestamp and 1132*7c478bd9Sstevel@tonic-gate * are deferring the increase in case another thread 1133*7c478bd9Sstevel@tonic-gate * pushes our timestamp update. 1134*7c478bd9Sstevel@tonic-gate */ 1135*7c478bd9Sstevel@tonic-gate i_seq_needed = 1; 1136*7c478bd9Sstevel@tonic-gate ip->i_flag |= ISEQ; 1137*7c478bd9Sstevel@tonic-gate if (i_size_changed) 1138*7c478bd9Sstevel@tonic-gate ip->i_flag |= IATTCHG; 1139*7c478bd9Sstevel@tonic-gate if ((ip->i_mode & (IEXEC | (IEXEC >> 3) | 1140*7c478bd9Sstevel@tonic-gate (IEXEC >> 6))) != 0 && 1141*7c478bd9Sstevel@tonic-gate (ip->i_mode & (ISUID | ISGID)) != 0 && 1142*7c478bd9Sstevel@tonic-gate secpolicy_vnode_setid_retain(cr, 1143*7c478bd9Sstevel@tonic-gate (ip->i_mode & ISUID) != 0 && ip->i_uid == 0) != 0) { 1144*7c478bd9Sstevel@tonic-gate /* 1145*7c478bd9Sstevel@tonic-gate * Clear Set-UID & Set-GID bits on 1146*7c478bd9Sstevel@tonic-gate * successful write if not privileged 1147*7c478bd9Sstevel@tonic-gate * and at least one of the execute bits 1148*7c478bd9Sstevel@tonic-gate * is set. If we always clear Set-GID, 1149*7c478bd9Sstevel@tonic-gate * mandatory file and record locking is 1150*7c478bd9Sstevel@tonic-gate * unuseable. 1151*7c478bd9Sstevel@tonic-gate */ 1152*7c478bd9Sstevel@tonic-gate ip->i_mode &= ~(ISUID | ISGID); 1153*7c478bd9Sstevel@tonic-gate } 1154*7c478bd9Sstevel@tonic-gate } 1155*7c478bd9Sstevel@tonic-gate TRANS_INODE(ufsvfsp, ip); 1156*7c478bd9Sstevel@tonic-gate } while (error == 0 && uio->uio_resid > 0 && n != 0); 1157*7c478bd9Sstevel@tonic-gate 1158*7c478bd9Sstevel@tonic-gate out: 1159*7c478bd9Sstevel@tonic-gate /* 1160*7c478bd9Sstevel@tonic-gate * Make sure i_seq is increased at least once per write 1161*7c478bd9Sstevel@tonic-gate */ 1162*7c478bd9Sstevel@tonic-gate if (i_seq_needed) { 1163*7c478bd9Sstevel@tonic-gate ip->i_seq++; 1164*7c478bd9Sstevel@tonic-gate ip->i_flag &= ~ISEQ; /* no longer deferred */ 1165*7c478bd9Sstevel@tonic-gate } 1166*7c478bd9Sstevel@tonic-gate 1167*7c478bd9Sstevel@tonic-gate /* 1168*7c478bd9Sstevel@tonic-gate * Inode is updated according to this table - 1169*7c478bd9Sstevel@tonic-gate * 1170*7c478bd9Sstevel@tonic-gate * FSYNC FDSYNC(posix.4) 1171*7c478bd9Sstevel@tonic-gate * -------------------------- 1172*7c478bd9Sstevel@tonic-gate * always@ IATTCHG|IBDWRITE 1173*7c478bd9Sstevel@tonic-gate * 1174*7c478bd9Sstevel@tonic-gate * @ - If we are doing synchronous write the only time we should 1175*7c478bd9Sstevel@tonic-gate * not be sync'ing the ip here is if we have the stickyhack 1176*7c478bd9Sstevel@tonic-gate * activated, the file is marked with the sticky bit and 1177*7c478bd9Sstevel@tonic-gate * no exec bit, the file length has not been changed and 1178*7c478bd9Sstevel@tonic-gate * no new blocks have been allocated during this write. 1179*7c478bd9Sstevel@tonic-gate */ 1180*7c478bd9Sstevel@tonic-gate 1181*7c478bd9Sstevel@tonic-gate if ((ip->i_flag & ISYNC) != 0) { 1182*7c478bd9Sstevel@tonic-gate /* 1183*7c478bd9Sstevel@tonic-gate * we have eliminated nosync 1184*7c478bd9Sstevel@tonic-gate */ 1185*7c478bd9Sstevel@tonic-gate if ((ip->i_flag & (IATTCHG|IBDWRITE)) || 1186*7c478bd9Sstevel@tonic-gate ((ioflag & FSYNC) && iupdat_flag)) { 1187*7c478bd9Sstevel@tonic-gate ufs_iupdat(ip, 1); 1188*7c478bd9Sstevel@tonic-gate } 1189*7c478bd9Sstevel@tonic-gate } 1190*7c478bd9Sstevel@tonic-gate 1191*7c478bd9Sstevel@tonic-gate /* 1192*7c478bd9Sstevel@tonic-gate * If we've already done a partial-write, terminate 1193*7c478bd9Sstevel@tonic-gate * the write but return no error unless the error is ENOSPC 1194*7c478bd9Sstevel@tonic-gate * because the caller can detect this and free resources and 1195*7c478bd9Sstevel@tonic-gate * try again. 1196*7c478bd9Sstevel@tonic-gate */ 1197*7c478bd9Sstevel@tonic-gate if ((start_resid != uio->uio_resid) && (error != ENOSPC)) 1198*7c478bd9Sstevel@tonic-gate error = 0; 1199*7c478bd9Sstevel@tonic-gate 1200*7c478bd9Sstevel@tonic-gate ip->i_flag &= ~(INOACC | ISYNC); 1201*7c478bd9Sstevel@tonic-gate ITIMES_NOLOCK(ip); 1202*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_RWIP_END, 1203*7c478bd9Sstevel@tonic-gate "ufs_wrip_end:vp %p error %d", vp, error); 1204*7c478bd9Sstevel@tonic-gate return (error); 1205*7c478bd9Sstevel@tonic-gate } 1206*7c478bd9Sstevel@tonic-gate 1207*7c478bd9Sstevel@tonic-gate /* 1208*7c478bd9Sstevel@tonic-gate * rdip does the real work of read requests for ufs. 1209*7c478bd9Sstevel@tonic-gate */ 1210*7c478bd9Sstevel@tonic-gate int 1211*7c478bd9Sstevel@tonic-gate rdip(struct inode *ip, struct uio *uio, int ioflag, cred_t *cr) 1212*7c478bd9Sstevel@tonic-gate { 1213*7c478bd9Sstevel@tonic-gate u_offset_t off; 1214*7c478bd9Sstevel@tonic-gate caddr_t base; 1215*7c478bd9Sstevel@tonic-gate struct fs *fs; 1216*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp; 1217*7c478bd9Sstevel@tonic-gate struct vnode *vp; 1218*7c478bd9Sstevel@tonic-gate long oresid = uio->uio_resid; 1219*7c478bd9Sstevel@tonic-gate u_offset_t n, on, mapon; 1220*7c478bd9Sstevel@tonic-gate int error = 0; 1221*7c478bd9Sstevel@tonic-gate int doupdate = 1; 1222*7c478bd9Sstevel@tonic-gate uint_t flags, cachemode; 1223*7c478bd9Sstevel@tonic-gate int dofree, directio_status; 1224*7c478bd9Sstevel@tonic-gate krw_t rwtype; 1225*7c478bd9Sstevel@tonic-gate o_mode_t type; 1226*7c478bd9Sstevel@tonic-gate 1227*7c478bd9Sstevel@tonic-gate vp = ITOV(ip); 1228*7c478bd9Sstevel@tonic-gate 1229*7c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_UFS, TR_UFS_RWIP_START, 1230*7c478bd9Sstevel@tonic-gate "ufs_rdip_start:vp %p", vp); 1231*7c478bd9Sstevel@tonic-gate 1232*7c478bd9Sstevel@tonic-gate ASSERT(RW_LOCK_HELD(&ip->i_contents)); 1233*7c478bd9Sstevel@tonic-gate 1234*7c478bd9Sstevel@tonic-gate ufsvfsp = ip->i_ufsvfs; 1235*7c478bd9Sstevel@tonic-gate 1236*7c478bd9Sstevel@tonic-gate if (ufsvfsp == NULL) 1237*7c478bd9Sstevel@tonic-gate return (EIO); 1238*7c478bd9Sstevel@tonic-gate 1239*7c478bd9Sstevel@tonic-gate fs = ufsvfsp->vfs_fs; 1240*7c478bd9Sstevel@tonic-gate 1241*7c478bd9Sstevel@tonic-gate /* check for valid filetype */ 1242*7c478bd9Sstevel@tonic-gate type = ip->i_mode & IFMT; 1243*7c478bd9Sstevel@tonic-gate if ((type != IFREG) && (type != IFDIR) && (type != IFATTRDIR) && 1244*7c478bd9Sstevel@tonic-gate (type != IFLNK) && (type != IFSHAD)) { 1245*7c478bd9Sstevel@tonic-gate return (EIO); 1246*7c478bd9Sstevel@tonic-gate } 1247*7c478bd9Sstevel@tonic-gate 1248*7c478bd9Sstevel@tonic-gate if (uio->uio_loffset > UFS_MAXOFFSET_T) { 1249*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_RWIP_END, 1250*7c478bd9Sstevel@tonic-gate "ufs_rdip_end:vp %p error %d", vp, EINVAL); 1251*7c478bd9Sstevel@tonic-gate error = 0; 1252*7c478bd9Sstevel@tonic-gate goto out; 1253*7c478bd9Sstevel@tonic-gate } 1254*7c478bd9Sstevel@tonic-gate if (uio->uio_loffset < (offset_t)0) { 1255*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_RWIP_END, 1256*7c478bd9Sstevel@tonic-gate "ufs_rdip_end:vp %p error %d", vp, EINVAL); 1257*7c478bd9Sstevel@tonic-gate return (EINVAL); 1258*7c478bd9Sstevel@tonic-gate } 1259*7c478bd9Sstevel@tonic-gate if (uio->uio_resid == 0) { 1260*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_RWIP_END, 1261*7c478bd9Sstevel@tonic-gate "ufs_rdip_end:vp %p error %d", vp, 0); 1262*7c478bd9Sstevel@tonic-gate return (0); 1263*7c478bd9Sstevel@tonic-gate } 1264*7c478bd9Sstevel@tonic-gate 1265*7c478bd9Sstevel@tonic-gate if (!ULOCKFS_IS_NOIACC(ITOUL(ip)) && (fs->fs_ronly == 0) && 1266*7c478bd9Sstevel@tonic-gate (!ufsvfsp->vfs_noatime)) { 1267*7c478bd9Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 1268*7c478bd9Sstevel@tonic-gate ip->i_flag |= IACC; 1269*7c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 1270*7c478bd9Sstevel@tonic-gate } 1271*7c478bd9Sstevel@tonic-gate /* 1272*7c478bd9Sstevel@tonic-gate * Try to go direct 1273*7c478bd9Sstevel@tonic-gate */ 1274*7c478bd9Sstevel@tonic-gate if (ip->i_flag & IDIRECTIO || ufsvfsp->vfs_forcedirectio) { 1275*7c478bd9Sstevel@tonic-gate error = ufs_directio_read(ip, uio, cr, &directio_status); 1276*7c478bd9Sstevel@tonic-gate if (directio_status == DIRECTIO_SUCCESS) 1277*7c478bd9Sstevel@tonic-gate goto out; 1278*7c478bd9Sstevel@tonic-gate } 1279*7c478bd9Sstevel@tonic-gate 1280*7c478bd9Sstevel@tonic-gate rwtype = (rw_write_held(&ip->i_contents)?RW_WRITER:RW_READER); 1281*7c478bd9Sstevel@tonic-gate 1282*7c478bd9Sstevel@tonic-gate /* 1283*7c478bd9Sstevel@tonic-gate * If cache_read_ahead is enabled, we will 1284*7c478bd9Sstevel@tonic-gate * release the pages at the tail of the cache 1285*7c478bd9Sstevel@tonic-gate * list, otherwise we will put them at the head. 1286*7c478bd9Sstevel@tonic-gate */ 1287*7c478bd9Sstevel@tonic-gate if (cache_read_ahead) 1288*7c478bd9Sstevel@tonic-gate cachemode = SM_FREE | SM_ASYNC; 1289*7c478bd9Sstevel@tonic-gate else 1290*7c478bd9Sstevel@tonic-gate cachemode = SM_FREE | SM_DONTNEED | SM_ASYNC; 1291*7c478bd9Sstevel@tonic-gate 1292*7c478bd9Sstevel@tonic-gate do { 1293*7c478bd9Sstevel@tonic-gate offset_t diff; 1294*7c478bd9Sstevel@tonic-gate u_offset_t uoff = uio->uio_loffset; 1295*7c478bd9Sstevel@tonic-gate off = uoff & (offset_t)MAXBMASK; 1296*7c478bd9Sstevel@tonic-gate mapon = (u_offset_t)(uoff & (offset_t)MAXBOFFSET); 1297*7c478bd9Sstevel@tonic-gate on = (u_offset_t)blkoff(fs, uoff); 1298*7c478bd9Sstevel@tonic-gate n = MIN((u_offset_t)fs->fs_bsize - on, 1299*7c478bd9Sstevel@tonic-gate (u_offset_t)uio->uio_resid); 1300*7c478bd9Sstevel@tonic-gate 1301*7c478bd9Sstevel@tonic-gate diff = ip->i_size - uoff; 1302*7c478bd9Sstevel@tonic-gate 1303*7c478bd9Sstevel@tonic-gate if (diff <= (offset_t)0) { 1304*7c478bd9Sstevel@tonic-gate error = 0; 1305*7c478bd9Sstevel@tonic-gate goto out; 1306*7c478bd9Sstevel@tonic-gate } 1307*7c478bd9Sstevel@tonic-gate if (diff < (offset_t)n) 1308*7c478bd9Sstevel@tonic-gate n = (int)diff; 1309*7c478bd9Sstevel@tonic-gate dofree = freebehind && 1310*7c478bd9Sstevel@tonic-gate ip->i_nextr == (off & PAGEMASK) && off > smallfile; 1311*7c478bd9Sstevel@tonic-gate 1312*7c478bd9Sstevel@tonic-gate /* 1313*7c478bd9Sstevel@tonic-gate * At this point we can enter ufs_getpage() in one of two 1314*7c478bd9Sstevel@tonic-gate * ways: 1315*7c478bd9Sstevel@tonic-gate * 1) segmap_getmapflt() calls ufs_getpage() when the 1316*7c478bd9Sstevel@tonic-gate * forcefault parameter is true (value of 1 is passed) 1317*7c478bd9Sstevel@tonic-gate * 2) uiomove() causes a page fault. 1318*7c478bd9Sstevel@tonic-gate * 1319*7c478bd9Sstevel@tonic-gate * We cannot hold onto an i_contents reader lock without 1320*7c478bd9Sstevel@tonic-gate * risking deadlock in ufs_getpage() so drop a reader lock. 1321*7c478bd9Sstevel@tonic-gate * The ufs_getpage() dolock logic already allows for a 1322*7c478bd9Sstevel@tonic-gate * thread holding i_contents as writer to work properly 1323*7c478bd9Sstevel@tonic-gate * so we keep a writer lock. 1324*7c478bd9Sstevel@tonic-gate */ 1325*7c478bd9Sstevel@tonic-gate if (rwtype == RW_READER) 1326*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 1327*7c478bd9Sstevel@tonic-gate base = segmap_getmapflt(segkmap, vp, (off + mapon), 1328*7c478bd9Sstevel@tonic-gate (uint_t)n, 1, S_READ); 1329*7c478bd9Sstevel@tonic-gate 1330*7c478bd9Sstevel@tonic-gate error = uiomove(base + mapon, (long)n, UIO_READ, uio); 1331*7c478bd9Sstevel@tonic-gate 1332*7c478bd9Sstevel@tonic-gate flags = 0; 1333*7c478bd9Sstevel@tonic-gate if (!error) { 1334*7c478bd9Sstevel@tonic-gate /* 1335*7c478bd9Sstevel@tonic-gate * If reading sequential we won't need 1336*7c478bd9Sstevel@tonic-gate * this buffer again soon. 1337*7c478bd9Sstevel@tonic-gate */ 1338*7c478bd9Sstevel@tonic-gate if (dofree) { 1339*7c478bd9Sstevel@tonic-gate flags = cachemode; 1340*7c478bd9Sstevel@tonic-gate } 1341*7c478bd9Sstevel@tonic-gate /* 1342*7c478bd9Sstevel@tonic-gate * In POSIX SYNC (FSYNC and FDSYNC) read mode, 1343*7c478bd9Sstevel@tonic-gate * we want to make sure that the page which has 1344*7c478bd9Sstevel@tonic-gate * been read, is written on disk if it is dirty. 1345*7c478bd9Sstevel@tonic-gate * And corresponding indirect blocks should also 1346*7c478bd9Sstevel@tonic-gate * be flushed out. 1347*7c478bd9Sstevel@tonic-gate */ 1348*7c478bd9Sstevel@tonic-gate if ((ioflag & FRSYNC) && (ioflag & (FSYNC|FDSYNC))) { 1349*7c478bd9Sstevel@tonic-gate flags &= ~SM_ASYNC; 1350*7c478bd9Sstevel@tonic-gate flags |= SM_WRITE; 1351*7c478bd9Sstevel@tonic-gate } 1352*7c478bd9Sstevel@tonic-gate error = segmap_release(segkmap, base, flags); 1353*7c478bd9Sstevel@tonic-gate } else 1354*7c478bd9Sstevel@tonic-gate (void) segmap_release(segkmap, base, flags); 1355*7c478bd9Sstevel@tonic-gate 1356*7c478bd9Sstevel@tonic-gate if (rwtype == RW_READER) 1357*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, rwtype); 1358*7c478bd9Sstevel@tonic-gate } while (error == 0 && uio->uio_resid > 0 && n != 0); 1359*7c478bd9Sstevel@tonic-gate out: 1360*7c478bd9Sstevel@tonic-gate /* 1361*7c478bd9Sstevel@tonic-gate * Inode is updated according to this table if FRSYNC is set. 1362*7c478bd9Sstevel@tonic-gate * 1363*7c478bd9Sstevel@tonic-gate * FSYNC FDSYNC(posix.4) 1364*7c478bd9Sstevel@tonic-gate * -------------------------- 1365*7c478bd9Sstevel@tonic-gate * always IATTCHG|IBDWRITE 1366*7c478bd9Sstevel@tonic-gate */ 1367*7c478bd9Sstevel@tonic-gate /* 1368*7c478bd9Sstevel@tonic-gate * The inode is not updated if we're logging and the inode is a 1369*7c478bd9Sstevel@tonic-gate * directory with FRSYNC, FSYNC and FDSYNC flags set. 1370*7c478bd9Sstevel@tonic-gate */ 1371*7c478bd9Sstevel@tonic-gate if (ioflag & FRSYNC) { 1372*7c478bd9Sstevel@tonic-gate if (TRANS_ISTRANS(ufsvfsp) && ((ip->i_mode & IFMT) == IFDIR)) { 1373*7c478bd9Sstevel@tonic-gate doupdate = 0; 1374*7c478bd9Sstevel@tonic-gate } 1375*7c478bd9Sstevel@tonic-gate if (doupdate) { 1376*7c478bd9Sstevel@tonic-gate if ((ioflag & FSYNC) || 1377*7c478bd9Sstevel@tonic-gate ((ioflag & FDSYNC) && 1378*7c478bd9Sstevel@tonic-gate (ip->i_flag & (IATTCHG|IBDWRITE)))) { 1379*7c478bd9Sstevel@tonic-gate ufs_iupdat(ip, 1); 1380*7c478bd9Sstevel@tonic-gate } 1381*7c478bd9Sstevel@tonic-gate } 1382*7c478bd9Sstevel@tonic-gate } 1383*7c478bd9Sstevel@tonic-gate /* 1384*7c478bd9Sstevel@tonic-gate * If we've already done a partial read, terminate 1385*7c478bd9Sstevel@tonic-gate * the read but return no error. 1386*7c478bd9Sstevel@tonic-gate */ 1387*7c478bd9Sstevel@tonic-gate if (oresid != uio->uio_resid) 1388*7c478bd9Sstevel@tonic-gate error = 0; 1389*7c478bd9Sstevel@tonic-gate ITIMES(ip); 1390*7c478bd9Sstevel@tonic-gate 1391*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_RWIP_END, 1392*7c478bd9Sstevel@tonic-gate "ufs_rdip_end:vp %p error %d", vp, error); 1393*7c478bd9Sstevel@tonic-gate return (error); 1394*7c478bd9Sstevel@tonic-gate } 1395*7c478bd9Sstevel@tonic-gate 1396*7c478bd9Sstevel@tonic-gate /* ARGSUSED */ 1397*7c478bd9Sstevel@tonic-gate static int 1398*7c478bd9Sstevel@tonic-gate ufs_ioctl( 1399*7c478bd9Sstevel@tonic-gate struct vnode *vp, 1400*7c478bd9Sstevel@tonic-gate int cmd, 1401*7c478bd9Sstevel@tonic-gate intptr_t arg, 1402*7c478bd9Sstevel@tonic-gate int flag, 1403*7c478bd9Sstevel@tonic-gate struct cred *cr, 1404*7c478bd9Sstevel@tonic-gate int *rvalp) 1405*7c478bd9Sstevel@tonic-gate { 1406*7c478bd9Sstevel@tonic-gate struct lockfs lockfs, lockfs_out; 1407*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp = VTOI(vp)->i_ufsvfs; 1408*7c478bd9Sstevel@tonic-gate char *comment, *original_comment; 1409*7c478bd9Sstevel@tonic-gate struct fs *fs; 1410*7c478bd9Sstevel@tonic-gate struct ulockfs *ulp; 1411*7c478bd9Sstevel@tonic-gate offset_t off; 1412*7c478bd9Sstevel@tonic-gate extern int maxphys; 1413*7c478bd9Sstevel@tonic-gate int error; 1414*7c478bd9Sstevel@tonic-gate int issync; 1415*7c478bd9Sstevel@tonic-gate int trans_size; 1416*7c478bd9Sstevel@tonic-gate 1417*7c478bd9Sstevel@tonic-gate 1418*7c478bd9Sstevel@tonic-gate /* 1419*7c478bd9Sstevel@tonic-gate * forcibly unmounted 1420*7c478bd9Sstevel@tonic-gate */ 1421*7c478bd9Sstevel@tonic-gate if (ufsvfsp == NULL) { 1422*7c478bd9Sstevel@tonic-gate return (EIO); 1423*7c478bd9Sstevel@tonic-gate } 1424*7c478bd9Sstevel@tonic-gate 1425*7c478bd9Sstevel@tonic-gate fs = ufsvfsp->vfs_fs; 1426*7c478bd9Sstevel@tonic-gate 1427*7c478bd9Sstevel@tonic-gate if (cmd == Q_QUOTACTL) { 1428*7c478bd9Sstevel@tonic-gate error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_QUOTA_MASK); 1429*7c478bd9Sstevel@tonic-gate if (error) 1430*7c478bd9Sstevel@tonic-gate return (error); 1431*7c478bd9Sstevel@tonic-gate 1432*7c478bd9Sstevel@tonic-gate if (ulp) { 1433*7c478bd9Sstevel@tonic-gate TRANS_BEGIN_ASYNC(ufsvfsp, TOP_QUOTA, 1434*7c478bd9Sstevel@tonic-gate TOP_SETQUOTA_SIZE(fs)); 1435*7c478bd9Sstevel@tonic-gate } 1436*7c478bd9Sstevel@tonic-gate 1437*7c478bd9Sstevel@tonic-gate error = quotactl(vp, arg, flag, cr); 1438*7c478bd9Sstevel@tonic-gate 1439*7c478bd9Sstevel@tonic-gate if (ulp) { 1440*7c478bd9Sstevel@tonic-gate TRANS_END_ASYNC(ufsvfsp, TOP_QUOTA, 1441*7c478bd9Sstevel@tonic-gate TOP_SETQUOTA_SIZE(fs)); 1442*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 1443*7c478bd9Sstevel@tonic-gate } 1444*7c478bd9Sstevel@tonic-gate return (error); 1445*7c478bd9Sstevel@tonic-gate } 1446*7c478bd9Sstevel@tonic-gate 1447*7c478bd9Sstevel@tonic-gate switch (cmd) { 1448*7c478bd9Sstevel@tonic-gate case _FIOLFS: 1449*7c478bd9Sstevel@tonic-gate /* 1450*7c478bd9Sstevel@tonic-gate * file system locking 1451*7c478bd9Sstevel@tonic-gate */ 1452*7c478bd9Sstevel@tonic-gate if (secpolicy_fs_config(cr, ufsvfsp->vfs_vfs) != 0) 1453*7c478bd9Sstevel@tonic-gate return (EPERM); 1454*7c478bd9Sstevel@tonic-gate 1455*7c478bd9Sstevel@tonic-gate if ((flag & DATAMODEL_MASK) == DATAMODEL_NATIVE) { 1456*7c478bd9Sstevel@tonic-gate if (copyin((caddr_t)arg, &lockfs, 1457*7c478bd9Sstevel@tonic-gate sizeof (struct lockfs))) 1458*7c478bd9Sstevel@tonic-gate return (EFAULT); 1459*7c478bd9Sstevel@tonic-gate } 1460*7c478bd9Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 1461*7c478bd9Sstevel@tonic-gate else { 1462*7c478bd9Sstevel@tonic-gate struct lockfs32 lockfs32; 1463*7c478bd9Sstevel@tonic-gate /* Translate ILP32 lockfs to LP64 lockfs */ 1464*7c478bd9Sstevel@tonic-gate if (copyin((caddr_t)arg, &lockfs32, 1465*7c478bd9Sstevel@tonic-gate sizeof (struct lockfs32))) 1466*7c478bd9Sstevel@tonic-gate return (EFAULT); 1467*7c478bd9Sstevel@tonic-gate lockfs.lf_lock = (ulong_t)lockfs32.lf_lock; 1468*7c478bd9Sstevel@tonic-gate lockfs.lf_flags = (ulong_t)lockfs32.lf_flags; 1469*7c478bd9Sstevel@tonic-gate lockfs.lf_key = (ulong_t)lockfs32.lf_key; 1470*7c478bd9Sstevel@tonic-gate lockfs.lf_comlen = (ulong_t)lockfs32.lf_comlen; 1471*7c478bd9Sstevel@tonic-gate lockfs.lf_comment = 1472*7c478bd9Sstevel@tonic-gate (caddr_t)(uintptr_t)lockfs32.lf_comment; 1473*7c478bd9Sstevel@tonic-gate } 1474*7c478bd9Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 1475*7c478bd9Sstevel@tonic-gate 1476*7c478bd9Sstevel@tonic-gate if (lockfs.lf_comlen) { 1477*7c478bd9Sstevel@tonic-gate if (lockfs.lf_comlen > LOCKFS_MAXCOMMENTLEN) 1478*7c478bd9Sstevel@tonic-gate return (ENAMETOOLONG); 1479*7c478bd9Sstevel@tonic-gate comment = kmem_alloc(lockfs.lf_comlen, 1480*7c478bd9Sstevel@tonic-gate KM_SLEEP); 1481*7c478bd9Sstevel@tonic-gate if (copyin(lockfs.lf_comment, comment, 1482*7c478bd9Sstevel@tonic-gate lockfs.lf_comlen)) { 1483*7c478bd9Sstevel@tonic-gate kmem_free(comment, lockfs.lf_comlen); 1484*7c478bd9Sstevel@tonic-gate return (EFAULT); 1485*7c478bd9Sstevel@tonic-gate } 1486*7c478bd9Sstevel@tonic-gate original_comment = lockfs.lf_comment; 1487*7c478bd9Sstevel@tonic-gate lockfs.lf_comment = comment; 1488*7c478bd9Sstevel@tonic-gate } 1489*7c478bd9Sstevel@tonic-gate if ((error = ufs_fiolfs(vp, &lockfs, 0)) == 0) { 1490*7c478bd9Sstevel@tonic-gate lockfs.lf_comment = original_comment; 1491*7c478bd9Sstevel@tonic-gate 1492*7c478bd9Sstevel@tonic-gate if ((flag & DATAMODEL_MASK) == 1493*7c478bd9Sstevel@tonic-gate DATAMODEL_NATIVE) { 1494*7c478bd9Sstevel@tonic-gate (void) copyout(&lockfs, (caddr_t)arg, 1495*7c478bd9Sstevel@tonic-gate sizeof (struct lockfs)); 1496*7c478bd9Sstevel@tonic-gate } 1497*7c478bd9Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 1498*7c478bd9Sstevel@tonic-gate else { 1499*7c478bd9Sstevel@tonic-gate struct lockfs32 lockfs32; 1500*7c478bd9Sstevel@tonic-gate /* Translate LP64 to ILP32 lockfs */ 1501*7c478bd9Sstevel@tonic-gate lockfs32.lf_lock = 1502*7c478bd9Sstevel@tonic-gate (uint32_t)lockfs.lf_lock; 1503*7c478bd9Sstevel@tonic-gate lockfs32.lf_flags = 1504*7c478bd9Sstevel@tonic-gate (uint32_t)lockfs.lf_flags; 1505*7c478bd9Sstevel@tonic-gate lockfs32.lf_key = 1506*7c478bd9Sstevel@tonic-gate (uint32_t)lockfs.lf_key; 1507*7c478bd9Sstevel@tonic-gate lockfs32.lf_comlen = 1508*7c478bd9Sstevel@tonic-gate (uint32_t)lockfs.lf_comlen; 1509*7c478bd9Sstevel@tonic-gate lockfs32.lf_comment = 1510*7c478bd9Sstevel@tonic-gate (uint32_t)(uintptr_t)lockfs.lf_comment; 1511*7c478bd9Sstevel@tonic-gate (void) copyout(&lockfs32, (caddr_t)arg, 1512*7c478bd9Sstevel@tonic-gate sizeof (struct lockfs32)); 1513*7c478bd9Sstevel@tonic-gate } 1514*7c478bd9Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 1515*7c478bd9Sstevel@tonic-gate 1516*7c478bd9Sstevel@tonic-gate } else { 1517*7c478bd9Sstevel@tonic-gate if (lockfs.lf_comlen) 1518*7c478bd9Sstevel@tonic-gate kmem_free(comment, lockfs.lf_comlen); 1519*7c478bd9Sstevel@tonic-gate } 1520*7c478bd9Sstevel@tonic-gate return (error); 1521*7c478bd9Sstevel@tonic-gate 1522*7c478bd9Sstevel@tonic-gate case _FIOLFSS: 1523*7c478bd9Sstevel@tonic-gate /* 1524*7c478bd9Sstevel@tonic-gate * get file system locking status 1525*7c478bd9Sstevel@tonic-gate */ 1526*7c478bd9Sstevel@tonic-gate 1527*7c478bd9Sstevel@tonic-gate if ((flag & DATAMODEL_MASK) == DATAMODEL_NATIVE) { 1528*7c478bd9Sstevel@tonic-gate if (copyin((caddr_t)arg, &lockfs, 1529*7c478bd9Sstevel@tonic-gate sizeof (struct lockfs))) 1530*7c478bd9Sstevel@tonic-gate return (EFAULT); 1531*7c478bd9Sstevel@tonic-gate } 1532*7c478bd9Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 1533*7c478bd9Sstevel@tonic-gate else { 1534*7c478bd9Sstevel@tonic-gate struct lockfs32 lockfs32; 1535*7c478bd9Sstevel@tonic-gate /* Translate ILP32 lockfs to LP64 lockfs */ 1536*7c478bd9Sstevel@tonic-gate if (copyin((caddr_t)arg, &lockfs32, 1537*7c478bd9Sstevel@tonic-gate sizeof (struct lockfs32))) 1538*7c478bd9Sstevel@tonic-gate return (EFAULT); 1539*7c478bd9Sstevel@tonic-gate lockfs.lf_lock = (ulong_t)lockfs32.lf_lock; 1540*7c478bd9Sstevel@tonic-gate lockfs.lf_flags = (ulong_t)lockfs32.lf_flags; 1541*7c478bd9Sstevel@tonic-gate lockfs.lf_key = (ulong_t)lockfs32.lf_key; 1542*7c478bd9Sstevel@tonic-gate lockfs.lf_comlen = (ulong_t)lockfs32.lf_comlen; 1543*7c478bd9Sstevel@tonic-gate lockfs.lf_comment = 1544*7c478bd9Sstevel@tonic-gate (caddr_t)(uintptr_t)lockfs32.lf_comment; 1545*7c478bd9Sstevel@tonic-gate } 1546*7c478bd9Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 1547*7c478bd9Sstevel@tonic-gate 1548*7c478bd9Sstevel@tonic-gate if (error = ufs_fiolfss(vp, &lockfs_out)) 1549*7c478bd9Sstevel@tonic-gate return (error); 1550*7c478bd9Sstevel@tonic-gate lockfs.lf_lock = lockfs_out.lf_lock; 1551*7c478bd9Sstevel@tonic-gate lockfs.lf_key = lockfs_out.lf_key; 1552*7c478bd9Sstevel@tonic-gate lockfs.lf_flags = lockfs_out.lf_flags; 1553*7c478bd9Sstevel@tonic-gate lockfs.lf_comlen = MIN(lockfs.lf_comlen, 1554*7c478bd9Sstevel@tonic-gate lockfs_out.lf_comlen); 1555*7c478bd9Sstevel@tonic-gate 1556*7c478bd9Sstevel@tonic-gate if ((flag & DATAMODEL_MASK) == DATAMODEL_NATIVE) { 1557*7c478bd9Sstevel@tonic-gate if (copyout(&lockfs, (caddr_t)arg, 1558*7c478bd9Sstevel@tonic-gate sizeof (struct lockfs))) 1559*7c478bd9Sstevel@tonic-gate return (EFAULT); 1560*7c478bd9Sstevel@tonic-gate } 1561*7c478bd9Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 1562*7c478bd9Sstevel@tonic-gate else { 1563*7c478bd9Sstevel@tonic-gate /* Translate LP64 to ILP32 lockfs */ 1564*7c478bd9Sstevel@tonic-gate struct lockfs32 lockfs32; 1565*7c478bd9Sstevel@tonic-gate lockfs32.lf_lock = (uint32_t)lockfs.lf_lock; 1566*7c478bd9Sstevel@tonic-gate lockfs32.lf_flags = (uint32_t)lockfs.lf_flags; 1567*7c478bd9Sstevel@tonic-gate lockfs32.lf_key = (uint32_t)lockfs.lf_key; 1568*7c478bd9Sstevel@tonic-gate lockfs32.lf_comlen = (uint32_t)lockfs.lf_comlen; 1569*7c478bd9Sstevel@tonic-gate lockfs32.lf_comment = 1570*7c478bd9Sstevel@tonic-gate (uint32_t)(uintptr_t)lockfs.lf_comment; 1571*7c478bd9Sstevel@tonic-gate if (copyout(&lockfs32, (caddr_t)arg, 1572*7c478bd9Sstevel@tonic-gate sizeof (struct lockfs32))) 1573*7c478bd9Sstevel@tonic-gate return (EFAULT); 1574*7c478bd9Sstevel@tonic-gate } 1575*7c478bd9Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 1576*7c478bd9Sstevel@tonic-gate 1577*7c478bd9Sstevel@tonic-gate if (lockfs.lf_comlen && 1578*7c478bd9Sstevel@tonic-gate lockfs.lf_comment && lockfs_out.lf_comment) 1579*7c478bd9Sstevel@tonic-gate if (copyout(lockfs_out.lf_comment, 1580*7c478bd9Sstevel@tonic-gate lockfs.lf_comment, 1581*7c478bd9Sstevel@tonic-gate lockfs.lf_comlen)) 1582*7c478bd9Sstevel@tonic-gate return (EFAULT); 1583*7c478bd9Sstevel@tonic-gate return (0); 1584*7c478bd9Sstevel@tonic-gate 1585*7c478bd9Sstevel@tonic-gate case _FIOSATIME: 1586*7c478bd9Sstevel@tonic-gate /* 1587*7c478bd9Sstevel@tonic-gate * set access time 1588*7c478bd9Sstevel@tonic-gate */ 1589*7c478bd9Sstevel@tonic-gate 1590*7c478bd9Sstevel@tonic-gate /* 1591*7c478bd9Sstevel@tonic-gate * if mounted w/o atime, return quietly. 1592*7c478bd9Sstevel@tonic-gate * I briefly thought about returning ENOSYS, but 1593*7c478bd9Sstevel@tonic-gate * figured that most apps would consider this fatal 1594*7c478bd9Sstevel@tonic-gate * but the idea is to make this as seamless as poss. 1595*7c478bd9Sstevel@tonic-gate */ 1596*7c478bd9Sstevel@tonic-gate if (ufsvfsp->vfs_noatime) 1597*7c478bd9Sstevel@tonic-gate return (0); 1598*7c478bd9Sstevel@tonic-gate 1599*7c478bd9Sstevel@tonic-gate error = ufs_lockfs_begin(ufsvfsp, &ulp, 1600*7c478bd9Sstevel@tonic-gate ULOCKFS_SETATTR_MASK); 1601*7c478bd9Sstevel@tonic-gate if (error) 1602*7c478bd9Sstevel@tonic-gate return (error); 1603*7c478bd9Sstevel@tonic-gate 1604*7c478bd9Sstevel@tonic-gate if (ulp) { 1605*7c478bd9Sstevel@tonic-gate trans_size = (int)TOP_SETATTR_SIZE(VTOI(vp)); 1606*7c478bd9Sstevel@tonic-gate TRANS_BEGIN_CSYNC(ufsvfsp, issync, 1607*7c478bd9Sstevel@tonic-gate TOP_SETATTR, trans_size); 1608*7c478bd9Sstevel@tonic-gate } 1609*7c478bd9Sstevel@tonic-gate 1610*7c478bd9Sstevel@tonic-gate error = ufs_fiosatime(vp, (struct timeval *)arg, 1611*7c478bd9Sstevel@tonic-gate flag, cr); 1612*7c478bd9Sstevel@tonic-gate 1613*7c478bd9Sstevel@tonic-gate if (ulp) { 1614*7c478bd9Sstevel@tonic-gate TRANS_END_CSYNC(ufsvfsp, error, issync, 1615*7c478bd9Sstevel@tonic-gate TOP_SETATTR, trans_size); 1616*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 1617*7c478bd9Sstevel@tonic-gate } 1618*7c478bd9Sstevel@tonic-gate return (error); 1619*7c478bd9Sstevel@tonic-gate 1620*7c478bd9Sstevel@tonic-gate case _FIOSDIO: 1621*7c478bd9Sstevel@tonic-gate /* 1622*7c478bd9Sstevel@tonic-gate * set delayed-io 1623*7c478bd9Sstevel@tonic-gate */ 1624*7c478bd9Sstevel@tonic-gate return (ufs_fiosdio(vp, (uint_t *)arg, flag, cr)); 1625*7c478bd9Sstevel@tonic-gate 1626*7c478bd9Sstevel@tonic-gate case _FIOGDIO: 1627*7c478bd9Sstevel@tonic-gate /* 1628*7c478bd9Sstevel@tonic-gate * get delayed-io 1629*7c478bd9Sstevel@tonic-gate */ 1630*7c478bd9Sstevel@tonic-gate return (ufs_fiogdio(vp, (uint_t *)arg, flag, cr)); 1631*7c478bd9Sstevel@tonic-gate 1632*7c478bd9Sstevel@tonic-gate case _FIOIO: 1633*7c478bd9Sstevel@tonic-gate /* 1634*7c478bd9Sstevel@tonic-gate * inode open 1635*7c478bd9Sstevel@tonic-gate */ 1636*7c478bd9Sstevel@tonic-gate error = ufs_lockfs_begin(ufsvfsp, &ulp, 1637*7c478bd9Sstevel@tonic-gate ULOCKFS_VGET_MASK); 1638*7c478bd9Sstevel@tonic-gate if (error) 1639*7c478bd9Sstevel@tonic-gate return (error); 1640*7c478bd9Sstevel@tonic-gate 1641*7c478bd9Sstevel@tonic-gate error = ufs_fioio(vp, (struct fioio *)arg, flag, cr); 1642*7c478bd9Sstevel@tonic-gate 1643*7c478bd9Sstevel@tonic-gate if (ulp) { 1644*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 1645*7c478bd9Sstevel@tonic-gate } 1646*7c478bd9Sstevel@tonic-gate return (error); 1647*7c478bd9Sstevel@tonic-gate 1648*7c478bd9Sstevel@tonic-gate case _FIOFFS: 1649*7c478bd9Sstevel@tonic-gate /* 1650*7c478bd9Sstevel@tonic-gate * file system flush (push w/invalidate) 1651*7c478bd9Sstevel@tonic-gate */ 1652*7c478bd9Sstevel@tonic-gate if ((caddr_t)arg != NULL) 1653*7c478bd9Sstevel@tonic-gate return (EINVAL); 1654*7c478bd9Sstevel@tonic-gate return (ufs_fioffs(vp, NULL, cr)); 1655*7c478bd9Sstevel@tonic-gate 1656*7c478bd9Sstevel@tonic-gate case _FIOISBUSY: 1657*7c478bd9Sstevel@tonic-gate /* 1658*7c478bd9Sstevel@tonic-gate * Contract-private interface for Legato 1659*7c478bd9Sstevel@tonic-gate * Purge this vnode from the DNLC and decide 1660*7c478bd9Sstevel@tonic-gate * if this vnode is busy (*arg == 1) or not 1661*7c478bd9Sstevel@tonic-gate * (*arg == 0) 1662*7c478bd9Sstevel@tonic-gate */ 1663*7c478bd9Sstevel@tonic-gate if (secpolicy_fs_config(cr, ufsvfsp->vfs_vfs) != 0) 1664*7c478bd9Sstevel@tonic-gate return (EPERM); 1665*7c478bd9Sstevel@tonic-gate error = ufs_fioisbusy(vp, (int *)arg, cr); 1666*7c478bd9Sstevel@tonic-gate return (error); 1667*7c478bd9Sstevel@tonic-gate 1668*7c478bd9Sstevel@tonic-gate case _FIODIRECTIO: 1669*7c478bd9Sstevel@tonic-gate return (ufs_fiodirectio(vp, (int)arg, cr)); 1670*7c478bd9Sstevel@tonic-gate 1671*7c478bd9Sstevel@tonic-gate case _FIOTUNE: 1672*7c478bd9Sstevel@tonic-gate /* 1673*7c478bd9Sstevel@tonic-gate * Tune the file system (aka setting fs attributes) 1674*7c478bd9Sstevel@tonic-gate */ 1675*7c478bd9Sstevel@tonic-gate error = ufs_lockfs_begin(ufsvfsp, &ulp, 1676*7c478bd9Sstevel@tonic-gate ULOCKFS_SETATTR_MASK); 1677*7c478bd9Sstevel@tonic-gate if (error) 1678*7c478bd9Sstevel@tonic-gate return (error); 1679*7c478bd9Sstevel@tonic-gate 1680*7c478bd9Sstevel@tonic-gate error = ufs_fiotune(vp, (struct fiotune *)arg, cr); 1681*7c478bd9Sstevel@tonic-gate 1682*7c478bd9Sstevel@tonic-gate if (ulp) 1683*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 1684*7c478bd9Sstevel@tonic-gate return (error); 1685*7c478bd9Sstevel@tonic-gate 1686*7c478bd9Sstevel@tonic-gate case _FIOLOGENABLE: 1687*7c478bd9Sstevel@tonic-gate if (secpolicy_fs_config(cr, ufsvfsp->vfs_vfs) != 0) 1688*7c478bd9Sstevel@tonic-gate return (EPERM); 1689*7c478bd9Sstevel@tonic-gate return (ufs_fiologenable(vp, (void *)arg, cr, flag)); 1690*7c478bd9Sstevel@tonic-gate 1691*7c478bd9Sstevel@tonic-gate case _FIOLOGDISABLE: 1692*7c478bd9Sstevel@tonic-gate if (secpolicy_fs_config(cr, ufsvfsp->vfs_vfs) != 0) 1693*7c478bd9Sstevel@tonic-gate return (EPERM); 1694*7c478bd9Sstevel@tonic-gate return (ufs_fiologdisable(vp, (void *)arg, cr, flag)); 1695*7c478bd9Sstevel@tonic-gate 1696*7c478bd9Sstevel@tonic-gate case _FIOISLOG: 1697*7c478bd9Sstevel@tonic-gate return (ufs_fioislog(vp, (void *)arg, cr, flag)); 1698*7c478bd9Sstevel@tonic-gate 1699*7c478bd9Sstevel@tonic-gate case _FIOSNAPSHOTCREATE_MULTI: 1700*7c478bd9Sstevel@tonic-gate { 1701*7c478bd9Sstevel@tonic-gate struct fiosnapcreate_multi fc, *fcp; 1702*7c478bd9Sstevel@tonic-gate size_t fcm_size; 1703*7c478bd9Sstevel@tonic-gate 1704*7c478bd9Sstevel@tonic-gate if (copyin((void *)arg, &fc, sizeof (fc))) 1705*7c478bd9Sstevel@tonic-gate return (EFAULT); 1706*7c478bd9Sstevel@tonic-gate if (fc.backfilecount > MAX_BACKFILE_COUNT) 1707*7c478bd9Sstevel@tonic-gate return (EINVAL); 1708*7c478bd9Sstevel@tonic-gate fcm_size = sizeof (struct fiosnapcreate_multi) + 1709*7c478bd9Sstevel@tonic-gate (fc.backfilecount - 1) * sizeof (int); 1710*7c478bd9Sstevel@tonic-gate fcp = (struct fiosnapcreate_multi *) 1711*7c478bd9Sstevel@tonic-gate kmem_alloc(fcm_size, KM_SLEEP); 1712*7c478bd9Sstevel@tonic-gate if (copyin((void *)arg, fcp, fcm_size)) { 1713*7c478bd9Sstevel@tonic-gate kmem_free(fcp, fcm_size); 1714*7c478bd9Sstevel@tonic-gate return (EFAULT); 1715*7c478bd9Sstevel@tonic-gate } 1716*7c478bd9Sstevel@tonic-gate error = ufs_snap_create(vp, fcp, cr); 1717*7c478bd9Sstevel@tonic-gate if (!error && copyout(fcp, (void *)arg, fcm_size)) 1718*7c478bd9Sstevel@tonic-gate error = EFAULT; 1719*7c478bd9Sstevel@tonic-gate kmem_free(fcp, fcm_size); 1720*7c478bd9Sstevel@tonic-gate return (error); 1721*7c478bd9Sstevel@tonic-gate } 1722*7c478bd9Sstevel@tonic-gate 1723*7c478bd9Sstevel@tonic-gate case _FIOSNAPSHOTDELETE: 1724*7c478bd9Sstevel@tonic-gate { 1725*7c478bd9Sstevel@tonic-gate struct fiosnapdelete fc; 1726*7c478bd9Sstevel@tonic-gate 1727*7c478bd9Sstevel@tonic-gate if (copyin((void *)arg, &fc, sizeof (fc))) 1728*7c478bd9Sstevel@tonic-gate return (EFAULT); 1729*7c478bd9Sstevel@tonic-gate error = ufs_snap_delete(vp, &fc, cr); 1730*7c478bd9Sstevel@tonic-gate if (!error && copyout(&fc, (void *)arg, sizeof (fc))) 1731*7c478bd9Sstevel@tonic-gate error = EFAULT; 1732*7c478bd9Sstevel@tonic-gate return (error); 1733*7c478bd9Sstevel@tonic-gate } 1734*7c478bd9Sstevel@tonic-gate 1735*7c478bd9Sstevel@tonic-gate case _FIOGETSUPERBLOCK: 1736*7c478bd9Sstevel@tonic-gate if (copyout(fs, (void *)arg, SBSIZE)) 1737*7c478bd9Sstevel@tonic-gate return (EFAULT); 1738*7c478bd9Sstevel@tonic-gate return (0); 1739*7c478bd9Sstevel@tonic-gate 1740*7c478bd9Sstevel@tonic-gate case _FIOGETMAXPHYS: 1741*7c478bd9Sstevel@tonic-gate if (copyout(&maxphys, (void *)arg, sizeof (maxphys))) 1742*7c478bd9Sstevel@tonic-gate return (EFAULT); 1743*7c478bd9Sstevel@tonic-gate return (0); 1744*7c478bd9Sstevel@tonic-gate 1745*7c478bd9Sstevel@tonic-gate /* 1746*7c478bd9Sstevel@tonic-gate * The following 3 ioctls are for TSufs support 1747*7c478bd9Sstevel@tonic-gate * although could potentially be used elsewhere 1748*7c478bd9Sstevel@tonic-gate */ 1749*7c478bd9Sstevel@tonic-gate case _FIO_SET_LUFS_DEBUG: 1750*7c478bd9Sstevel@tonic-gate if (secpolicy_fs_config(cr, ufsvfsp->vfs_vfs) != 0) 1751*7c478bd9Sstevel@tonic-gate return (EPERM); 1752*7c478bd9Sstevel@tonic-gate lufs_debug = (uint32_t)arg; 1753*7c478bd9Sstevel@tonic-gate return (0); 1754*7c478bd9Sstevel@tonic-gate 1755*7c478bd9Sstevel@tonic-gate case _FIO_SET_LUFS_ERROR: 1756*7c478bd9Sstevel@tonic-gate if (secpolicy_fs_config(cr, ufsvfsp->vfs_vfs) != 0) 1757*7c478bd9Sstevel@tonic-gate return (EPERM); 1758*7c478bd9Sstevel@tonic-gate TRANS_SETERROR(ufsvfsp); 1759*7c478bd9Sstevel@tonic-gate return (0); 1760*7c478bd9Sstevel@tonic-gate 1761*7c478bd9Sstevel@tonic-gate case _FIO_GET_TOP_STATS: 1762*7c478bd9Sstevel@tonic-gate { 1763*7c478bd9Sstevel@tonic-gate fio_lufs_stats_t *ls; 1764*7c478bd9Sstevel@tonic-gate ml_unit_t *ul = ufsvfsp->vfs_log; 1765*7c478bd9Sstevel@tonic-gate 1766*7c478bd9Sstevel@tonic-gate ls = kmem_zalloc(sizeof (*ls), KM_SLEEP); 1767*7c478bd9Sstevel@tonic-gate ls->ls_debug = ul->un_debug; /* return debug value */ 1768*7c478bd9Sstevel@tonic-gate /* Copy stucture if statistics are being kept */ 1769*7c478bd9Sstevel@tonic-gate if (ul->un_logmap->mtm_tops) { 1770*7c478bd9Sstevel@tonic-gate ls->ls_topstats = *(ul->un_logmap->mtm_tops); 1771*7c478bd9Sstevel@tonic-gate } 1772*7c478bd9Sstevel@tonic-gate error = 0; 1773*7c478bd9Sstevel@tonic-gate if (copyout(ls, (void *)arg, sizeof (*ls))) 1774*7c478bd9Sstevel@tonic-gate error = EFAULT; 1775*7c478bd9Sstevel@tonic-gate kmem_free(ls, sizeof (*ls)); 1776*7c478bd9Sstevel@tonic-gate return (error); 1777*7c478bd9Sstevel@tonic-gate } 1778*7c478bd9Sstevel@tonic-gate 1779*7c478bd9Sstevel@tonic-gate case _FIO_SEEK_DATA: 1780*7c478bd9Sstevel@tonic-gate case _FIO_SEEK_HOLE: 1781*7c478bd9Sstevel@tonic-gate if (ddi_copyin((void *)arg, &off, sizeof (off), flag)) 1782*7c478bd9Sstevel@tonic-gate return (EFAULT); 1783*7c478bd9Sstevel@tonic-gate /* offset paramater is in/out */ 1784*7c478bd9Sstevel@tonic-gate error = ufs_fio_holey(vp, cmd, &off); 1785*7c478bd9Sstevel@tonic-gate if (error) 1786*7c478bd9Sstevel@tonic-gate return (error); 1787*7c478bd9Sstevel@tonic-gate if (ddi_copyout(&off, (void *)arg, sizeof (off), flag)) 1788*7c478bd9Sstevel@tonic-gate return (EFAULT); 1789*7c478bd9Sstevel@tonic-gate return (0); 1790*7c478bd9Sstevel@tonic-gate 1791*7c478bd9Sstevel@tonic-gate default: 1792*7c478bd9Sstevel@tonic-gate return (ENOTTY); 1793*7c478bd9Sstevel@tonic-gate } 1794*7c478bd9Sstevel@tonic-gate } 1795*7c478bd9Sstevel@tonic-gate 1796*7c478bd9Sstevel@tonic-gate /* ARGSUSED */ 1797*7c478bd9Sstevel@tonic-gate static int 1798*7c478bd9Sstevel@tonic-gate ufs_getattr(struct vnode *vp, struct vattr *vap, int flags, 1799*7c478bd9Sstevel@tonic-gate struct cred *cr) 1800*7c478bd9Sstevel@tonic-gate { 1801*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 1802*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp; 1803*7c478bd9Sstevel@tonic-gate int err; 1804*7c478bd9Sstevel@tonic-gate 1805*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_GETATTR_START, 1806*7c478bd9Sstevel@tonic-gate "ufs_getattr_start:vp %p flags %x", vp, flags); 1807*7c478bd9Sstevel@tonic-gate 1808*7c478bd9Sstevel@tonic-gate if (vap->va_mask == AT_SIZE) { 1809*7c478bd9Sstevel@tonic-gate /* 1810*7c478bd9Sstevel@tonic-gate * for performance, if only the size is requested don't bother 1811*7c478bd9Sstevel@tonic-gate * with anything else. 1812*7c478bd9Sstevel@tonic-gate */ 1813*7c478bd9Sstevel@tonic-gate UFS_GET_ISIZE(&vap->va_size, ip); 1814*7c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_UFS, TR_UFS_GETATTR_END, 1815*7c478bd9Sstevel@tonic-gate "ufs_getattr_end:vp %p", vp); 1816*7c478bd9Sstevel@tonic-gate return (0); 1817*7c478bd9Sstevel@tonic-gate } 1818*7c478bd9Sstevel@tonic-gate 1819*7c478bd9Sstevel@tonic-gate /* 1820*7c478bd9Sstevel@tonic-gate * inlined lockfs checks 1821*7c478bd9Sstevel@tonic-gate */ 1822*7c478bd9Sstevel@tonic-gate ufsvfsp = ip->i_ufsvfs; 1823*7c478bd9Sstevel@tonic-gate if ((ufsvfsp == NULL) || ULOCKFS_IS_HLOCK(&ufsvfsp->vfs_ulockfs)) { 1824*7c478bd9Sstevel@tonic-gate err = EIO; 1825*7c478bd9Sstevel@tonic-gate goto out; 1826*7c478bd9Sstevel@tonic-gate } 1827*7c478bd9Sstevel@tonic-gate 1828*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER); 1829*7c478bd9Sstevel@tonic-gate /* 1830*7c478bd9Sstevel@tonic-gate * Return all the attributes. This should be refined so 1831*7c478bd9Sstevel@tonic-gate * that it only returns what's asked for. 1832*7c478bd9Sstevel@tonic-gate */ 1833*7c478bd9Sstevel@tonic-gate 1834*7c478bd9Sstevel@tonic-gate /* 1835*7c478bd9Sstevel@tonic-gate * Copy from inode table. 1836*7c478bd9Sstevel@tonic-gate */ 1837*7c478bd9Sstevel@tonic-gate vap->va_type = vp->v_type; 1838*7c478bd9Sstevel@tonic-gate vap->va_mode = ip->i_mode & MODEMASK; 1839*7c478bd9Sstevel@tonic-gate /* 1840*7c478bd9Sstevel@tonic-gate * If there is an ACL and there is a mask entry, then do the 1841*7c478bd9Sstevel@tonic-gate * extra work that completes the equivalent of an acltomode(3) 1842*7c478bd9Sstevel@tonic-gate * call. According to POSIX P1003.1e, the acl mask should be 1843*7c478bd9Sstevel@tonic-gate * returned in the group permissions field. 1844*7c478bd9Sstevel@tonic-gate * 1845*7c478bd9Sstevel@tonic-gate * - start with the original permission and mode bits (from above) 1846*7c478bd9Sstevel@tonic-gate * - clear the group owner bits 1847*7c478bd9Sstevel@tonic-gate * - add in the mask bits. 1848*7c478bd9Sstevel@tonic-gate */ 1849*7c478bd9Sstevel@tonic-gate if (ip->i_ufs_acl && ip->i_ufs_acl->aclass.acl_ismask) { 1850*7c478bd9Sstevel@tonic-gate vap->va_mode &= ~((VREAD | VWRITE | VEXEC) >> 3); 1851*7c478bd9Sstevel@tonic-gate vap->va_mode |= 1852*7c478bd9Sstevel@tonic-gate (ip->i_ufs_acl->aclass.acl_maskbits & PERMMASK) << 3; 1853*7c478bd9Sstevel@tonic-gate } 1854*7c478bd9Sstevel@tonic-gate vap->va_uid = ip->i_uid; 1855*7c478bd9Sstevel@tonic-gate vap->va_gid = ip->i_gid; 1856*7c478bd9Sstevel@tonic-gate vap->va_fsid = ip->i_dev; 1857*7c478bd9Sstevel@tonic-gate vap->va_nodeid = (ino64_t)ip->i_number; 1858*7c478bd9Sstevel@tonic-gate vap->va_nlink = ip->i_nlink; 1859*7c478bd9Sstevel@tonic-gate vap->va_size = ip->i_size; 1860*7c478bd9Sstevel@tonic-gate if (vp->v_type == VCHR || vp->v_type == VBLK) 1861*7c478bd9Sstevel@tonic-gate vap->va_rdev = ip->i_rdev; 1862*7c478bd9Sstevel@tonic-gate else 1863*7c478bd9Sstevel@tonic-gate vap->va_rdev = 0; /* not a b/c spec. */ 1864*7c478bd9Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 1865*7c478bd9Sstevel@tonic-gate ITIMES_NOLOCK(ip); /* mark correct time in inode */ 1866*7c478bd9Sstevel@tonic-gate vap->va_seq = ip->i_seq; 1867*7c478bd9Sstevel@tonic-gate vap->va_atime.tv_sec = (time_t)ip->i_atime.tv_sec; 1868*7c478bd9Sstevel@tonic-gate vap->va_atime.tv_nsec = ip->i_atime.tv_usec*1000; 1869*7c478bd9Sstevel@tonic-gate vap->va_mtime.tv_sec = (time_t)ip->i_mtime.tv_sec; 1870*7c478bd9Sstevel@tonic-gate vap->va_mtime.tv_nsec = ip->i_mtime.tv_usec*1000; 1871*7c478bd9Sstevel@tonic-gate vap->va_ctime.tv_sec = (time_t)ip->i_ctime.tv_sec; 1872*7c478bd9Sstevel@tonic-gate vap->va_ctime.tv_nsec = ip->i_ctime.tv_usec*1000; 1873*7c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 1874*7c478bd9Sstevel@tonic-gate 1875*7c478bd9Sstevel@tonic-gate switch (ip->i_mode & IFMT) { 1876*7c478bd9Sstevel@tonic-gate 1877*7c478bd9Sstevel@tonic-gate case IFBLK: 1878*7c478bd9Sstevel@tonic-gate vap->va_blksize = MAXBSIZE; /* was BLKDEV_IOSIZE */ 1879*7c478bd9Sstevel@tonic-gate break; 1880*7c478bd9Sstevel@tonic-gate 1881*7c478bd9Sstevel@tonic-gate case IFCHR: 1882*7c478bd9Sstevel@tonic-gate vap->va_blksize = MAXBSIZE; 1883*7c478bd9Sstevel@tonic-gate break; 1884*7c478bd9Sstevel@tonic-gate 1885*7c478bd9Sstevel@tonic-gate default: 1886*7c478bd9Sstevel@tonic-gate vap->va_blksize = ip->i_fs->fs_bsize; 1887*7c478bd9Sstevel@tonic-gate break; 1888*7c478bd9Sstevel@tonic-gate } 1889*7c478bd9Sstevel@tonic-gate vap->va_nblocks = (fsblkcnt64_t)ip->i_blocks; 1890*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 1891*7c478bd9Sstevel@tonic-gate err = 0; 1892*7c478bd9Sstevel@tonic-gate 1893*7c478bd9Sstevel@tonic-gate out: 1894*7c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_UFS, TR_UFS_GETATTR_END, "ufs_getattr_end:vp %p", vp); 1895*7c478bd9Sstevel@tonic-gate 1896*7c478bd9Sstevel@tonic-gate return (err); 1897*7c478bd9Sstevel@tonic-gate } 1898*7c478bd9Sstevel@tonic-gate 1899*7c478bd9Sstevel@tonic-gate /*ARGSUSED4*/ 1900*7c478bd9Sstevel@tonic-gate static int 1901*7c478bd9Sstevel@tonic-gate ufs_setattr( 1902*7c478bd9Sstevel@tonic-gate struct vnode *vp, 1903*7c478bd9Sstevel@tonic-gate struct vattr *vap, 1904*7c478bd9Sstevel@tonic-gate int flags, 1905*7c478bd9Sstevel@tonic-gate struct cred *cr, 1906*7c478bd9Sstevel@tonic-gate caller_context_t *ct) 1907*7c478bd9Sstevel@tonic-gate { 1908*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 1909*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 1910*7c478bd9Sstevel@tonic-gate struct fs *fs; 1911*7c478bd9Sstevel@tonic-gate struct ulockfs *ulp; 1912*7c478bd9Sstevel@tonic-gate char *errmsg1; 1913*7c478bd9Sstevel@tonic-gate char *errmsg2; 1914*7c478bd9Sstevel@tonic-gate long blocks; 1915*7c478bd9Sstevel@tonic-gate long int mask = vap->va_mask; 1916*7c478bd9Sstevel@tonic-gate size_t len1, len2; 1917*7c478bd9Sstevel@tonic-gate int issync; 1918*7c478bd9Sstevel@tonic-gate int trans_size; 1919*7c478bd9Sstevel@tonic-gate int dotrans; 1920*7c478bd9Sstevel@tonic-gate int dorwlock; 1921*7c478bd9Sstevel@tonic-gate int error; 1922*7c478bd9Sstevel@tonic-gate int owner_change; 1923*7c478bd9Sstevel@tonic-gate int dodqlock; 1924*7c478bd9Sstevel@tonic-gate timestruc_t now; 1925*7c478bd9Sstevel@tonic-gate vattr_t oldva; 1926*7c478bd9Sstevel@tonic-gate int retry = 1; 1927*7c478bd9Sstevel@tonic-gate 1928*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_SETATTR_START, 1929*7c478bd9Sstevel@tonic-gate "ufs_setattr_start:vp %p flags %x", vp, flags); 1930*7c478bd9Sstevel@tonic-gate 1931*7c478bd9Sstevel@tonic-gate /* 1932*7c478bd9Sstevel@tonic-gate * Cannot set these attributes. 1933*7c478bd9Sstevel@tonic-gate */ 1934*7c478bd9Sstevel@tonic-gate if (mask & AT_NOSET) { 1935*7c478bd9Sstevel@tonic-gate error = EINVAL; 1936*7c478bd9Sstevel@tonic-gate goto out; 1937*7c478bd9Sstevel@tonic-gate } 1938*7c478bd9Sstevel@tonic-gate 1939*7c478bd9Sstevel@tonic-gate /* 1940*7c478bd9Sstevel@tonic-gate * check for forced unmount 1941*7c478bd9Sstevel@tonic-gate */ 1942*7c478bd9Sstevel@tonic-gate if (ufsvfsp == NULL) 1943*7c478bd9Sstevel@tonic-gate return (EIO); 1944*7c478bd9Sstevel@tonic-gate 1945*7c478bd9Sstevel@tonic-gate fs = ufsvfsp->vfs_fs; 1946*7c478bd9Sstevel@tonic-gate if (fs->fs_ronly != 0) 1947*7c478bd9Sstevel@tonic-gate return (EROFS); 1948*7c478bd9Sstevel@tonic-gate 1949*7c478bd9Sstevel@tonic-gate again: 1950*7c478bd9Sstevel@tonic-gate errmsg1 = NULL; 1951*7c478bd9Sstevel@tonic-gate errmsg2 = NULL; 1952*7c478bd9Sstevel@tonic-gate dotrans = 0; 1953*7c478bd9Sstevel@tonic-gate dorwlock = 0; 1954*7c478bd9Sstevel@tonic-gate dodqlock = 0; 1955*7c478bd9Sstevel@tonic-gate 1956*7c478bd9Sstevel@tonic-gate error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_SETATTR_MASK); 1957*7c478bd9Sstevel@tonic-gate if (error) 1958*7c478bd9Sstevel@tonic-gate goto out; 1959*7c478bd9Sstevel@tonic-gate 1960*7c478bd9Sstevel@tonic-gate /* 1961*7c478bd9Sstevel@tonic-gate * Acquire i_rwlock before TRANS_BEGIN_CSYNC() if this is a file. 1962*7c478bd9Sstevel@tonic-gate * This follows the protocol for read()/write(). 1963*7c478bd9Sstevel@tonic-gate */ 1964*7c478bd9Sstevel@tonic-gate if (vp->v_type != VDIR) { 1965*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_rwlock, RW_WRITER); 1966*7c478bd9Sstevel@tonic-gate dorwlock = 1; 1967*7c478bd9Sstevel@tonic-gate } 1968*7c478bd9Sstevel@tonic-gate 1969*7c478bd9Sstevel@tonic-gate /* 1970*7c478bd9Sstevel@tonic-gate * Truncate file. Must have write permission and not be a directory. 1971*7c478bd9Sstevel@tonic-gate */ 1972*7c478bd9Sstevel@tonic-gate if (mask & AT_SIZE) { 1973*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 1974*7c478bd9Sstevel@tonic-gate if (vp->v_type == VDIR) { 1975*7c478bd9Sstevel@tonic-gate error = EISDIR; 1976*7c478bd9Sstevel@tonic-gate goto update_inode; 1977*7c478bd9Sstevel@tonic-gate } 1978*7c478bd9Sstevel@tonic-gate if (error = ufs_iaccess(ip, IWRITE, cr)) 1979*7c478bd9Sstevel@tonic-gate goto update_inode; 1980*7c478bd9Sstevel@tonic-gate 1981*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 1982*7c478bd9Sstevel@tonic-gate error = TRANS_ITRUNC(ip, vap->va_size, 0, cr); 1983*7c478bd9Sstevel@tonic-gate if (error) { 1984*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 1985*7c478bd9Sstevel@tonic-gate goto update_inode; 1986*7c478bd9Sstevel@tonic-gate } 1987*7c478bd9Sstevel@tonic-gate } 1988*7c478bd9Sstevel@tonic-gate 1989*7c478bd9Sstevel@tonic-gate if (ulp) { 1990*7c478bd9Sstevel@tonic-gate trans_size = (int)TOP_SETATTR_SIZE(ip); 1991*7c478bd9Sstevel@tonic-gate TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_SETATTR, trans_size); 1992*7c478bd9Sstevel@tonic-gate ++dotrans; 1993*7c478bd9Sstevel@tonic-gate } 1994*7c478bd9Sstevel@tonic-gate 1995*7c478bd9Sstevel@tonic-gate /* 1996*7c478bd9Sstevel@tonic-gate * Acquire i_rwlock after TRANS_BEGIN_CSYNC() if this is a directory. 1997*7c478bd9Sstevel@tonic-gate * This follows the protocol established by 1998*7c478bd9Sstevel@tonic-gate * ufs_link/create/remove/rename/mkdir/rmdir/symlink. 1999*7c478bd9Sstevel@tonic-gate */ 2000*7c478bd9Sstevel@tonic-gate if (vp->v_type == VDIR) { 2001*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_rwlock, RW_WRITER); 2002*7c478bd9Sstevel@tonic-gate dorwlock = 1; 2003*7c478bd9Sstevel@tonic-gate } 2004*7c478bd9Sstevel@tonic-gate 2005*7c478bd9Sstevel@tonic-gate /* 2006*7c478bd9Sstevel@tonic-gate * Grab quota lock if we are changing the file's owner. 2007*7c478bd9Sstevel@tonic-gate */ 2008*7c478bd9Sstevel@tonic-gate if (mask & AT_UID) { 2009*7c478bd9Sstevel@tonic-gate rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 2010*7c478bd9Sstevel@tonic-gate dodqlock = 1; 2011*7c478bd9Sstevel@tonic-gate } 2012*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 2013*7c478bd9Sstevel@tonic-gate 2014*7c478bd9Sstevel@tonic-gate oldva.va_mode = ip->i_mode; 2015*7c478bd9Sstevel@tonic-gate oldva.va_uid = ip->i_uid; 2016*7c478bd9Sstevel@tonic-gate oldva.va_gid = ip->i_gid; 2017*7c478bd9Sstevel@tonic-gate 2018*7c478bd9Sstevel@tonic-gate vap->va_mask &= ~AT_SIZE; 2019*7c478bd9Sstevel@tonic-gate /* 2020*7c478bd9Sstevel@tonic-gate * ufs_iaccess is "close enough"; that's because it doesn't 2021*7c478bd9Sstevel@tonic-gate * map the defines. 2022*7c478bd9Sstevel@tonic-gate */ 2023*7c478bd9Sstevel@tonic-gate error = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 2024*7c478bd9Sstevel@tonic-gate ufs_iaccess, ip); 2025*7c478bd9Sstevel@tonic-gate if (error) 2026*7c478bd9Sstevel@tonic-gate goto update_inode; 2027*7c478bd9Sstevel@tonic-gate 2028*7c478bd9Sstevel@tonic-gate mask = vap->va_mask; 2029*7c478bd9Sstevel@tonic-gate 2030*7c478bd9Sstevel@tonic-gate /* 2031*7c478bd9Sstevel@tonic-gate * Change file access modes. 2032*7c478bd9Sstevel@tonic-gate */ 2033*7c478bd9Sstevel@tonic-gate if (mask & AT_MODE) { 2034*7c478bd9Sstevel@tonic-gate ip->i_mode = (ip->i_mode & IFMT) | (vap->va_mode & ~IFMT); 2035*7c478bd9Sstevel@tonic-gate TRANS_INODE(ufsvfsp, ip); 2036*7c478bd9Sstevel@tonic-gate ip->i_flag |= ICHG; 2037*7c478bd9Sstevel@tonic-gate if (stickyhack) { 2038*7c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock); 2039*7c478bd9Sstevel@tonic-gate if ((ip->i_mode & (ISVTX | IEXEC | IFDIR)) == ISVTX) 2040*7c478bd9Sstevel@tonic-gate vp->v_flag |= VSWAPLIKE; 2041*7c478bd9Sstevel@tonic-gate else 2042*7c478bd9Sstevel@tonic-gate vp->v_flag &= ~VSWAPLIKE; 2043*7c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock); 2044*7c478bd9Sstevel@tonic-gate } 2045*7c478bd9Sstevel@tonic-gate } 2046*7c478bd9Sstevel@tonic-gate if (mask & (AT_UID|AT_GID)) { 2047*7c478bd9Sstevel@tonic-gate if (mask & AT_UID) { 2048*7c478bd9Sstevel@tonic-gate /* 2049*7c478bd9Sstevel@tonic-gate * Don't change ownership of the quota inode. 2050*7c478bd9Sstevel@tonic-gate */ 2051*7c478bd9Sstevel@tonic-gate if (ufsvfsp->vfs_qinod == ip) { 2052*7c478bd9Sstevel@tonic-gate ASSERT(ufsvfsp->vfs_qflags & MQ_ENABLED); 2053*7c478bd9Sstevel@tonic-gate error = EINVAL; 2054*7c478bd9Sstevel@tonic-gate goto update_inode; 2055*7c478bd9Sstevel@tonic-gate } 2056*7c478bd9Sstevel@tonic-gate 2057*7c478bd9Sstevel@tonic-gate /* 2058*7c478bd9Sstevel@tonic-gate * No real ownership change. 2059*7c478bd9Sstevel@tonic-gate */ 2060*7c478bd9Sstevel@tonic-gate if (ip->i_uid == vap->va_uid) { 2061*7c478bd9Sstevel@tonic-gate blocks = 0; 2062*7c478bd9Sstevel@tonic-gate owner_change = 0; 2063*7c478bd9Sstevel@tonic-gate } 2064*7c478bd9Sstevel@tonic-gate /* 2065*7c478bd9Sstevel@tonic-gate * Remove the blocks and the file, from the old user's 2066*7c478bd9Sstevel@tonic-gate * quota. 2067*7c478bd9Sstevel@tonic-gate */ 2068*7c478bd9Sstevel@tonic-gate else { 2069*7c478bd9Sstevel@tonic-gate blocks = ip->i_blocks; 2070*7c478bd9Sstevel@tonic-gate owner_change = 1; 2071*7c478bd9Sstevel@tonic-gate 2072*7c478bd9Sstevel@tonic-gate (void) chkdq(ip, -blocks, /* force */ 1, cr, 2073*7c478bd9Sstevel@tonic-gate (char **)NULL, (size_t *)NULL); 2074*7c478bd9Sstevel@tonic-gate (void) chkiq(ufsvfsp, /* change */ -1, ip, 2075*7c478bd9Sstevel@tonic-gate (uid_t)ip->i_uid, 2076*7c478bd9Sstevel@tonic-gate /* force */ 1, cr, 2077*7c478bd9Sstevel@tonic-gate (char **)NULL, (size_t *)NULL); 2078*7c478bd9Sstevel@tonic-gate dqrele(ip->i_dquot); 2079*7c478bd9Sstevel@tonic-gate } 2080*7c478bd9Sstevel@tonic-gate 2081*7c478bd9Sstevel@tonic-gate ip->i_uid = vap->va_uid; 2082*7c478bd9Sstevel@tonic-gate 2083*7c478bd9Sstevel@tonic-gate /* 2084*7c478bd9Sstevel@tonic-gate * There is a real ownership change. 2085*7c478bd9Sstevel@tonic-gate */ 2086*7c478bd9Sstevel@tonic-gate if (owner_change) { 2087*7c478bd9Sstevel@tonic-gate /* 2088*7c478bd9Sstevel@tonic-gate * Add the blocks and the file to the new 2089*7c478bd9Sstevel@tonic-gate * user's quota. 2090*7c478bd9Sstevel@tonic-gate */ 2091*7c478bd9Sstevel@tonic-gate ip->i_dquot = getinoquota(ip); 2092*7c478bd9Sstevel@tonic-gate (void) chkdq(ip, blocks, /* force */ 1, cr, 2093*7c478bd9Sstevel@tonic-gate &errmsg1, &len1); 2094*7c478bd9Sstevel@tonic-gate (void) chkiq(ufsvfsp, /* change */ 1, 2095*7c478bd9Sstevel@tonic-gate (struct inode *)NULL, 2096*7c478bd9Sstevel@tonic-gate (uid_t)ip->i_uid, 2097*7c478bd9Sstevel@tonic-gate /* force */ 1, cr, 2098*7c478bd9Sstevel@tonic-gate &errmsg2, &len2); 2099*7c478bd9Sstevel@tonic-gate } 2100*7c478bd9Sstevel@tonic-gate } 2101*7c478bd9Sstevel@tonic-gate if (mask & AT_GID) { 2102*7c478bd9Sstevel@tonic-gate ip->i_gid = vap->va_gid; 2103*7c478bd9Sstevel@tonic-gate } 2104*7c478bd9Sstevel@tonic-gate TRANS_INODE(ufsvfsp, ip); 2105*7c478bd9Sstevel@tonic-gate ip->i_flag |= ICHG; 2106*7c478bd9Sstevel@tonic-gate } 2107*7c478bd9Sstevel@tonic-gate /* 2108*7c478bd9Sstevel@tonic-gate * Change file access or modified times. 2109*7c478bd9Sstevel@tonic-gate */ 2110*7c478bd9Sstevel@tonic-gate if (mask & (AT_ATIME|AT_MTIME)) { 2111*7c478bd9Sstevel@tonic-gate /* Check that the time value is within ufs range */ 2112*7c478bd9Sstevel@tonic-gate if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 2113*7c478bd9Sstevel@tonic-gate ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 2114*7c478bd9Sstevel@tonic-gate error = EOVERFLOW; 2115*7c478bd9Sstevel@tonic-gate goto update_inode; 2116*7c478bd9Sstevel@tonic-gate } 2117*7c478bd9Sstevel@tonic-gate 2118*7c478bd9Sstevel@tonic-gate /* 2119*7c478bd9Sstevel@tonic-gate * if the "noaccess" mount option is set and only atime 2120*7c478bd9Sstevel@tonic-gate * update is requested, do nothing. No error is returned. 2121*7c478bd9Sstevel@tonic-gate */ 2122*7c478bd9Sstevel@tonic-gate if ((ufsvfsp->vfs_noatime) && 2123*7c478bd9Sstevel@tonic-gate ((mask & (AT_ATIME|AT_MTIME)) == AT_ATIME)) 2124*7c478bd9Sstevel@tonic-gate goto skip_atime; 2125*7c478bd9Sstevel@tonic-gate 2126*7c478bd9Sstevel@tonic-gate if (mask & AT_ATIME) { 2127*7c478bd9Sstevel@tonic-gate ip->i_atime.tv_sec = vap->va_atime.tv_sec; 2128*7c478bd9Sstevel@tonic-gate ip->i_atime.tv_usec = vap->va_atime.tv_nsec / 1000; 2129*7c478bd9Sstevel@tonic-gate ip->i_flag &= ~IACC; 2130*7c478bd9Sstevel@tonic-gate } 2131*7c478bd9Sstevel@tonic-gate if (mask & AT_MTIME) { 2132*7c478bd9Sstevel@tonic-gate ip->i_mtime.tv_sec = vap->va_mtime.tv_sec; 2133*7c478bd9Sstevel@tonic-gate ip->i_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000; 2134*7c478bd9Sstevel@tonic-gate gethrestime(&now); 2135*7c478bd9Sstevel@tonic-gate if (now.tv_sec > TIME32_MAX) { 2136*7c478bd9Sstevel@tonic-gate /* 2137*7c478bd9Sstevel@tonic-gate * In 2038, ctime sticks forever.. 2138*7c478bd9Sstevel@tonic-gate */ 2139*7c478bd9Sstevel@tonic-gate ip->i_ctime.tv_sec = TIME32_MAX; 2140*7c478bd9Sstevel@tonic-gate ip->i_ctime.tv_usec = 0; 2141*7c478bd9Sstevel@tonic-gate } else { 2142*7c478bd9Sstevel@tonic-gate ip->i_ctime.tv_sec = now.tv_sec; 2143*7c478bd9Sstevel@tonic-gate ip->i_ctime.tv_usec = now.tv_nsec / 1000; 2144*7c478bd9Sstevel@tonic-gate } 2145*7c478bd9Sstevel@tonic-gate ip->i_flag &= ~(IUPD|ICHG); 2146*7c478bd9Sstevel@tonic-gate ip->i_flag |= IMODTIME; 2147*7c478bd9Sstevel@tonic-gate } 2148*7c478bd9Sstevel@tonic-gate TRANS_INODE(ufsvfsp, ip); 2149*7c478bd9Sstevel@tonic-gate ip->i_flag |= IMOD; 2150*7c478bd9Sstevel@tonic-gate } 2151*7c478bd9Sstevel@tonic-gate 2152*7c478bd9Sstevel@tonic-gate skip_atime: 2153*7c478bd9Sstevel@tonic-gate /* 2154*7c478bd9Sstevel@tonic-gate * The presence of a shadow inode may indicate an ACL, but does 2155*7c478bd9Sstevel@tonic-gate * not imply an ACL. Future FSD types should be handled here too 2156*7c478bd9Sstevel@tonic-gate * and check for the presence of the attribute-specific data 2157*7c478bd9Sstevel@tonic-gate * before referencing it. 2158*7c478bd9Sstevel@tonic-gate */ 2159*7c478bd9Sstevel@tonic-gate if (ip->i_shadow) { 2160*7c478bd9Sstevel@tonic-gate /* 2161*7c478bd9Sstevel@tonic-gate * XXX if ufs_iupdat is changed to sandbagged write fix 2162*7c478bd9Sstevel@tonic-gate * ufs_acl_setattr to push ip to keep acls consistent 2163*7c478bd9Sstevel@tonic-gate * 2164*7c478bd9Sstevel@tonic-gate * Suppress out of inodes messages if we will retry. 2165*7c478bd9Sstevel@tonic-gate */ 2166*7c478bd9Sstevel@tonic-gate if (retry) 2167*7c478bd9Sstevel@tonic-gate ip->i_flag |= IQUIET; 2168*7c478bd9Sstevel@tonic-gate error = ufs_acl_setattr(ip, vap, cr); 2169*7c478bd9Sstevel@tonic-gate ip->i_flag &= ~IQUIET; 2170*7c478bd9Sstevel@tonic-gate } 2171*7c478bd9Sstevel@tonic-gate 2172*7c478bd9Sstevel@tonic-gate update_inode: 2173*7c478bd9Sstevel@tonic-gate /* 2174*7c478bd9Sstevel@tonic-gate * Setattr always increases the sequence number 2175*7c478bd9Sstevel@tonic-gate */ 2176*7c478bd9Sstevel@tonic-gate ip->i_seq++; 2177*7c478bd9Sstevel@tonic-gate 2178*7c478bd9Sstevel@tonic-gate /* 2179*7c478bd9Sstevel@tonic-gate * if nfsd and not logging; push synchronously 2180*7c478bd9Sstevel@tonic-gate */ 2181*7c478bd9Sstevel@tonic-gate if ((curthread->t_flag & T_DONTPEND) && !TRANS_ISTRANS(ufsvfsp)) { 2182*7c478bd9Sstevel@tonic-gate ufs_iupdat(ip, 1); 2183*7c478bd9Sstevel@tonic-gate } else { 2184*7c478bd9Sstevel@tonic-gate ITIMES_NOLOCK(ip); 2185*7c478bd9Sstevel@tonic-gate } 2186*7c478bd9Sstevel@tonic-gate 2187*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 2188*7c478bd9Sstevel@tonic-gate if (dodqlock) { 2189*7c478bd9Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 2190*7c478bd9Sstevel@tonic-gate } 2191*7c478bd9Sstevel@tonic-gate if (dorwlock) 2192*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_rwlock); 2193*7c478bd9Sstevel@tonic-gate 2194*7c478bd9Sstevel@tonic-gate if (ulp) { 2195*7c478bd9Sstevel@tonic-gate if (dotrans) { 2196*7c478bd9Sstevel@tonic-gate int terr = 0; 2197*7c478bd9Sstevel@tonic-gate TRANS_END_CSYNC(ufsvfsp, terr, issync, TOP_SETATTR, 2198*7c478bd9Sstevel@tonic-gate trans_size); 2199*7c478bd9Sstevel@tonic-gate if (error == 0) 2200*7c478bd9Sstevel@tonic-gate error = terr; 2201*7c478bd9Sstevel@tonic-gate } 2202*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 2203*7c478bd9Sstevel@tonic-gate } 2204*7c478bd9Sstevel@tonic-gate out: 2205*7c478bd9Sstevel@tonic-gate /* 2206*7c478bd9Sstevel@tonic-gate * If out of inodes or blocks, see if we can free something 2207*7c478bd9Sstevel@tonic-gate * up from the delete queue. 2208*7c478bd9Sstevel@tonic-gate */ 2209*7c478bd9Sstevel@tonic-gate if ((error == ENOSPC) && retry && TRANS_ISTRANS(ufsvfsp)) { 2210*7c478bd9Sstevel@tonic-gate ufs_delete_drain_wait(ufsvfsp, 1); 2211*7c478bd9Sstevel@tonic-gate retry = 0; 2212*7c478bd9Sstevel@tonic-gate if (errmsg1 != NULL) 2213*7c478bd9Sstevel@tonic-gate kmem_free(errmsg1, len1); 2214*7c478bd9Sstevel@tonic-gate if (errmsg2 != NULL) 2215*7c478bd9Sstevel@tonic-gate kmem_free(errmsg2, len2); 2216*7c478bd9Sstevel@tonic-gate goto again; 2217*7c478bd9Sstevel@tonic-gate } 2218*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_SETATTR_END, 2219*7c478bd9Sstevel@tonic-gate "ufs_setattr_end:vp %p error %d", vp, error); 2220*7c478bd9Sstevel@tonic-gate if (errmsg1 != NULL) { 2221*7c478bd9Sstevel@tonic-gate uprintf(errmsg1); 2222*7c478bd9Sstevel@tonic-gate kmem_free(errmsg1, len1); 2223*7c478bd9Sstevel@tonic-gate } 2224*7c478bd9Sstevel@tonic-gate if (errmsg2 != NULL) { 2225*7c478bd9Sstevel@tonic-gate uprintf(errmsg2); 2226*7c478bd9Sstevel@tonic-gate kmem_free(errmsg2, len2); 2227*7c478bd9Sstevel@tonic-gate } 2228*7c478bd9Sstevel@tonic-gate return (error); 2229*7c478bd9Sstevel@tonic-gate } 2230*7c478bd9Sstevel@tonic-gate 2231*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 2232*7c478bd9Sstevel@tonic-gate static int 2233*7c478bd9Sstevel@tonic-gate ufs_access(struct vnode *vp, int mode, int flags, struct cred *cr) 2234*7c478bd9Sstevel@tonic-gate { 2235*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 2236*7c478bd9Sstevel@tonic-gate int error; 2237*7c478bd9Sstevel@tonic-gate 2238*7c478bd9Sstevel@tonic-gate TRACE_3(TR_FAC_UFS, TR_UFS_ACCESS_START, 2239*7c478bd9Sstevel@tonic-gate "ufs_access_start:vp %p mode %x flags %x", vp, mode, flags); 2240*7c478bd9Sstevel@tonic-gate 2241*7c478bd9Sstevel@tonic-gate if (ip->i_ufsvfs == NULL) 2242*7c478bd9Sstevel@tonic-gate return (EIO); 2243*7c478bd9Sstevel@tonic-gate 2244*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER); 2245*7c478bd9Sstevel@tonic-gate 2246*7c478bd9Sstevel@tonic-gate /* 2247*7c478bd9Sstevel@tonic-gate * The ufs_iaccess function wants to be called with 2248*7c478bd9Sstevel@tonic-gate * mode bits expressed as "ufs specific" bits. 2249*7c478bd9Sstevel@tonic-gate * I.e., VWRITE|VREAD|VEXEC do not make sense to 2250*7c478bd9Sstevel@tonic-gate * ufs_iaccess() but IWRITE|IREAD|IEXEC do. 2251*7c478bd9Sstevel@tonic-gate * But since they're the same we just pass the vnode mode 2252*7c478bd9Sstevel@tonic-gate * bit but just verify that assumption at compile time. 2253*7c478bd9Sstevel@tonic-gate */ 2254*7c478bd9Sstevel@tonic-gate #if IWRITE != VWRITE || IREAD != VREAD || IEXEC != VEXEC 2255*7c478bd9Sstevel@tonic-gate #error "ufs_access needs to map Vmodes to Imodes" 2256*7c478bd9Sstevel@tonic-gate #endif 2257*7c478bd9Sstevel@tonic-gate error = ufs_iaccess(ip, mode, cr); 2258*7c478bd9Sstevel@tonic-gate 2259*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 2260*7c478bd9Sstevel@tonic-gate 2261*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_ACCESS_END, 2262*7c478bd9Sstevel@tonic-gate "ufs_access_end:vp %p error %d", vp, error); 2263*7c478bd9Sstevel@tonic-gate return (error); 2264*7c478bd9Sstevel@tonic-gate } 2265*7c478bd9Sstevel@tonic-gate 2266*7c478bd9Sstevel@tonic-gate /* ARGSUSED */ 2267*7c478bd9Sstevel@tonic-gate static int 2268*7c478bd9Sstevel@tonic-gate ufs_readlink(struct vnode *vp, struct uio *uiop, struct cred *cr) 2269*7c478bd9Sstevel@tonic-gate { 2270*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 2271*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp; 2272*7c478bd9Sstevel@tonic-gate struct ulockfs *ulp; 2273*7c478bd9Sstevel@tonic-gate int error; 2274*7c478bd9Sstevel@tonic-gate int fastsymlink; 2275*7c478bd9Sstevel@tonic-gate 2276*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_READLINK_START, 2277*7c478bd9Sstevel@tonic-gate "ufs_readlink_start:vp %p uiop %p", uiop, vp); 2278*7c478bd9Sstevel@tonic-gate 2279*7c478bd9Sstevel@tonic-gate if (vp->v_type != VLNK) { 2280*7c478bd9Sstevel@tonic-gate error = EINVAL; 2281*7c478bd9Sstevel@tonic-gate goto nolockout; 2282*7c478bd9Sstevel@tonic-gate } 2283*7c478bd9Sstevel@tonic-gate 2284*7c478bd9Sstevel@tonic-gate /* 2285*7c478bd9Sstevel@tonic-gate * If the symbolic link is empty there is nothing to read. 2286*7c478bd9Sstevel@tonic-gate * Fast-track these empty symbolic links 2287*7c478bd9Sstevel@tonic-gate */ 2288*7c478bd9Sstevel@tonic-gate if (ip->i_size == 0) { 2289*7c478bd9Sstevel@tonic-gate error = 0; 2290*7c478bd9Sstevel@tonic-gate goto nolockout; 2291*7c478bd9Sstevel@tonic-gate } 2292*7c478bd9Sstevel@tonic-gate 2293*7c478bd9Sstevel@tonic-gate ufsvfsp = ip->i_ufsvfs; 2294*7c478bd9Sstevel@tonic-gate error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_READLINK_MASK); 2295*7c478bd9Sstevel@tonic-gate if (error) 2296*7c478bd9Sstevel@tonic-gate goto nolockout; 2297*7c478bd9Sstevel@tonic-gate /* 2298*7c478bd9Sstevel@tonic-gate * The ip->i_rwlock protects the data blocks used for FASTSYMLINK 2299*7c478bd9Sstevel@tonic-gate */ 2300*7c478bd9Sstevel@tonic-gate again: 2301*7c478bd9Sstevel@tonic-gate fastsymlink = 0; 2302*7c478bd9Sstevel@tonic-gate if (ip->i_flag & IFASTSYMLNK) { 2303*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_rwlock, RW_READER); 2304*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER); 2305*7c478bd9Sstevel@tonic-gate if (ip->i_flag & IFASTSYMLNK) { 2306*7c478bd9Sstevel@tonic-gate if (!ULOCKFS_IS_NOIACC(ITOUL(ip)) && 2307*7c478bd9Sstevel@tonic-gate (ip->i_fs->fs_ronly == 0) && 2308*7c478bd9Sstevel@tonic-gate (!ufsvfsp->vfs_noatime)) { 2309*7c478bd9Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 2310*7c478bd9Sstevel@tonic-gate ip->i_flag |= IACC; 2311*7c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 2312*7c478bd9Sstevel@tonic-gate } 2313*7c478bd9Sstevel@tonic-gate error = uiomove((caddr_t)&ip->i_db[1], 2314*7c478bd9Sstevel@tonic-gate MIN(ip->i_size, uiop->uio_resid), 2315*7c478bd9Sstevel@tonic-gate UIO_READ, uiop); 2316*7c478bd9Sstevel@tonic-gate ITIMES(ip); 2317*7c478bd9Sstevel@tonic-gate ++fastsymlink; 2318*7c478bd9Sstevel@tonic-gate } 2319*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 2320*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_rwlock); 2321*7c478bd9Sstevel@tonic-gate } 2322*7c478bd9Sstevel@tonic-gate if (!fastsymlink) { 2323*7c478bd9Sstevel@tonic-gate ssize_t size; /* number of bytes read */ 2324*7c478bd9Sstevel@tonic-gate caddr_t basep; /* pointer to input data */ 2325*7c478bd9Sstevel@tonic-gate ino_t ino; 2326*7c478bd9Sstevel@tonic-gate long igen; 2327*7c478bd9Sstevel@tonic-gate struct uio tuio; /* temp uio struct */ 2328*7c478bd9Sstevel@tonic-gate struct uio *tuiop; 2329*7c478bd9Sstevel@tonic-gate iovec_t tiov; /* temp iovec struct */ 2330*7c478bd9Sstevel@tonic-gate char kbuf[FSL_SIZE]; /* buffer to hold fast symlink */ 2331*7c478bd9Sstevel@tonic-gate int tflag = 0; /* flag to indicate temp vars used */ 2332*7c478bd9Sstevel@tonic-gate 2333*7c478bd9Sstevel@tonic-gate ino = ip->i_number; 2334*7c478bd9Sstevel@tonic-gate igen = ip->i_gen; 2335*7c478bd9Sstevel@tonic-gate size = uiop->uio_resid; 2336*7c478bd9Sstevel@tonic-gate basep = uiop->uio_iov->iov_base; 2337*7c478bd9Sstevel@tonic-gate tuiop = uiop; 2338*7c478bd9Sstevel@tonic-gate 2339*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_rwlock, RW_WRITER); 2340*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 2341*7c478bd9Sstevel@tonic-gate if (ip->i_flag & IFASTSYMLNK) { 2342*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 2343*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_rwlock); 2344*7c478bd9Sstevel@tonic-gate goto again; 2345*7c478bd9Sstevel@tonic-gate } 2346*7c478bd9Sstevel@tonic-gate 2347*7c478bd9Sstevel@tonic-gate /* can this be a fast symlink and is it a user buffer? */ 2348*7c478bd9Sstevel@tonic-gate if (ip->i_size <= FSL_SIZE && 2349*7c478bd9Sstevel@tonic-gate (uiop->uio_segflg == UIO_USERSPACE || 2350*7c478bd9Sstevel@tonic-gate uiop->uio_segflg == UIO_USERISPACE)) { 2351*7c478bd9Sstevel@tonic-gate 2352*7c478bd9Sstevel@tonic-gate bzero(&tuio, sizeof (struct uio)); 2353*7c478bd9Sstevel@tonic-gate /* 2354*7c478bd9Sstevel@tonic-gate * setup a kernel buffer to read link into. this 2355*7c478bd9Sstevel@tonic-gate * is to fix a race condition where the user buffer 2356*7c478bd9Sstevel@tonic-gate * got corrupted before copying it into the inode. 2357*7c478bd9Sstevel@tonic-gate */ 2358*7c478bd9Sstevel@tonic-gate size = ip->i_size; 2359*7c478bd9Sstevel@tonic-gate tiov.iov_len = size; 2360*7c478bd9Sstevel@tonic-gate tiov.iov_base = kbuf; 2361*7c478bd9Sstevel@tonic-gate tuio.uio_iov = &tiov; 2362*7c478bd9Sstevel@tonic-gate tuio.uio_iovcnt = 1; 2363*7c478bd9Sstevel@tonic-gate tuio.uio_offset = uiop->uio_offset; 2364*7c478bd9Sstevel@tonic-gate tuio.uio_segflg = UIO_SYSSPACE; 2365*7c478bd9Sstevel@tonic-gate tuio.uio_fmode = uiop->uio_fmode; 2366*7c478bd9Sstevel@tonic-gate tuio.uio_extflg = uiop->uio_extflg; 2367*7c478bd9Sstevel@tonic-gate tuio.uio_limit = uiop->uio_limit; 2368*7c478bd9Sstevel@tonic-gate tuio.uio_resid = size; 2369*7c478bd9Sstevel@tonic-gate 2370*7c478bd9Sstevel@tonic-gate basep = tuio.uio_iov->iov_base; 2371*7c478bd9Sstevel@tonic-gate tuiop = &tuio; 2372*7c478bd9Sstevel@tonic-gate tflag = 1; 2373*7c478bd9Sstevel@tonic-gate } 2374*7c478bd9Sstevel@tonic-gate 2375*7c478bd9Sstevel@tonic-gate error = rdip(ip, tuiop, 0, cr); 2376*7c478bd9Sstevel@tonic-gate if (!(error == 0 && ip->i_number == ino && ip->i_gen == igen)) { 2377*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 2378*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_rwlock); 2379*7c478bd9Sstevel@tonic-gate goto out; 2380*7c478bd9Sstevel@tonic-gate } 2381*7c478bd9Sstevel@tonic-gate 2382*7c478bd9Sstevel@tonic-gate if (tflag == 0) 2383*7c478bd9Sstevel@tonic-gate size -= uiop->uio_resid; 2384*7c478bd9Sstevel@tonic-gate 2385*7c478bd9Sstevel@tonic-gate if ((tflag == 0 && ip->i_size <= FSL_SIZE && 2386*7c478bd9Sstevel@tonic-gate ip->i_size == size) || (tflag == 1 && 2387*7c478bd9Sstevel@tonic-gate tuio.uio_resid == 0)) { 2388*7c478bd9Sstevel@tonic-gate error = kcopy(basep, &ip->i_db[1], ip->i_size); 2389*7c478bd9Sstevel@tonic-gate if (error == 0) { 2390*7c478bd9Sstevel@tonic-gate ip->i_flag |= IFASTSYMLNK; 2391*7c478bd9Sstevel@tonic-gate /* 2392*7c478bd9Sstevel@tonic-gate * free page 2393*7c478bd9Sstevel@tonic-gate */ 2394*7c478bd9Sstevel@tonic-gate (void) VOP_PUTPAGE(ITOV(ip), 2395*7c478bd9Sstevel@tonic-gate (offset_t)0, PAGESIZE, 2396*7c478bd9Sstevel@tonic-gate (B_DONTNEED | B_FREE | B_FORCE | B_ASYNC), 2397*7c478bd9Sstevel@tonic-gate cr); 2398*7c478bd9Sstevel@tonic-gate } else { 2399*7c478bd9Sstevel@tonic-gate int i; 2400*7c478bd9Sstevel@tonic-gate /* error, clear garbage left behind */ 2401*7c478bd9Sstevel@tonic-gate for (i = 1; i < NDADDR; i++) 2402*7c478bd9Sstevel@tonic-gate ip->i_db[i] = 0; 2403*7c478bd9Sstevel@tonic-gate for (i = 0; i < NIADDR; i++) 2404*7c478bd9Sstevel@tonic-gate ip->i_ib[i] = 0; 2405*7c478bd9Sstevel@tonic-gate } 2406*7c478bd9Sstevel@tonic-gate } 2407*7c478bd9Sstevel@tonic-gate if (tflag == 1) { 2408*7c478bd9Sstevel@tonic-gate /* now, copy it into the user buffer */ 2409*7c478bd9Sstevel@tonic-gate error = uiomove((caddr_t)kbuf, 2410*7c478bd9Sstevel@tonic-gate MIN(size, uiop->uio_resid), 2411*7c478bd9Sstevel@tonic-gate UIO_READ, uiop); 2412*7c478bd9Sstevel@tonic-gate } 2413*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 2414*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_rwlock); 2415*7c478bd9Sstevel@tonic-gate } 2416*7c478bd9Sstevel@tonic-gate out: 2417*7c478bd9Sstevel@tonic-gate if (ulp) { 2418*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 2419*7c478bd9Sstevel@tonic-gate } 2420*7c478bd9Sstevel@tonic-gate nolockout: 2421*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_READLINK_END, 2422*7c478bd9Sstevel@tonic-gate "ufs_readlink_end:vp %p error %d", vp, error); 2423*7c478bd9Sstevel@tonic-gate 2424*7c478bd9Sstevel@tonic-gate return (error); 2425*7c478bd9Sstevel@tonic-gate } 2426*7c478bd9Sstevel@tonic-gate 2427*7c478bd9Sstevel@tonic-gate /* ARGSUSED */ 2428*7c478bd9Sstevel@tonic-gate static int 2429*7c478bd9Sstevel@tonic-gate ufs_fsync(struct vnode *vp, int syncflag, struct cred *cr) 2430*7c478bd9Sstevel@tonic-gate { 2431*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 2432*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 2433*7c478bd9Sstevel@tonic-gate struct ulockfs *ulp; 2434*7c478bd9Sstevel@tonic-gate int error; 2435*7c478bd9Sstevel@tonic-gate 2436*7c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_UFS, TR_UFS_FSYNC_START, 2437*7c478bd9Sstevel@tonic-gate "ufs_fsync_start:vp %p", vp); 2438*7c478bd9Sstevel@tonic-gate 2439*7c478bd9Sstevel@tonic-gate error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_FSYNC_MASK); 2440*7c478bd9Sstevel@tonic-gate if (error) 2441*7c478bd9Sstevel@tonic-gate return (error); 2442*7c478bd9Sstevel@tonic-gate 2443*7c478bd9Sstevel@tonic-gate if (TRANS_ISTRANS(ufsvfsp)) { 2444*7c478bd9Sstevel@tonic-gate /* 2445*7c478bd9Sstevel@tonic-gate * First push out any data pages 2446*7c478bd9Sstevel@tonic-gate */ 2447*7c478bd9Sstevel@tonic-gate if (vn_has_cached_data(vp) && !(syncflag & FNODSYNC) && 2448*7c478bd9Sstevel@tonic-gate (vp->v_type != VCHR) && !(IS_SWAPVP(vp))) { 2449*7c478bd9Sstevel@tonic-gate error = VOP_PUTPAGE(vp, (offset_t)0, (size_t)0, 2450*7c478bd9Sstevel@tonic-gate 0, CRED()); 2451*7c478bd9Sstevel@tonic-gate if (error) 2452*7c478bd9Sstevel@tonic-gate goto out; 2453*7c478bd9Sstevel@tonic-gate } 2454*7c478bd9Sstevel@tonic-gate 2455*7c478bd9Sstevel@tonic-gate /* 2456*7c478bd9Sstevel@tonic-gate * Delta any delayed inode times updates 2457*7c478bd9Sstevel@tonic-gate * and push inode to log. 2458*7c478bd9Sstevel@tonic-gate * All other inode deltas will have already been delta'd 2459*7c478bd9Sstevel@tonic-gate * and will be pushed during the commit. 2460*7c478bd9Sstevel@tonic-gate */ 2461*7c478bd9Sstevel@tonic-gate if (!(syncflag & FDSYNC) && 2462*7c478bd9Sstevel@tonic-gate ((ip->i_flag & (IMOD|IMODACC)) == IMODACC)) { 2463*7c478bd9Sstevel@tonic-gate if (ulp) { 2464*7c478bd9Sstevel@tonic-gate TRANS_BEGIN_ASYNC(ufsvfsp, TOP_FSYNC, 2465*7c478bd9Sstevel@tonic-gate TOP_SYNCIP_SIZE); 2466*7c478bd9Sstevel@tonic-gate } 2467*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER); 2468*7c478bd9Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 2469*7c478bd9Sstevel@tonic-gate ip->i_flag &= ~IMODTIME; 2470*7c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 2471*7c478bd9Sstevel@tonic-gate ufs_iupdat(ip, I_SYNC); 2472*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 2473*7c478bd9Sstevel@tonic-gate if (ulp) { 2474*7c478bd9Sstevel@tonic-gate TRANS_END_ASYNC(ufsvfsp, TOP_FSYNC, 2475*7c478bd9Sstevel@tonic-gate TOP_SYNCIP_SIZE); 2476*7c478bd9Sstevel@tonic-gate } 2477*7c478bd9Sstevel@tonic-gate } 2478*7c478bd9Sstevel@tonic-gate 2479*7c478bd9Sstevel@tonic-gate /* 2480*7c478bd9Sstevel@tonic-gate * Commit the Moby transaction 2481*7c478bd9Sstevel@tonic-gate * 2482*7c478bd9Sstevel@tonic-gate * Deltas have already been made so we just need to 2483*7c478bd9Sstevel@tonic-gate * commit them with a synchronous transaction. 2484*7c478bd9Sstevel@tonic-gate * TRANS_BEGIN_SYNC() will return an error 2485*7c478bd9Sstevel@tonic-gate * if there are no deltas to commit, for an 2486*7c478bd9Sstevel@tonic-gate * empty transaction. 2487*7c478bd9Sstevel@tonic-gate */ 2488*7c478bd9Sstevel@tonic-gate if (ulp) { 2489*7c478bd9Sstevel@tonic-gate TRANS_BEGIN_SYNC(ufsvfsp, TOP_FSYNC, TOP_COMMIT_SIZE, 2490*7c478bd9Sstevel@tonic-gate error); 2491*7c478bd9Sstevel@tonic-gate if (error) { 2492*7c478bd9Sstevel@tonic-gate error = 0; /* commit wasn't needed */ 2493*7c478bd9Sstevel@tonic-gate goto out; 2494*7c478bd9Sstevel@tonic-gate } 2495*7c478bd9Sstevel@tonic-gate TRANS_END_SYNC(ufsvfsp, error, TOP_FSYNC, 2496*7c478bd9Sstevel@tonic-gate TOP_COMMIT_SIZE); 2497*7c478bd9Sstevel@tonic-gate } 2498*7c478bd9Sstevel@tonic-gate } else { /* not logging */ 2499*7c478bd9Sstevel@tonic-gate if (!(IS_SWAPVP(vp))) 2500*7c478bd9Sstevel@tonic-gate if (syncflag & FNODSYNC) { 2501*7c478bd9Sstevel@tonic-gate /* Just update the inode only */ 2502*7c478bd9Sstevel@tonic-gate TRANS_IUPDAT(ip, 1); 2503*7c478bd9Sstevel@tonic-gate error = 0; 2504*7c478bd9Sstevel@tonic-gate } else if (syncflag & FDSYNC) 2505*7c478bd9Sstevel@tonic-gate /* Do data-synchronous writes */ 2506*7c478bd9Sstevel@tonic-gate error = TRANS_SYNCIP(ip, 0, I_DSYNC, TOP_FSYNC); 2507*7c478bd9Sstevel@tonic-gate else 2508*7c478bd9Sstevel@tonic-gate /* Do synchronous writes */ 2509*7c478bd9Sstevel@tonic-gate error = TRANS_SYNCIP(ip, 0, I_SYNC, TOP_FSYNC); 2510*7c478bd9Sstevel@tonic-gate 2511*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 2512*7c478bd9Sstevel@tonic-gate if (!error) 2513*7c478bd9Sstevel@tonic-gate error = ufs_sync_indir(ip); 2514*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 2515*7c478bd9Sstevel@tonic-gate } 2516*7c478bd9Sstevel@tonic-gate out: 2517*7c478bd9Sstevel@tonic-gate if (ulp) { 2518*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 2519*7c478bd9Sstevel@tonic-gate } 2520*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_FSYNC_END, 2521*7c478bd9Sstevel@tonic-gate "ufs_fsync_end:vp %p error %d", vp, error); 2522*7c478bd9Sstevel@tonic-gate return (error); 2523*7c478bd9Sstevel@tonic-gate } 2524*7c478bd9Sstevel@tonic-gate 2525*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 2526*7c478bd9Sstevel@tonic-gate static void 2527*7c478bd9Sstevel@tonic-gate ufs_inactive(struct vnode *vp, struct cred *cr) 2528*7c478bd9Sstevel@tonic-gate { 2529*7c478bd9Sstevel@tonic-gate ufs_iinactive(VTOI(vp)); 2530*7c478bd9Sstevel@tonic-gate } 2531*7c478bd9Sstevel@tonic-gate 2532*7c478bd9Sstevel@tonic-gate /* 2533*7c478bd9Sstevel@tonic-gate * Unix file system operations having to do with directory manipulation. 2534*7c478bd9Sstevel@tonic-gate */ 2535*7c478bd9Sstevel@tonic-gate int ufs_lookup_idle_count = 2; /* Number of inodes to idle each time */ 2536*7c478bd9Sstevel@tonic-gate /* ARGSUSED */ 2537*7c478bd9Sstevel@tonic-gate static int 2538*7c478bd9Sstevel@tonic-gate ufs_lookup(struct vnode *dvp, char *nm, struct vnode **vpp, 2539*7c478bd9Sstevel@tonic-gate struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cr) 2540*7c478bd9Sstevel@tonic-gate { 2541*7c478bd9Sstevel@tonic-gate struct inode *ip; 2542*7c478bd9Sstevel@tonic-gate struct inode *sip; 2543*7c478bd9Sstevel@tonic-gate struct inode *xip; 2544*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp; 2545*7c478bd9Sstevel@tonic-gate struct ulockfs *ulp; 2546*7c478bd9Sstevel@tonic-gate struct vnode *vp; 2547*7c478bd9Sstevel@tonic-gate int error; 2548*7c478bd9Sstevel@tonic-gate 2549*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_LOOKUP_START, 2550*7c478bd9Sstevel@tonic-gate "ufs_lookup_start:dvp %p name %s", dvp, nm); 2551*7c478bd9Sstevel@tonic-gate 2552*7c478bd9Sstevel@tonic-gate 2553*7c478bd9Sstevel@tonic-gate /* 2554*7c478bd9Sstevel@tonic-gate * Check flags for type of lookup (regular file or attribute file) 2555*7c478bd9Sstevel@tonic-gate */ 2556*7c478bd9Sstevel@tonic-gate 2557*7c478bd9Sstevel@tonic-gate ip = VTOI(dvp); 2558*7c478bd9Sstevel@tonic-gate 2559*7c478bd9Sstevel@tonic-gate if (flags & LOOKUP_XATTR) { 2560*7c478bd9Sstevel@tonic-gate 2561*7c478bd9Sstevel@tonic-gate /* 2562*7c478bd9Sstevel@tonic-gate * We don't allow recursive attributes... 2563*7c478bd9Sstevel@tonic-gate * Maybe someday we will. 2564*7c478bd9Sstevel@tonic-gate */ 2565*7c478bd9Sstevel@tonic-gate if ((ip->i_cflags & IXATTR)) { 2566*7c478bd9Sstevel@tonic-gate return (EINVAL); 2567*7c478bd9Sstevel@tonic-gate } 2568*7c478bd9Sstevel@tonic-gate 2569*7c478bd9Sstevel@tonic-gate if ((vp = dnlc_lookup(dvp, XATTR_DIR_NAME)) == NULL) { 2570*7c478bd9Sstevel@tonic-gate error = ufs_xattr_getattrdir(dvp, &sip, flags, cr); 2571*7c478bd9Sstevel@tonic-gate if (error) { 2572*7c478bd9Sstevel@tonic-gate *vpp = NULL; 2573*7c478bd9Sstevel@tonic-gate goto out; 2574*7c478bd9Sstevel@tonic-gate } 2575*7c478bd9Sstevel@tonic-gate 2576*7c478bd9Sstevel@tonic-gate vp = ITOV(sip); 2577*7c478bd9Sstevel@tonic-gate dnlc_update(dvp, XATTR_DIR_NAME, vp); 2578*7c478bd9Sstevel@tonic-gate } 2579*7c478bd9Sstevel@tonic-gate 2580*7c478bd9Sstevel@tonic-gate /* 2581*7c478bd9Sstevel@tonic-gate * Check accessibility of directory. 2582*7c478bd9Sstevel@tonic-gate */ 2583*7c478bd9Sstevel@tonic-gate if (vp == DNLC_NO_VNODE) { 2584*7c478bd9Sstevel@tonic-gate VN_RELE(vp); 2585*7c478bd9Sstevel@tonic-gate error = ENOENT; 2586*7c478bd9Sstevel@tonic-gate goto out; 2587*7c478bd9Sstevel@tonic-gate } 2588*7c478bd9Sstevel@tonic-gate if ((error = ufs_iaccess(VTOI(vp), IEXEC, cr)) != 0) { 2589*7c478bd9Sstevel@tonic-gate VN_RELE(vp); 2590*7c478bd9Sstevel@tonic-gate goto out; 2591*7c478bd9Sstevel@tonic-gate } 2592*7c478bd9Sstevel@tonic-gate 2593*7c478bd9Sstevel@tonic-gate *vpp = vp; 2594*7c478bd9Sstevel@tonic-gate return (0); 2595*7c478bd9Sstevel@tonic-gate } 2596*7c478bd9Sstevel@tonic-gate 2597*7c478bd9Sstevel@tonic-gate /* 2598*7c478bd9Sstevel@tonic-gate * Check for a null component, which we should treat as 2599*7c478bd9Sstevel@tonic-gate * looking at dvp from within it's parent, so we don't 2600*7c478bd9Sstevel@tonic-gate * need a call to ufs_iaccess(), as it has already been 2601*7c478bd9Sstevel@tonic-gate * done. 2602*7c478bd9Sstevel@tonic-gate */ 2603*7c478bd9Sstevel@tonic-gate if (nm[0] == 0) { 2604*7c478bd9Sstevel@tonic-gate VN_HOLD(dvp); 2605*7c478bd9Sstevel@tonic-gate error = 0; 2606*7c478bd9Sstevel@tonic-gate *vpp = dvp; 2607*7c478bd9Sstevel@tonic-gate goto out; 2608*7c478bd9Sstevel@tonic-gate } 2609*7c478bd9Sstevel@tonic-gate 2610*7c478bd9Sstevel@tonic-gate /* 2611*7c478bd9Sstevel@tonic-gate * Check for "." ie itself. this is a quick check and 2612*7c478bd9Sstevel@tonic-gate * avoids adding "." into the dnlc (which have been seen 2613*7c478bd9Sstevel@tonic-gate * to occupy >10% of the cache). 2614*7c478bd9Sstevel@tonic-gate */ 2615*7c478bd9Sstevel@tonic-gate if ((nm[0] == '.') && (nm[1] == 0)) { 2616*7c478bd9Sstevel@tonic-gate /* 2617*7c478bd9Sstevel@tonic-gate * Don't return without checking accessibility 2618*7c478bd9Sstevel@tonic-gate * of the directory. We only need the lock if 2619*7c478bd9Sstevel@tonic-gate * we are going to return it. 2620*7c478bd9Sstevel@tonic-gate */ 2621*7c478bd9Sstevel@tonic-gate if ((error = ufs_iaccess(ip, IEXEC, cr)) == 0) { 2622*7c478bd9Sstevel@tonic-gate VN_HOLD(dvp); 2623*7c478bd9Sstevel@tonic-gate *vpp = dvp; 2624*7c478bd9Sstevel@tonic-gate } 2625*7c478bd9Sstevel@tonic-gate goto out; 2626*7c478bd9Sstevel@tonic-gate } 2627*7c478bd9Sstevel@tonic-gate 2628*7c478bd9Sstevel@tonic-gate /* 2629*7c478bd9Sstevel@tonic-gate * Fast path: Check the directory name lookup cache. 2630*7c478bd9Sstevel@tonic-gate */ 2631*7c478bd9Sstevel@tonic-gate if (vp = dnlc_lookup(dvp, nm)) { 2632*7c478bd9Sstevel@tonic-gate /* 2633*7c478bd9Sstevel@tonic-gate * Check accessibility of directory. 2634*7c478bd9Sstevel@tonic-gate */ 2635*7c478bd9Sstevel@tonic-gate if ((error = ufs_iaccess(ip, IEXEC, cr)) != 0) { 2636*7c478bd9Sstevel@tonic-gate VN_RELE(vp); 2637*7c478bd9Sstevel@tonic-gate goto out; 2638*7c478bd9Sstevel@tonic-gate } 2639*7c478bd9Sstevel@tonic-gate if (vp == DNLC_NO_VNODE) { 2640*7c478bd9Sstevel@tonic-gate VN_RELE(vp); 2641*7c478bd9Sstevel@tonic-gate error = ENOENT; 2642*7c478bd9Sstevel@tonic-gate goto out; 2643*7c478bd9Sstevel@tonic-gate } 2644*7c478bd9Sstevel@tonic-gate xip = VTOI(vp); 2645*7c478bd9Sstevel@tonic-gate ulp = NULL; 2646*7c478bd9Sstevel@tonic-gate goto fastpath; 2647*7c478bd9Sstevel@tonic-gate } 2648*7c478bd9Sstevel@tonic-gate 2649*7c478bd9Sstevel@tonic-gate /* 2650*7c478bd9Sstevel@tonic-gate * Keep the idle queue from getting too long by 2651*7c478bd9Sstevel@tonic-gate * idling two inodes before attempting to allocate another. 2652*7c478bd9Sstevel@tonic-gate * This operation must be performed before entering 2653*7c478bd9Sstevel@tonic-gate * lockfs or a transaction. 2654*7c478bd9Sstevel@tonic-gate */ 2655*7c478bd9Sstevel@tonic-gate if (ufs_idle_q.uq_ne > ufs_idle_q.uq_hiwat) 2656*7c478bd9Sstevel@tonic-gate if ((curthread->t_flag & T_DONTBLOCK) == 0) { 2657*7c478bd9Sstevel@tonic-gate ins.in_lidles.value.ul += ufs_lookup_idle_count; 2658*7c478bd9Sstevel@tonic-gate ufs_idle_some(ufs_lookup_idle_count); 2659*7c478bd9Sstevel@tonic-gate } 2660*7c478bd9Sstevel@tonic-gate 2661*7c478bd9Sstevel@tonic-gate ufsvfsp = ip->i_ufsvfs; 2662*7c478bd9Sstevel@tonic-gate error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_LOOKUP_MASK); 2663*7c478bd9Sstevel@tonic-gate if (error) 2664*7c478bd9Sstevel@tonic-gate goto out; 2665*7c478bd9Sstevel@tonic-gate 2666*7c478bd9Sstevel@tonic-gate error = ufs_dirlook(ip, nm, &xip, cr, 1); 2667*7c478bd9Sstevel@tonic-gate 2668*7c478bd9Sstevel@tonic-gate fastpath: 2669*7c478bd9Sstevel@tonic-gate if (error == 0) { 2670*7c478bd9Sstevel@tonic-gate ip = xip; 2671*7c478bd9Sstevel@tonic-gate *vpp = ITOV(ip); 2672*7c478bd9Sstevel@tonic-gate 2673*7c478bd9Sstevel@tonic-gate /* 2674*7c478bd9Sstevel@tonic-gate * If vnode is a device return special vnode instead. 2675*7c478bd9Sstevel@tonic-gate */ 2676*7c478bd9Sstevel@tonic-gate if (IS_DEVVP(*vpp)) { 2677*7c478bd9Sstevel@tonic-gate struct vnode *newvp; 2678*7c478bd9Sstevel@tonic-gate 2679*7c478bd9Sstevel@tonic-gate newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, 2680*7c478bd9Sstevel@tonic-gate cr); 2681*7c478bd9Sstevel@tonic-gate VN_RELE(*vpp); 2682*7c478bd9Sstevel@tonic-gate if (newvp == NULL) 2683*7c478bd9Sstevel@tonic-gate error = ENOSYS; 2684*7c478bd9Sstevel@tonic-gate else 2685*7c478bd9Sstevel@tonic-gate *vpp = newvp; 2686*7c478bd9Sstevel@tonic-gate } 2687*7c478bd9Sstevel@tonic-gate } 2688*7c478bd9Sstevel@tonic-gate if (ulp) { 2689*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 2690*7c478bd9Sstevel@tonic-gate } 2691*7c478bd9Sstevel@tonic-gate 2692*7c478bd9Sstevel@tonic-gate out: 2693*7c478bd9Sstevel@tonic-gate TRACE_3(TR_FAC_UFS, TR_UFS_LOOKUP_END, 2694*7c478bd9Sstevel@tonic-gate "ufs_lookup_end:dvp %p name %s error %d", vpp, nm, error); 2695*7c478bd9Sstevel@tonic-gate return (error); 2696*7c478bd9Sstevel@tonic-gate } 2697*7c478bd9Sstevel@tonic-gate 2698*7c478bd9Sstevel@tonic-gate static int 2699*7c478bd9Sstevel@tonic-gate ufs_create(struct vnode *dvp, char *name, struct vattr *vap, enum vcexcl excl, 2700*7c478bd9Sstevel@tonic-gate int mode, struct vnode **vpp, struct cred *cr, int flag) 2701*7c478bd9Sstevel@tonic-gate { 2702*7c478bd9Sstevel@tonic-gate struct inode *ip; 2703*7c478bd9Sstevel@tonic-gate struct inode *xip; 2704*7c478bd9Sstevel@tonic-gate struct inode *dip; 2705*7c478bd9Sstevel@tonic-gate struct vnode *xvp; 2706*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp; 2707*7c478bd9Sstevel@tonic-gate struct ulockfs *ulp; 2708*7c478bd9Sstevel@tonic-gate int error; 2709*7c478bd9Sstevel@tonic-gate int issync; 2710*7c478bd9Sstevel@tonic-gate int truncflag; 2711*7c478bd9Sstevel@tonic-gate int trans_size; 2712*7c478bd9Sstevel@tonic-gate int noentry; 2713*7c478bd9Sstevel@tonic-gate int defer_dip_seq_update = 0; /* need to defer update of dip->i_seq */ 2714*7c478bd9Sstevel@tonic-gate int retry = 1; 2715*7c478bd9Sstevel@tonic-gate 2716*7c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_UFS, TR_UFS_CREATE_START, 2717*7c478bd9Sstevel@tonic-gate "ufs_create_start:dvp %p", dvp); 2718*7c478bd9Sstevel@tonic-gate 2719*7c478bd9Sstevel@tonic-gate again: 2720*7c478bd9Sstevel@tonic-gate ip = VTOI(dvp); 2721*7c478bd9Sstevel@tonic-gate ufsvfsp = ip->i_ufsvfs; 2722*7c478bd9Sstevel@tonic-gate truncflag = 0; 2723*7c478bd9Sstevel@tonic-gate 2724*7c478bd9Sstevel@tonic-gate error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_CREATE_MASK); 2725*7c478bd9Sstevel@tonic-gate if (error) 2726*7c478bd9Sstevel@tonic-gate goto out; 2727*7c478bd9Sstevel@tonic-gate 2728*7c478bd9Sstevel@tonic-gate if (ulp) { 2729*7c478bd9Sstevel@tonic-gate trans_size = (int)TOP_CREATE_SIZE(ip); 2730*7c478bd9Sstevel@tonic-gate TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_CREATE, trans_size); 2731*7c478bd9Sstevel@tonic-gate } 2732*7c478bd9Sstevel@tonic-gate 2733*7c478bd9Sstevel@tonic-gate if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0) 2734*7c478bd9Sstevel@tonic-gate vap->va_mode &= ~VSVTX; 2735*7c478bd9Sstevel@tonic-gate 2736*7c478bd9Sstevel@tonic-gate if (*name == '\0') { 2737*7c478bd9Sstevel@tonic-gate /* 2738*7c478bd9Sstevel@tonic-gate * Null component name refers to the directory itself. 2739*7c478bd9Sstevel@tonic-gate */ 2740*7c478bd9Sstevel@tonic-gate VN_HOLD(dvp); 2741*7c478bd9Sstevel@tonic-gate /* 2742*7c478bd9Sstevel@tonic-gate * Even though this is an error case, we need to grab the 2743*7c478bd9Sstevel@tonic-gate * quota lock since the error handling code below is common. 2744*7c478bd9Sstevel@tonic-gate */ 2745*7c478bd9Sstevel@tonic-gate rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 2746*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 2747*7c478bd9Sstevel@tonic-gate error = EEXIST; 2748*7c478bd9Sstevel@tonic-gate } else { 2749*7c478bd9Sstevel@tonic-gate xip = NULL; 2750*7c478bd9Sstevel@tonic-gate noentry = 0; 2751*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_rwlock, RW_WRITER); 2752*7c478bd9Sstevel@tonic-gate xvp = dnlc_lookup(dvp, name); 2753*7c478bd9Sstevel@tonic-gate if (xvp == DNLC_NO_VNODE) { 2754*7c478bd9Sstevel@tonic-gate noentry = 1; 2755*7c478bd9Sstevel@tonic-gate VN_RELE(xvp); 2756*7c478bd9Sstevel@tonic-gate xvp = NULL; 2757*7c478bd9Sstevel@tonic-gate } 2758*7c478bd9Sstevel@tonic-gate if (xvp) { 2759*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_rwlock); 2760*7c478bd9Sstevel@tonic-gate if (error = ufs_iaccess(ip, IEXEC, cr)) { 2761*7c478bd9Sstevel@tonic-gate VN_RELE(xvp); 2762*7c478bd9Sstevel@tonic-gate } else { 2763*7c478bd9Sstevel@tonic-gate error = EEXIST; 2764*7c478bd9Sstevel@tonic-gate xip = VTOI(xvp); 2765*7c478bd9Sstevel@tonic-gate } 2766*7c478bd9Sstevel@tonic-gate } else { 2767*7c478bd9Sstevel@tonic-gate /* 2768*7c478bd9Sstevel@tonic-gate * Suppress file system full message if we will retry 2769*7c478bd9Sstevel@tonic-gate */ 2770*7c478bd9Sstevel@tonic-gate error = ufs_direnter_cm(ip, name, DE_CREATE, 2771*7c478bd9Sstevel@tonic-gate vap, &xip, cr, 2772*7c478bd9Sstevel@tonic-gate (noentry | (retry ? IQUIET : 0))); 2773*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_rwlock); 2774*7c478bd9Sstevel@tonic-gate } 2775*7c478bd9Sstevel@tonic-gate ip = xip; 2776*7c478bd9Sstevel@tonic-gate if (ip != NULL) { 2777*7c478bd9Sstevel@tonic-gate rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 2778*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 2779*7c478bd9Sstevel@tonic-gate } 2780*7c478bd9Sstevel@tonic-gate } 2781*7c478bd9Sstevel@tonic-gate 2782*7c478bd9Sstevel@tonic-gate /* 2783*7c478bd9Sstevel@tonic-gate * If the file already exists and this is a non-exclusive create, 2784*7c478bd9Sstevel@tonic-gate * check permissions and allow access for non-directories. 2785*7c478bd9Sstevel@tonic-gate * Read-only create of an existing directory is also allowed. 2786*7c478bd9Sstevel@tonic-gate * We fail an exclusive create of anything which already exists. 2787*7c478bd9Sstevel@tonic-gate */ 2788*7c478bd9Sstevel@tonic-gate if (error == EEXIST) { 2789*7c478bd9Sstevel@tonic-gate dip = VTOI(dvp); 2790*7c478bd9Sstevel@tonic-gate if (excl == NONEXCL) { 2791*7c478bd9Sstevel@tonic-gate if ((((ip->i_mode & IFMT) == IFDIR) || 2792*7c478bd9Sstevel@tonic-gate ((ip->i_mode & IFMT) == IFATTRDIR)) && 2793*7c478bd9Sstevel@tonic-gate (mode & IWRITE)) 2794*7c478bd9Sstevel@tonic-gate error = EISDIR; 2795*7c478bd9Sstevel@tonic-gate else if (mode) 2796*7c478bd9Sstevel@tonic-gate error = ufs_iaccess(ip, mode, cr); 2797*7c478bd9Sstevel@tonic-gate else 2798*7c478bd9Sstevel@tonic-gate error = 0; 2799*7c478bd9Sstevel@tonic-gate } 2800*7c478bd9Sstevel@tonic-gate if (error) { 2801*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 2802*7c478bd9Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 2803*7c478bd9Sstevel@tonic-gate VN_RELE(ITOV(ip)); 2804*7c478bd9Sstevel@tonic-gate goto unlock; 2805*7c478bd9Sstevel@tonic-gate } 2806*7c478bd9Sstevel@tonic-gate /* 2807*7c478bd9Sstevel@tonic-gate * If the error EEXIST was set, then i_seq can not 2808*7c478bd9Sstevel@tonic-gate * have been updated. The sequence number interface 2809*7c478bd9Sstevel@tonic-gate * is defined such that a non-error VOP_CREATE must 2810*7c478bd9Sstevel@tonic-gate * increase the dir va_seq it by at least one. If we 2811*7c478bd9Sstevel@tonic-gate * have cleared the error, increase i_seq. Note that 2812*7c478bd9Sstevel@tonic-gate * we are increasing the dir i_seq and in rare cases 2813*7c478bd9Sstevel@tonic-gate * ip may actually be from the dvp, so we already have 2814*7c478bd9Sstevel@tonic-gate * the locks and it will not be subject to truncation. 2815*7c478bd9Sstevel@tonic-gate * In case we have to update i_seq of the parent 2816*7c478bd9Sstevel@tonic-gate * directory dip, we have to defer it till we have 2817*7c478bd9Sstevel@tonic-gate * released our locks on ip due to lock ordering requirements. 2818*7c478bd9Sstevel@tonic-gate */ 2819*7c478bd9Sstevel@tonic-gate if (ip != dip) 2820*7c478bd9Sstevel@tonic-gate defer_dip_seq_update = 1; 2821*7c478bd9Sstevel@tonic-gate else 2822*7c478bd9Sstevel@tonic-gate ip->i_seq++; 2823*7c478bd9Sstevel@tonic-gate 2824*7c478bd9Sstevel@tonic-gate if (((ip->i_mode & IFMT) == IFREG) && 2825*7c478bd9Sstevel@tonic-gate (vap->va_mask & AT_SIZE) && vap->va_size == 0) { 2826*7c478bd9Sstevel@tonic-gate /* 2827*7c478bd9Sstevel@tonic-gate * Truncate regular files, if requested by caller. 2828*7c478bd9Sstevel@tonic-gate * Grab i_rwlock to make sure no one else is 2829*7c478bd9Sstevel@tonic-gate * currently writing to the file (we promised 2830*7c478bd9Sstevel@tonic-gate * bmap we would do this). 2831*7c478bd9Sstevel@tonic-gate * Must get the locks in the correct order. 2832*7c478bd9Sstevel@tonic-gate */ 2833*7c478bd9Sstevel@tonic-gate if (ip->i_size == 0) { 2834*7c478bd9Sstevel@tonic-gate ip->i_flag |= ICHG | IUPD; 2835*7c478bd9Sstevel@tonic-gate ip->i_seq++; 2836*7c478bd9Sstevel@tonic-gate TRANS_INODE(ufsvfsp, ip); 2837*7c478bd9Sstevel@tonic-gate } else { 2838*7c478bd9Sstevel@tonic-gate /* 2839*7c478bd9Sstevel@tonic-gate * Large Files: Why this check here? 2840*7c478bd9Sstevel@tonic-gate * Though we do it in vn_create() we really 2841*7c478bd9Sstevel@tonic-gate * want to guarantee that we do not destroy 2842*7c478bd9Sstevel@tonic-gate * Large file data by atomically checking 2843*7c478bd9Sstevel@tonic-gate * the size while holding the contents 2844*7c478bd9Sstevel@tonic-gate * lock. 2845*7c478bd9Sstevel@tonic-gate */ 2846*7c478bd9Sstevel@tonic-gate if (flag && !(flag & FOFFMAX) && 2847*7c478bd9Sstevel@tonic-gate ((ip->i_mode & IFMT) == IFREG) && 2848*7c478bd9Sstevel@tonic-gate (ip->i_size > (offset_t)MAXOFF32_T)) { 2849*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 2850*7c478bd9Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 2851*7c478bd9Sstevel@tonic-gate error = EOVERFLOW; 2852*7c478bd9Sstevel@tonic-gate goto unlock; 2853*7c478bd9Sstevel@tonic-gate } 2854*7c478bd9Sstevel@tonic-gate if (TRANS_ISTRANS(ufsvfsp)) 2855*7c478bd9Sstevel@tonic-gate truncflag++; 2856*7c478bd9Sstevel@tonic-gate else { 2857*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 2858*7c478bd9Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 2859*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_rwlock, RW_WRITER); 2860*7c478bd9Sstevel@tonic-gate rw_enter(&ufsvfsp->vfs_dqrwlock, 2861*7c478bd9Sstevel@tonic-gate RW_READER); 2862*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 2863*7c478bd9Sstevel@tonic-gate (void) ufs_itrunc(ip, (u_offset_t)0, 0, 2864*7c478bd9Sstevel@tonic-gate cr); 2865*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_rwlock); 2866*7c478bd9Sstevel@tonic-gate } 2867*7c478bd9Sstevel@tonic-gate } 2868*7c478bd9Sstevel@tonic-gate } 2869*7c478bd9Sstevel@tonic-gate } 2870*7c478bd9Sstevel@tonic-gate 2871*7c478bd9Sstevel@tonic-gate if (error) { 2872*7c478bd9Sstevel@tonic-gate if (ip != NULL) { 2873*7c478bd9Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 2874*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 2875*7c478bd9Sstevel@tonic-gate } 2876*7c478bd9Sstevel@tonic-gate goto unlock; 2877*7c478bd9Sstevel@tonic-gate } 2878*7c478bd9Sstevel@tonic-gate 2879*7c478bd9Sstevel@tonic-gate *vpp = ITOV(ip); 2880*7c478bd9Sstevel@tonic-gate ITIMES(ip); 2881*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 2882*7c478bd9Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 2883*7c478bd9Sstevel@tonic-gate 2884*7c478bd9Sstevel@tonic-gate /* 2885*7c478bd9Sstevel@tonic-gate * If vnode is a device return special vnode instead. 2886*7c478bd9Sstevel@tonic-gate */ 2887*7c478bd9Sstevel@tonic-gate if (!error && IS_DEVVP(*vpp)) { 2888*7c478bd9Sstevel@tonic-gate struct vnode *newvp; 2889*7c478bd9Sstevel@tonic-gate 2890*7c478bd9Sstevel@tonic-gate newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 2891*7c478bd9Sstevel@tonic-gate VN_RELE(*vpp); 2892*7c478bd9Sstevel@tonic-gate if (newvp == NULL) { 2893*7c478bd9Sstevel@tonic-gate error = ENOSYS; 2894*7c478bd9Sstevel@tonic-gate goto unlock; 2895*7c478bd9Sstevel@tonic-gate } 2896*7c478bd9Sstevel@tonic-gate truncflag = 0; 2897*7c478bd9Sstevel@tonic-gate *vpp = newvp; 2898*7c478bd9Sstevel@tonic-gate } 2899*7c478bd9Sstevel@tonic-gate unlock: 2900*7c478bd9Sstevel@tonic-gate 2901*7c478bd9Sstevel@tonic-gate /* 2902*7c478bd9Sstevel@tonic-gate * Do the deferred update of the parent directory's sequence 2903*7c478bd9Sstevel@tonic-gate * number now. 2904*7c478bd9Sstevel@tonic-gate */ 2905*7c478bd9Sstevel@tonic-gate if (defer_dip_seq_update == 1) { 2906*7c478bd9Sstevel@tonic-gate rw_enter(&dip->i_contents, RW_READER); 2907*7c478bd9Sstevel@tonic-gate mutex_enter(&dip->i_tlock); 2908*7c478bd9Sstevel@tonic-gate dip->i_seq++; 2909*7c478bd9Sstevel@tonic-gate mutex_exit(&dip->i_tlock); 2910*7c478bd9Sstevel@tonic-gate rw_exit(&dip->i_contents); 2911*7c478bd9Sstevel@tonic-gate } 2912*7c478bd9Sstevel@tonic-gate 2913*7c478bd9Sstevel@tonic-gate if (ulp) { 2914*7c478bd9Sstevel@tonic-gate int terr = 0; 2915*7c478bd9Sstevel@tonic-gate 2916*7c478bd9Sstevel@tonic-gate TRANS_END_CSYNC(ufsvfsp, terr, issync, TOP_CREATE, 2917*7c478bd9Sstevel@tonic-gate trans_size); 2918*7c478bd9Sstevel@tonic-gate 2919*7c478bd9Sstevel@tonic-gate /* 2920*7c478bd9Sstevel@tonic-gate * If we haven't had a more interesting failure 2921*7c478bd9Sstevel@tonic-gate * already, then anything that might've happened 2922*7c478bd9Sstevel@tonic-gate * here should be reported. 2923*7c478bd9Sstevel@tonic-gate */ 2924*7c478bd9Sstevel@tonic-gate if (error == 0) 2925*7c478bd9Sstevel@tonic-gate error = terr; 2926*7c478bd9Sstevel@tonic-gate } 2927*7c478bd9Sstevel@tonic-gate 2928*7c478bd9Sstevel@tonic-gate if (!error && truncflag) { 2929*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_rwlock, RW_WRITER); 2930*7c478bd9Sstevel@tonic-gate (void) TRANS_ITRUNC(ip, (u_offset_t)0, 0, cr); 2931*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_rwlock); 2932*7c478bd9Sstevel@tonic-gate } 2933*7c478bd9Sstevel@tonic-gate 2934*7c478bd9Sstevel@tonic-gate if (ulp) 2935*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 2936*7c478bd9Sstevel@tonic-gate 2937*7c478bd9Sstevel@tonic-gate /* 2938*7c478bd9Sstevel@tonic-gate * If no inodes available, try to free one up out of the 2939*7c478bd9Sstevel@tonic-gate * pending delete queue. 2940*7c478bd9Sstevel@tonic-gate */ 2941*7c478bd9Sstevel@tonic-gate if ((error == ENOSPC) && retry && TRANS_ISTRANS(ufsvfsp)) { 2942*7c478bd9Sstevel@tonic-gate ufs_delete_drain_wait(ufsvfsp, 1); 2943*7c478bd9Sstevel@tonic-gate retry = 0; 2944*7c478bd9Sstevel@tonic-gate goto again; 2945*7c478bd9Sstevel@tonic-gate } 2946*7c478bd9Sstevel@tonic-gate 2947*7c478bd9Sstevel@tonic-gate out: 2948*7c478bd9Sstevel@tonic-gate TRACE_3(TR_FAC_UFS, TR_UFS_CREATE_END, 2949*7c478bd9Sstevel@tonic-gate "ufs_create_end:dvp %p name %s error %d", vpp, name, error); 2950*7c478bd9Sstevel@tonic-gate return (error); 2951*7c478bd9Sstevel@tonic-gate } 2952*7c478bd9Sstevel@tonic-gate 2953*7c478bd9Sstevel@tonic-gate extern int ufs_idle_max; 2954*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 2955*7c478bd9Sstevel@tonic-gate static int 2956*7c478bd9Sstevel@tonic-gate ufs_remove(struct vnode *vp, char *nm, struct cred *cr) 2957*7c478bd9Sstevel@tonic-gate { 2958*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 2959*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 2960*7c478bd9Sstevel@tonic-gate struct ulockfs *ulp; 2961*7c478bd9Sstevel@tonic-gate vnode_t *rmvp = NULL; /* Vnode corresponding to name being removed */ 2962*7c478bd9Sstevel@tonic-gate int error; 2963*7c478bd9Sstevel@tonic-gate int issync; 2964*7c478bd9Sstevel@tonic-gate int trans_size; 2965*7c478bd9Sstevel@tonic-gate 2966*7c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_UFS, TR_UFS_REMOVE_START, 2967*7c478bd9Sstevel@tonic-gate "ufs_remove_start:vp %p", vp); 2968*7c478bd9Sstevel@tonic-gate 2969*7c478bd9Sstevel@tonic-gate /* 2970*7c478bd9Sstevel@tonic-gate * don't let the delete queue get too long 2971*7c478bd9Sstevel@tonic-gate */ 2972*7c478bd9Sstevel@tonic-gate if (ufsvfsp == NULL) { 2973*7c478bd9Sstevel@tonic-gate error = EIO; 2974*7c478bd9Sstevel@tonic-gate goto out; 2975*7c478bd9Sstevel@tonic-gate } 2976*7c478bd9Sstevel@tonic-gate if (ufsvfsp->vfs_delete.uq_ne > ufs_idle_max) 2977*7c478bd9Sstevel@tonic-gate ufs_delete_drain(vp->v_vfsp, 1, 1); 2978*7c478bd9Sstevel@tonic-gate 2979*7c478bd9Sstevel@tonic-gate error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_REMOVE_MASK); 2980*7c478bd9Sstevel@tonic-gate if (error) 2981*7c478bd9Sstevel@tonic-gate goto out; 2982*7c478bd9Sstevel@tonic-gate 2983*7c478bd9Sstevel@tonic-gate if (ulp) 2984*7c478bd9Sstevel@tonic-gate TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_REMOVE, 2985*7c478bd9Sstevel@tonic-gate trans_size = (int)TOP_REMOVE_SIZE(VTOI(vp))); 2986*7c478bd9Sstevel@tonic-gate 2987*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_rwlock, RW_WRITER); 2988*7c478bd9Sstevel@tonic-gate error = ufs_dirremove(ip, nm, (struct inode *)0, (struct vnode *)0, 2989*7c478bd9Sstevel@tonic-gate DR_REMOVE, cr, &rmvp); 2990*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_rwlock); 2991*7c478bd9Sstevel@tonic-gate 2992*7c478bd9Sstevel@tonic-gate if (ulp) { 2993*7c478bd9Sstevel@tonic-gate TRANS_END_CSYNC(ufsvfsp, error, issync, TOP_REMOVE, trans_size); 2994*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 2995*7c478bd9Sstevel@tonic-gate } 2996*7c478bd9Sstevel@tonic-gate 2997*7c478bd9Sstevel@tonic-gate /* 2998*7c478bd9Sstevel@tonic-gate * This must be called after the remove transaction is closed. 2999*7c478bd9Sstevel@tonic-gate */ 3000*7c478bd9Sstevel@tonic-gate if (rmvp != NULL) { 3001*7c478bd9Sstevel@tonic-gate /* Only send the event if there were no errors */ 3002*7c478bd9Sstevel@tonic-gate if (error == 0) 3003*7c478bd9Sstevel@tonic-gate vnevent_remove(rmvp); 3004*7c478bd9Sstevel@tonic-gate VN_RELE(rmvp); 3005*7c478bd9Sstevel@tonic-gate } 3006*7c478bd9Sstevel@tonic-gate out: 3007*7c478bd9Sstevel@tonic-gate TRACE_3(TR_FAC_UFS, TR_UFS_REMOVE_END, 3008*7c478bd9Sstevel@tonic-gate "ufs_remove_end:vp %p name %s error %d", vp, nm, error); 3009*7c478bd9Sstevel@tonic-gate return (error); 3010*7c478bd9Sstevel@tonic-gate } 3011*7c478bd9Sstevel@tonic-gate 3012*7c478bd9Sstevel@tonic-gate /* 3013*7c478bd9Sstevel@tonic-gate * Link a file or a directory. Only privileged processes are allowed to 3014*7c478bd9Sstevel@tonic-gate * make links to directories. 3015*7c478bd9Sstevel@tonic-gate */ 3016*7c478bd9Sstevel@tonic-gate static int 3017*7c478bd9Sstevel@tonic-gate ufs_link(struct vnode *tdvp, struct vnode *svp, char *tnm, struct cred *cr) 3018*7c478bd9Sstevel@tonic-gate { 3019*7c478bd9Sstevel@tonic-gate struct inode *sip; 3020*7c478bd9Sstevel@tonic-gate struct inode *tdp = VTOI(tdvp); 3021*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp = tdp->i_ufsvfs; 3022*7c478bd9Sstevel@tonic-gate struct ulockfs *ulp; 3023*7c478bd9Sstevel@tonic-gate struct vnode *realvp; 3024*7c478bd9Sstevel@tonic-gate int error; 3025*7c478bd9Sstevel@tonic-gate int issync; 3026*7c478bd9Sstevel@tonic-gate int trans_size; 3027*7c478bd9Sstevel@tonic-gate int isdev; 3028*7c478bd9Sstevel@tonic-gate 3029*7c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_UFS, TR_UFS_LINK_START, 3030*7c478bd9Sstevel@tonic-gate "ufs_link_start:tdvp %p", tdvp); 3031*7c478bd9Sstevel@tonic-gate 3032*7c478bd9Sstevel@tonic-gate error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_LINK_MASK); 3033*7c478bd9Sstevel@tonic-gate if (error) 3034*7c478bd9Sstevel@tonic-gate goto out; 3035*7c478bd9Sstevel@tonic-gate 3036*7c478bd9Sstevel@tonic-gate if (ulp) 3037*7c478bd9Sstevel@tonic-gate TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_LINK, 3038*7c478bd9Sstevel@tonic-gate trans_size = (int)TOP_LINK_SIZE(VTOI(tdvp))); 3039*7c478bd9Sstevel@tonic-gate 3040*7c478bd9Sstevel@tonic-gate if (VOP_REALVP(svp, &realvp) == 0) 3041*7c478bd9Sstevel@tonic-gate svp = realvp; 3042*7c478bd9Sstevel@tonic-gate 3043*7c478bd9Sstevel@tonic-gate /* 3044*7c478bd9Sstevel@tonic-gate * Make sure link for extended attributes is valid 3045*7c478bd9Sstevel@tonic-gate * We only support hard linking of attr in ATTRDIR to ATTRDIR 3046*7c478bd9Sstevel@tonic-gate * 3047*7c478bd9Sstevel@tonic-gate * Make certain we don't attempt to look at a device node as 3048*7c478bd9Sstevel@tonic-gate * a ufs inode. 3049*7c478bd9Sstevel@tonic-gate */ 3050*7c478bd9Sstevel@tonic-gate 3051*7c478bd9Sstevel@tonic-gate isdev = IS_DEVVP(svp); 3052*7c478bd9Sstevel@tonic-gate if (((isdev == 0) && ((VTOI(svp)->i_cflags & IXATTR) == 0) && 3053*7c478bd9Sstevel@tonic-gate ((tdp->i_mode & IFMT) == IFATTRDIR)) || 3054*7c478bd9Sstevel@tonic-gate ((isdev == 0) && (VTOI(svp)->i_cflags & IXATTR) && 3055*7c478bd9Sstevel@tonic-gate ((tdp->i_mode & IFMT) == IFDIR))) { 3056*7c478bd9Sstevel@tonic-gate error = EINVAL; 3057*7c478bd9Sstevel@tonic-gate goto unlock; 3058*7c478bd9Sstevel@tonic-gate } 3059*7c478bd9Sstevel@tonic-gate 3060*7c478bd9Sstevel@tonic-gate sip = VTOI(svp); 3061*7c478bd9Sstevel@tonic-gate if ((svp->v_type == VDIR && 3062*7c478bd9Sstevel@tonic-gate secpolicy_fs_linkdir(cr, ufsvfsp->vfs_vfs) != 0) || 3063*7c478bd9Sstevel@tonic-gate (sip->i_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0)) { 3064*7c478bd9Sstevel@tonic-gate error = EPERM; 3065*7c478bd9Sstevel@tonic-gate goto unlock; 3066*7c478bd9Sstevel@tonic-gate } 3067*7c478bd9Sstevel@tonic-gate rw_enter(&tdp->i_rwlock, RW_WRITER); 3068*7c478bd9Sstevel@tonic-gate error = ufs_direnter_lr(tdp, tnm, DE_LINK, (struct inode *)0, 3069*7c478bd9Sstevel@tonic-gate sip, cr, NULL); 3070*7c478bd9Sstevel@tonic-gate rw_exit(&tdp->i_rwlock); 3071*7c478bd9Sstevel@tonic-gate 3072*7c478bd9Sstevel@tonic-gate unlock: 3073*7c478bd9Sstevel@tonic-gate if (ulp) { 3074*7c478bd9Sstevel@tonic-gate TRANS_END_CSYNC(ufsvfsp, error, issync, TOP_LINK, trans_size); 3075*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 3076*7c478bd9Sstevel@tonic-gate } 3077*7c478bd9Sstevel@tonic-gate out: 3078*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_LINK_END, 3079*7c478bd9Sstevel@tonic-gate "ufs_link_end:tdvp %p error %d", tdvp, error); 3080*7c478bd9Sstevel@tonic-gate return (error); 3081*7c478bd9Sstevel@tonic-gate } 3082*7c478bd9Sstevel@tonic-gate 3083*7c478bd9Sstevel@tonic-gate uint64_t ufs_rename_retry_cnt; 3084*7c478bd9Sstevel@tonic-gate uint64_t ufs_rename_upgrade_retry_cnt; 3085*7c478bd9Sstevel@tonic-gate uint64_t ufs_rename_dircheck_retry_cnt; 3086*7c478bd9Sstevel@tonic-gate clock_t ufs_rename_backoff_delay = 1; 3087*7c478bd9Sstevel@tonic-gate 3088*7c478bd9Sstevel@tonic-gate /* 3089*7c478bd9Sstevel@tonic-gate * Rename a file or directory. 3090*7c478bd9Sstevel@tonic-gate * We are given the vnode and entry string of the source and the 3091*7c478bd9Sstevel@tonic-gate * vnode and entry string of the place we want to move the source 3092*7c478bd9Sstevel@tonic-gate * to (the target). The essential operation is: 3093*7c478bd9Sstevel@tonic-gate * unlink(target); 3094*7c478bd9Sstevel@tonic-gate * link(source, target); 3095*7c478bd9Sstevel@tonic-gate * unlink(source); 3096*7c478bd9Sstevel@tonic-gate * but "atomically". Can't do full commit without saving state in 3097*7c478bd9Sstevel@tonic-gate * the inode on disk, which isn't feasible at this time. Best we 3098*7c478bd9Sstevel@tonic-gate * can do is always guarantee that the TARGET exists. 3099*7c478bd9Sstevel@tonic-gate */ 3100*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 3101*7c478bd9Sstevel@tonic-gate static int 3102*7c478bd9Sstevel@tonic-gate ufs_rename( 3103*7c478bd9Sstevel@tonic-gate struct vnode *sdvp, /* old (source) parent vnode */ 3104*7c478bd9Sstevel@tonic-gate char *snm, /* old (source) entry name */ 3105*7c478bd9Sstevel@tonic-gate struct vnode *tdvp, /* new (target) parent vnode */ 3106*7c478bd9Sstevel@tonic-gate char *tnm, /* new (target) entry name */ 3107*7c478bd9Sstevel@tonic-gate struct cred *cr) 3108*7c478bd9Sstevel@tonic-gate { 3109*7c478bd9Sstevel@tonic-gate struct inode *sip = NULL; /* source inode */ 3110*7c478bd9Sstevel@tonic-gate struct inode *sdp; /* old (source) parent inode */ 3111*7c478bd9Sstevel@tonic-gate struct inode *tdp; /* new (target) parent inode */ 3112*7c478bd9Sstevel@tonic-gate struct vnode *tvp = NULL; /* target vnode, if it exists */ 3113*7c478bd9Sstevel@tonic-gate struct vnode *realvp; 3114*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp; 3115*7c478bd9Sstevel@tonic-gate struct ulockfs *ulp; 3116*7c478bd9Sstevel@tonic-gate int error; 3117*7c478bd9Sstevel@tonic-gate int issync; 3118*7c478bd9Sstevel@tonic-gate int trans_size; 3119*7c478bd9Sstevel@tonic-gate 3120*7c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_UFS, TR_UFS_RENAME_START, 3121*7c478bd9Sstevel@tonic-gate "ufs_rename_start:sdvp %p", sdvp); 3122*7c478bd9Sstevel@tonic-gate 3123*7c478bd9Sstevel@tonic-gate 3124*7c478bd9Sstevel@tonic-gate sdp = VTOI(sdvp); 3125*7c478bd9Sstevel@tonic-gate ufsvfsp = sdp->i_ufsvfs; 3126*7c478bd9Sstevel@tonic-gate error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_RENAME_MASK); 3127*7c478bd9Sstevel@tonic-gate if (error) 3128*7c478bd9Sstevel@tonic-gate goto out; 3129*7c478bd9Sstevel@tonic-gate 3130*7c478bd9Sstevel@tonic-gate if (ulp) 3131*7c478bd9Sstevel@tonic-gate TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_RENAME, 3132*7c478bd9Sstevel@tonic-gate trans_size = (int)TOP_RENAME_SIZE(sdp)); 3133*7c478bd9Sstevel@tonic-gate 3134*7c478bd9Sstevel@tonic-gate if (VOP_REALVP(tdvp, &realvp) == 0) 3135*7c478bd9Sstevel@tonic-gate tdvp = realvp; 3136*7c478bd9Sstevel@tonic-gate 3137*7c478bd9Sstevel@tonic-gate tdp = VTOI(tdvp); 3138*7c478bd9Sstevel@tonic-gate 3139*7c478bd9Sstevel@tonic-gate /* 3140*7c478bd9Sstevel@tonic-gate * We only allow renaming of attributes from ATTRDIR to ATTRDIR. 3141*7c478bd9Sstevel@tonic-gate */ 3142*7c478bd9Sstevel@tonic-gate if ((tdp->i_mode & IFMT) != (sdp->i_mode & IFMT)) { 3143*7c478bd9Sstevel@tonic-gate error = EINVAL; 3144*7c478bd9Sstevel@tonic-gate goto unlock; 3145*7c478bd9Sstevel@tonic-gate } 3146*7c478bd9Sstevel@tonic-gate 3147*7c478bd9Sstevel@tonic-gate /* 3148*7c478bd9Sstevel@tonic-gate * Look up inode of file we're supposed to rename. 3149*7c478bd9Sstevel@tonic-gate */ 3150*7c478bd9Sstevel@tonic-gate if (error = ufs_dirlook(sdp, snm, &sip, cr, 0)) { 3151*7c478bd9Sstevel@tonic-gate goto unlock; 3152*7c478bd9Sstevel@tonic-gate } 3153*7c478bd9Sstevel@tonic-gate 3154*7c478bd9Sstevel@tonic-gate /* 3155*7c478bd9Sstevel@tonic-gate * Lock both the source and target directories (they may be 3156*7c478bd9Sstevel@tonic-gate * the same) to provide the atomicity semantics that was 3157*7c478bd9Sstevel@tonic-gate * previously provided by the per file system vfs_rename_lock 3158*7c478bd9Sstevel@tonic-gate * 3159*7c478bd9Sstevel@tonic-gate * with vfs_rename_lock removed to allow simultaneous renames 3160*7c478bd9Sstevel@tonic-gate * within a file system, ufs_dircheckpath can deadlock while 3161*7c478bd9Sstevel@tonic-gate * traversing back to ensure that source is not a parent directory 3162*7c478bd9Sstevel@tonic-gate * of target parent directory. This is because we get into 3163*7c478bd9Sstevel@tonic-gate * ufs_dircheckpath with the sdp and tdp locks held as RW_WRITER. 3164*7c478bd9Sstevel@tonic-gate * If the tdp and sdp of the simultaneous renames happen to be 3165*7c478bd9Sstevel@tonic-gate * in the path of each other, it can lead to a deadlock. This 3166*7c478bd9Sstevel@tonic-gate * can be avoided by getting the locks as RW_READER here and then 3167*7c478bd9Sstevel@tonic-gate * upgrading to RW_WRITER after completing the ufs_dircheckpath. 3168*7c478bd9Sstevel@tonic-gate */ 3169*7c478bd9Sstevel@tonic-gate retry: 3170*7c478bd9Sstevel@tonic-gate rw_enter(&tdp->i_rwlock, RW_READER); 3171*7c478bd9Sstevel@tonic-gate if (tdp != sdp) { 3172*7c478bd9Sstevel@tonic-gate /* 3173*7c478bd9Sstevel@tonic-gate * We're locking 2 peer level locks, so must use tryenter 3174*7c478bd9Sstevel@tonic-gate * on the 2nd to avoid deadlocks that would occur 3175*7c478bd9Sstevel@tonic-gate * if we renamed a->b and b->a concurrently. 3176*7c478bd9Sstevel@tonic-gate */ 3177*7c478bd9Sstevel@tonic-gate if (!rw_tryenter(&sdp->i_rwlock, RW_READER)) { 3178*7c478bd9Sstevel@tonic-gate /* 3179*7c478bd9Sstevel@tonic-gate * Reverse the lock grabs in case we have heavy 3180*7c478bd9Sstevel@tonic-gate * contention on the 2nd lock. 3181*7c478bd9Sstevel@tonic-gate */ 3182*7c478bd9Sstevel@tonic-gate rw_exit(&tdp->i_rwlock); 3183*7c478bd9Sstevel@tonic-gate rw_enter(&sdp->i_rwlock, RW_READER); 3184*7c478bd9Sstevel@tonic-gate if (!rw_tryenter(&tdp->i_rwlock, RW_READER)) { 3185*7c478bd9Sstevel@tonic-gate ufs_rename_retry_cnt++; 3186*7c478bd9Sstevel@tonic-gate rw_exit(&sdp->i_rwlock); 3187*7c478bd9Sstevel@tonic-gate goto retry; 3188*7c478bd9Sstevel@tonic-gate } 3189*7c478bd9Sstevel@tonic-gate } 3190*7c478bd9Sstevel@tonic-gate } 3191*7c478bd9Sstevel@tonic-gate 3192*7c478bd9Sstevel@tonic-gate if (sip == tdp) { 3193*7c478bd9Sstevel@tonic-gate error = EINVAL; 3194*7c478bd9Sstevel@tonic-gate goto errout; 3195*7c478bd9Sstevel@tonic-gate } 3196*7c478bd9Sstevel@tonic-gate /* 3197*7c478bd9Sstevel@tonic-gate * Make sure we can delete the source entry. This requires 3198*7c478bd9Sstevel@tonic-gate * write permission on the containing directory. 3199*7c478bd9Sstevel@tonic-gate * Check for sticky directories. 3200*7c478bd9Sstevel@tonic-gate */ 3201*7c478bd9Sstevel@tonic-gate rw_enter(&sdp->i_contents, RW_READER); 3202*7c478bd9Sstevel@tonic-gate rw_enter(&sip->i_contents, RW_READER); 3203*7c478bd9Sstevel@tonic-gate if ((error = ufs_iaccess(sdp, IWRITE, cr)) != 0 || 3204*7c478bd9Sstevel@tonic-gate (error = ufs_sticky_remove_access(sdp, sip, cr)) != 0) { 3205*7c478bd9Sstevel@tonic-gate rw_exit(&sip->i_contents); 3206*7c478bd9Sstevel@tonic-gate rw_exit(&sdp->i_contents); 3207*7c478bd9Sstevel@tonic-gate goto errout; 3208*7c478bd9Sstevel@tonic-gate } 3209*7c478bd9Sstevel@tonic-gate 3210*7c478bd9Sstevel@tonic-gate /* 3211*7c478bd9Sstevel@tonic-gate * If this is a rename of a directory and the parent is 3212*7c478bd9Sstevel@tonic-gate * different (".." must be changed), then the source 3213*7c478bd9Sstevel@tonic-gate * directory must not be in the directory hierarchy 3214*7c478bd9Sstevel@tonic-gate * above the target, as this would orphan everything 3215*7c478bd9Sstevel@tonic-gate * below the source directory. Also the user must have 3216*7c478bd9Sstevel@tonic-gate * write permission in the source so as to be able to 3217*7c478bd9Sstevel@tonic-gate * change "..". 3218*7c478bd9Sstevel@tonic-gate */ 3219*7c478bd9Sstevel@tonic-gate if ((((sip->i_mode & IFMT) == IFDIR) || 3220*7c478bd9Sstevel@tonic-gate ((sip->i_mode & IFMT) == IFATTRDIR)) && sdp != tdp) { 3221*7c478bd9Sstevel@tonic-gate ino_t inum; 3222*7c478bd9Sstevel@tonic-gate 3223*7c478bd9Sstevel@tonic-gate if ((error = ufs_iaccess(sip, IWRITE, cr))) { 3224*7c478bd9Sstevel@tonic-gate rw_exit(&sip->i_contents); 3225*7c478bd9Sstevel@tonic-gate rw_exit(&sdp->i_contents); 3226*7c478bd9Sstevel@tonic-gate goto errout; 3227*7c478bd9Sstevel@tonic-gate } 3228*7c478bd9Sstevel@tonic-gate inum = sip->i_number; 3229*7c478bd9Sstevel@tonic-gate rw_exit(&sip->i_contents); 3230*7c478bd9Sstevel@tonic-gate rw_exit(&sdp->i_contents); 3231*7c478bd9Sstevel@tonic-gate if ((error = ufs_dircheckpath(inum, tdp, sdp, cr))) { 3232*7c478bd9Sstevel@tonic-gate /* 3233*7c478bd9Sstevel@tonic-gate * If we got EAGAIN ufs_dircheckpath detected a 3234*7c478bd9Sstevel@tonic-gate * potential deadlock and backed out. We need 3235*7c478bd9Sstevel@tonic-gate * to retry the operation since sdp and tdp have 3236*7c478bd9Sstevel@tonic-gate * to be released to avoid the deadlock. 3237*7c478bd9Sstevel@tonic-gate */ 3238*7c478bd9Sstevel@tonic-gate if (error == EAGAIN) { 3239*7c478bd9Sstevel@tonic-gate rw_exit(&tdp->i_rwlock); 3240*7c478bd9Sstevel@tonic-gate if (tdp != sdp) 3241*7c478bd9Sstevel@tonic-gate rw_exit(&sdp->i_rwlock); 3242*7c478bd9Sstevel@tonic-gate delay(ufs_rename_backoff_delay); 3243*7c478bd9Sstevel@tonic-gate ufs_rename_dircheck_retry_cnt++; 3244*7c478bd9Sstevel@tonic-gate goto retry; 3245*7c478bd9Sstevel@tonic-gate } 3246*7c478bd9Sstevel@tonic-gate goto errout; 3247*7c478bd9Sstevel@tonic-gate } 3248*7c478bd9Sstevel@tonic-gate } else { 3249*7c478bd9Sstevel@tonic-gate rw_exit(&sip->i_contents); 3250*7c478bd9Sstevel@tonic-gate rw_exit(&sdp->i_contents); 3251*7c478bd9Sstevel@tonic-gate } 3252*7c478bd9Sstevel@tonic-gate 3253*7c478bd9Sstevel@tonic-gate 3254*7c478bd9Sstevel@tonic-gate /* 3255*7c478bd9Sstevel@tonic-gate * Check for renaming '.' or '..' or alias of '.' 3256*7c478bd9Sstevel@tonic-gate */ 3257*7c478bd9Sstevel@tonic-gate if (strcmp(snm, ".") == 0 || strcmp(snm, "..") == 0 || sdp == sip) { 3258*7c478bd9Sstevel@tonic-gate error = EINVAL; 3259*7c478bd9Sstevel@tonic-gate goto errout; 3260*7c478bd9Sstevel@tonic-gate } 3261*7c478bd9Sstevel@tonic-gate 3262*7c478bd9Sstevel@tonic-gate /* 3263*7c478bd9Sstevel@tonic-gate * Simultaneous renames can deadlock in ufs_dircheckpath since it 3264*7c478bd9Sstevel@tonic-gate * tries to traverse back the file tree with both tdp and sdp held 3265*7c478bd9Sstevel@tonic-gate * as RW_WRITER. To avoid that we have to hold the tdp and sdp locks 3266*7c478bd9Sstevel@tonic-gate * as RW_READERS till ufs_dircheckpath is done. 3267*7c478bd9Sstevel@tonic-gate * Now that ufs_dircheckpath is done with, we can upgrade the locks 3268*7c478bd9Sstevel@tonic-gate * to RW_WRITER. 3269*7c478bd9Sstevel@tonic-gate */ 3270*7c478bd9Sstevel@tonic-gate if (!rw_tryupgrade(&tdp->i_rwlock)) { 3271*7c478bd9Sstevel@tonic-gate /* 3272*7c478bd9Sstevel@tonic-gate * The upgrade failed. We got to give away the lock 3273*7c478bd9Sstevel@tonic-gate * as to avoid deadlocking with someone else who is 3274*7c478bd9Sstevel@tonic-gate * waiting for writer lock. With the lock gone, we 3275*7c478bd9Sstevel@tonic-gate * cannot be sure the checks done above will hold 3276*7c478bd9Sstevel@tonic-gate * good when we eventually get them back as writer. 3277*7c478bd9Sstevel@tonic-gate * So if we can't upgrade we drop the locks and retry 3278*7c478bd9Sstevel@tonic-gate * everything again. 3279*7c478bd9Sstevel@tonic-gate */ 3280*7c478bd9Sstevel@tonic-gate rw_exit(&tdp->i_rwlock); 3281*7c478bd9Sstevel@tonic-gate if (tdp != sdp) 3282*7c478bd9Sstevel@tonic-gate rw_exit(&sdp->i_rwlock); 3283*7c478bd9Sstevel@tonic-gate delay(ufs_rename_backoff_delay); 3284*7c478bd9Sstevel@tonic-gate ufs_rename_upgrade_retry_cnt++; 3285*7c478bd9Sstevel@tonic-gate goto retry; 3286*7c478bd9Sstevel@tonic-gate } 3287*7c478bd9Sstevel@tonic-gate if (tdp != sdp) { 3288*7c478bd9Sstevel@tonic-gate if (!rw_tryupgrade(&sdp->i_rwlock)) { 3289*7c478bd9Sstevel@tonic-gate /* 3290*7c478bd9Sstevel@tonic-gate * The upgrade failed. We got to give away the lock 3291*7c478bd9Sstevel@tonic-gate * as to avoid deadlocking with someone else who is 3292*7c478bd9Sstevel@tonic-gate * waiting for writer lock. With the lock gone, we 3293*7c478bd9Sstevel@tonic-gate * cannot be sure the checks done above will hold 3294*7c478bd9Sstevel@tonic-gate * good when we eventually get them back as writer. 3295*7c478bd9Sstevel@tonic-gate * So if we can't upgrade we drop the locks and retry 3296*7c478bd9Sstevel@tonic-gate * everything again. 3297*7c478bd9Sstevel@tonic-gate */ 3298*7c478bd9Sstevel@tonic-gate rw_exit(&tdp->i_rwlock); 3299*7c478bd9Sstevel@tonic-gate rw_exit(&sdp->i_rwlock); 3300*7c478bd9Sstevel@tonic-gate delay(ufs_rename_backoff_delay); 3301*7c478bd9Sstevel@tonic-gate ufs_rename_upgrade_retry_cnt++; 3302*7c478bd9Sstevel@tonic-gate goto retry; 3303*7c478bd9Sstevel@tonic-gate } 3304*7c478bd9Sstevel@tonic-gate } 3305*7c478bd9Sstevel@tonic-gate /* 3306*7c478bd9Sstevel@tonic-gate * Link source to the target. If a target exists, return its 3307*7c478bd9Sstevel@tonic-gate * vnode pointer in tvp. We'll release it after sending the 3308*7c478bd9Sstevel@tonic-gate * vnevent. 3309*7c478bd9Sstevel@tonic-gate */ 3310*7c478bd9Sstevel@tonic-gate if (error = ufs_direnter_lr(tdp, tnm, DE_RENAME, sdp, sip, cr, &tvp)) { 3311*7c478bd9Sstevel@tonic-gate /* 3312*7c478bd9Sstevel@tonic-gate * ESAME isn't really an error; it indicates that the 3313*7c478bd9Sstevel@tonic-gate * operation should not be done because the source and target 3314*7c478bd9Sstevel@tonic-gate * are the same file, but that no error should be reported. 3315*7c478bd9Sstevel@tonic-gate */ 3316*7c478bd9Sstevel@tonic-gate if (error == ESAME) 3317*7c478bd9Sstevel@tonic-gate error = 0; 3318*7c478bd9Sstevel@tonic-gate goto errout; 3319*7c478bd9Sstevel@tonic-gate } 3320*7c478bd9Sstevel@tonic-gate 3321*7c478bd9Sstevel@tonic-gate /* 3322*7c478bd9Sstevel@tonic-gate * Unlink the source. 3323*7c478bd9Sstevel@tonic-gate * Remove the source entry. ufs_dirremove() checks that the entry 3324*7c478bd9Sstevel@tonic-gate * still reflects sip, and returns an error if it doesn't. 3325*7c478bd9Sstevel@tonic-gate * If the entry has changed just forget about it. Release 3326*7c478bd9Sstevel@tonic-gate * the source inode. 3327*7c478bd9Sstevel@tonic-gate */ 3328*7c478bd9Sstevel@tonic-gate if ((error = ufs_dirremove(sdp, snm, sip, (struct vnode *)0, 3329*7c478bd9Sstevel@tonic-gate DR_RENAME, cr, NULL)) == ENOENT) 3330*7c478bd9Sstevel@tonic-gate error = 0; 3331*7c478bd9Sstevel@tonic-gate 3332*7c478bd9Sstevel@tonic-gate errout: 3333*7c478bd9Sstevel@tonic-gate rw_exit(&tdp->i_rwlock); 3334*7c478bd9Sstevel@tonic-gate if (sdp != tdp) { 3335*7c478bd9Sstevel@tonic-gate rw_exit(&sdp->i_rwlock); 3336*7c478bd9Sstevel@tonic-gate } 3337*7c478bd9Sstevel@tonic-gate 3338*7c478bd9Sstevel@tonic-gate unlock: 3339*7c478bd9Sstevel@tonic-gate if (ulp) { 3340*7c478bd9Sstevel@tonic-gate TRANS_END_CSYNC(ufsvfsp, error, issync, TOP_RENAME, trans_size); 3341*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 3342*7c478bd9Sstevel@tonic-gate } 3343*7c478bd9Sstevel@tonic-gate 3344*7c478bd9Sstevel@tonic-gate /* 3345*7c478bd9Sstevel@tonic-gate * If no errors, send the appropriate events on the source 3346*7c478bd9Sstevel@tonic-gate * and destination (a.k.a, target) vnodes, if they exist. 3347*7c478bd9Sstevel@tonic-gate * This has to be done after the rename transaction has closed. 3348*7c478bd9Sstevel@tonic-gate */ 3349*7c478bd9Sstevel@tonic-gate if (error == 0) { 3350*7c478bd9Sstevel@tonic-gate if (tvp != NULL) 3351*7c478bd9Sstevel@tonic-gate vnevent_rename_dest(tvp); 3352*7c478bd9Sstevel@tonic-gate /* 3353*7c478bd9Sstevel@tonic-gate * Note that if ufs_direnter_lr() returned ESAME then 3354*7c478bd9Sstevel@tonic-gate * this event will still be sent. This isn't expected 3355*7c478bd9Sstevel@tonic-gate * to be a problem for anticipated usage by consumers. 3356*7c478bd9Sstevel@tonic-gate */ 3357*7c478bd9Sstevel@tonic-gate if (sip != NULL) 3358*7c478bd9Sstevel@tonic-gate vnevent_rename_src(ITOV(sip)); 3359*7c478bd9Sstevel@tonic-gate } 3360*7c478bd9Sstevel@tonic-gate 3361*7c478bd9Sstevel@tonic-gate if (tvp != NULL) 3362*7c478bd9Sstevel@tonic-gate VN_RELE(tvp); 3363*7c478bd9Sstevel@tonic-gate 3364*7c478bd9Sstevel@tonic-gate if (sip != NULL) 3365*7c478bd9Sstevel@tonic-gate VN_RELE(ITOV(sip)); 3366*7c478bd9Sstevel@tonic-gate 3367*7c478bd9Sstevel@tonic-gate out: 3368*7c478bd9Sstevel@tonic-gate TRACE_5(TR_FAC_UFS, TR_UFS_RENAME_END, 3369*7c478bd9Sstevel@tonic-gate "ufs_rename_end:sdvp %p snm %s tdvp %p tnm %s error %d", 3370*7c478bd9Sstevel@tonic-gate sdvp, snm, tdvp, tnm, error); 3371*7c478bd9Sstevel@tonic-gate return (error); 3372*7c478bd9Sstevel@tonic-gate } 3373*7c478bd9Sstevel@tonic-gate 3374*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 3375*7c478bd9Sstevel@tonic-gate static int 3376*7c478bd9Sstevel@tonic-gate ufs_mkdir(struct vnode *dvp, char *dirname, struct vattr *vap, 3377*7c478bd9Sstevel@tonic-gate struct vnode **vpp, struct cred *cr) 3378*7c478bd9Sstevel@tonic-gate { 3379*7c478bd9Sstevel@tonic-gate struct inode *ip; 3380*7c478bd9Sstevel@tonic-gate struct inode *xip; 3381*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp; 3382*7c478bd9Sstevel@tonic-gate struct ulockfs *ulp; 3383*7c478bd9Sstevel@tonic-gate int error; 3384*7c478bd9Sstevel@tonic-gate int issync; 3385*7c478bd9Sstevel@tonic-gate int trans_size; 3386*7c478bd9Sstevel@tonic-gate int retry = 1; 3387*7c478bd9Sstevel@tonic-gate 3388*7c478bd9Sstevel@tonic-gate ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE)); 3389*7c478bd9Sstevel@tonic-gate 3390*7c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_UFS, TR_UFS_MKDIR_START, 3391*7c478bd9Sstevel@tonic-gate "ufs_mkdir_start:dvp %p", dvp); 3392*7c478bd9Sstevel@tonic-gate 3393*7c478bd9Sstevel@tonic-gate /* 3394*7c478bd9Sstevel@tonic-gate * Can't make directory in attr hidden dir 3395*7c478bd9Sstevel@tonic-gate */ 3396*7c478bd9Sstevel@tonic-gate if ((VTOI(dvp)->i_mode & IFMT) == IFATTRDIR) 3397*7c478bd9Sstevel@tonic-gate return (EINVAL); 3398*7c478bd9Sstevel@tonic-gate 3399*7c478bd9Sstevel@tonic-gate again: 3400*7c478bd9Sstevel@tonic-gate ip = VTOI(dvp); 3401*7c478bd9Sstevel@tonic-gate ufsvfsp = ip->i_ufsvfs; 3402*7c478bd9Sstevel@tonic-gate error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_MKDIR_MASK); 3403*7c478bd9Sstevel@tonic-gate if (error) 3404*7c478bd9Sstevel@tonic-gate goto out; 3405*7c478bd9Sstevel@tonic-gate if (ulp) 3406*7c478bd9Sstevel@tonic-gate TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_MKDIR, 3407*7c478bd9Sstevel@tonic-gate trans_size = (int)TOP_MKDIR_SIZE(ip)); 3408*7c478bd9Sstevel@tonic-gate 3409*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_rwlock, RW_WRITER); 3410*7c478bd9Sstevel@tonic-gate 3411*7c478bd9Sstevel@tonic-gate error = ufs_direnter_cm(ip, dirname, DE_MKDIR, vap, &xip, cr, 3412*7c478bd9Sstevel@tonic-gate (retry ? IQUIET : 0)); 3413*7c478bd9Sstevel@tonic-gate 3414*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_rwlock); 3415*7c478bd9Sstevel@tonic-gate if (error == 0) { 3416*7c478bd9Sstevel@tonic-gate ip = xip; 3417*7c478bd9Sstevel@tonic-gate *vpp = ITOV(ip); 3418*7c478bd9Sstevel@tonic-gate } else if (error == EEXIST) 3419*7c478bd9Sstevel@tonic-gate VN_RELE(ITOV(xip)); 3420*7c478bd9Sstevel@tonic-gate 3421*7c478bd9Sstevel@tonic-gate if (ulp) { 3422*7c478bd9Sstevel@tonic-gate int terr = 0; 3423*7c478bd9Sstevel@tonic-gate TRANS_END_CSYNC(ufsvfsp, terr, issync, TOP_MKDIR, trans_size); 3424*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 3425*7c478bd9Sstevel@tonic-gate if (error == 0) 3426*7c478bd9Sstevel@tonic-gate error = terr; 3427*7c478bd9Sstevel@tonic-gate } 3428*7c478bd9Sstevel@tonic-gate out: 3429*7c478bd9Sstevel@tonic-gate if ((error == ENOSPC) && retry && TRANS_ISTRANS(ufsvfsp)) { 3430*7c478bd9Sstevel@tonic-gate ufs_delete_drain_wait(ufsvfsp, 1); 3431*7c478bd9Sstevel@tonic-gate retry = 0; 3432*7c478bd9Sstevel@tonic-gate goto again; 3433*7c478bd9Sstevel@tonic-gate } 3434*7c478bd9Sstevel@tonic-gate 3435*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_MKDIR_END, 3436*7c478bd9Sstevel@tonic-gate "ufs_mkdir_end:dvp %p error %d", dvp, error); 3437*7c478bd9Sstevel@tonic-gate return (error); 3438*7c478bd9Sstevel@tonic-gate } 3439*7c478bd9Sstevel@tonic-gate 3440*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 3441*7c478bd9Sstevel@tonic-gate static int 3442*7c478bd9Sstevel@tonic-gate ufs_rmdir(struct vnode *vp, char *nm, struct vnode *cdir, struct cred *cr) 3443*7c478bd9Sstevel@tonic-gate { 3444*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 3445*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 3446*7c478bd9Sstevel@tonic-gate struct ulockfs *ulp; 3447*7c478bd9Sstevel@tonic-gate vnode_t *rmvp = NULL; /* Vnode of removed directory */ 3448*7c478bd9Sstevel@tonic-gate int error; 3449*7c478bd9Sstevel@tonic-gate int issync; 3450*7c478bd9Sstevel@tonic-gate 3451*7c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_UFS, TR_UFS_RMDIR_START, 3452*7c478bd9Sstevel@tonic-gate "ufs_rmdir_start:vp %p", vp); 3453*7c478bd9Sstevel@tonic-gate 3454*7c478bd9Sstevel@tonic-gate /* 3455*7c478bd9Sstevel@tonic-gate * don't let the delete queue get too long 3456*7c478bd9Sstevel@tonic-gate */ 3457*7c478bd9Sstevel@tonic-gate if (ufsvfsp == NULL) { 3458*7c478bd9Sstevel@tonic-gate error = EIO; 3459*7c478bd9Sstevel@tonic-gate goto out; 3460*7c478bd9Sstevel@tonic-gate } 3461*7c478bd9Sstevel@tonic-gate if (ufsvfsp->vfs_delete.uq_ne > ufs_idle_max) 3462*7c478bd9Sstevel@tonic-gate ufs_delete_drain(vp->v_vfsp, 1, 1); 3463*7c478bd9Sstevel@tonic-gate 3464*7c478bd9Sstevel@tonic-gate error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_RMDIR_MASK); 3465*7c478bd9Sstevel@tonic-gate if (error) 3466*7c478bd9Sstevel@tonic-gate goto out; 3467*7c478bd9Sstevel@tonic-gate 3468*7c478bd9Sstevel@tonic-gate if (ulp) 3469*7c478bd9Sstevel@tonic-gate TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_RMDIR, TOP_RMDIR_SIZE); 3470*7c478bd9Sstevel@tonic-gate 3471*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_rwlock, RW_WRITER); 3472*7c478bd9Sstevel@tonic-gate error = ufs_dirremove(ip, nm, (struct inode *)0, cdir, DR_RMDIR, cr, 3473*7c478bd9Sstevel@tonic-gate &rmvp); 3474*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_rwlock); 3475*7c478bd9Sstevel@tonic-gate 3476*7c478bd9Sstevel@tonic-gate if (ulp) { 3477*7c478bd9Sstevel@tonic-gate TRANS_END_CSYNC(ufsvfsp, error, issync, TOP_RMDIR, 3478*7c478bd9Sstevel@tonic-gate TOP_RMDIR_SIZE); 3479*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 3480*7c478bd9Sstevel@tonic-gate } 3481*7c478bd9Sstevel@tonic-gate 3482*7c478bd9Sstevel@tonic-gate /* 3483*7c478bd9Sstevel@tonic-gate * This must be done AFTER the rmdir transaction has closed. 3484*7c478bd9Sstevel@tonic-gate */ 3485*7c478bd9Sstevel@tonic-gate if (rmvp != NULL) { 3486*7c478bd9Sstevel@tonic-gate /* Only send the event if there were no errors */ 3487*7c478bd9Sstevel@tonic-gate if (error == 0) 3488*7c478bd9Sstevel@tonic-gate vnevent_rmdir(rmvp); 3489*7c478bd9Sstevel@tonic-gate VN_RELE(rmvp); 3490*7c478bd9Sstevel@tonic-gate } 3491*7c478bd9Sstevel@tonic-gate out: 3492*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_RMDIR_END, 3493*7c478bd9Sstevel@tonic-gate "ufs_rmdir_end:vp %p error %d", vp, error); 3494*7c478bd9Sstevel@tonic-gate 3495*7c478bd9Sstevel@tonic-gate return (error); 3496*7c478bd9Sstevel@tonic-gate } 3497*7c478bd9Sstevel@tonic-gate 3498*7c478bd9Sstevel@tonic-gate /* ARGSUSED */ 3499*7c478bd9Sstevel@tonic-gate static int 3500*7c478bd9Sstevel@tonic-gate ufs_readdir( 3501*7c478bd9Sstevel@tonic-gate struct vnode *vp, 3502*7c478bd9Sstevel@tonic-gate struct uio *uiop, 3503*7c478bd9Sstevel@tonic-gate struct cred *cr, 3504*7c478bd9Sstevel@tonic-gate int *eofp) 3505*7c478bd9Sstevel@tonic-gate { 3506*7c478bd9Sstevel@tonic-gate struct iovec *iovp; 3507*7c478bd9Sstevel@tonic-gate struct inode *ip; 3508*7c478bd9Sstevel@tonic-gate struct direct *idp; 3509*7c478bd9Sstevel@tonic-gate struct dirent64 *odp; 3510*7c478bd9Sstevel@tonic-gate struct fbuf *fbp; 3511*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp; 3512*7c478bd9Sstevel@tonic-gate struct ulockfs *ulp; 3513*7c478bd9Sstevel@tonic-gate caddr_t outbuf; 3514*7c478bd9Sstevel@tonic-gate size_t bufsize; 3515*7c478bd9Sstevel@tonic-gate uint_t offset; 3516*7c478bd9Sstevel@tonic-gate uint_t bytes_wanted, total_bytes_wanted; 3517*7c478bd9Sstevel@tonic-gate int incount = 0; 3518*7c478bd9Sstevel@tonic-gate int outcount = 0; 3519*7c478bd9Sstevel@tonic-gate int error; 3520*7c478bd9Sstevel@tonic-gate 3521*7c478bd9Sstevel@tonic-gate ip = VTOI(vp); 3522*7c478bd9Sstevel@tonic-gate ASSERT(RW_READ_HELD(&ip->i_rwlock)); 3523*7c478bd9Sstevel@tonic-gate 3524*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_READDIR_START, 3525*7c478bd9Sstevel@tonic-gate "ufs_readdir_start:vp %p uiop %p", vp, uiop); 3526*7c478bd9Sstevel@tonic-gate 3527*7c478bd9Sstevel@tonic-gate if (uiop->uio_loffset >= MAXOFF32_T) { 3528*7c478bd9Sstevel@tonic-gate if (eofp) 3529*7c478bd9Sstevel@tonic-gate *eofp = 1; 3530*7c478bd9Sstevel@tonic-gate return (0); 3531*7c478bd9Sstevel@tonic-gate } 3532*7c478bd9Sstevel@tonic-gate 3533*7c478bd9Sstevel@tonic-gate /* 3534*7c478bd9Sstevel@tonic-gate * Check if we have been called with a valid iov_len 3535*7c478bd9Sstevel@tonic-gate * and bail out if not, otherwise we may potentially loop 3536*7c478bd9Sstevel@tonic-gate * forever further down. 3537*7c478bd9Sstevel@tonic-gate */ 3538*7c478bd9Sstevel@tonic-gate if (uiop->uio_iov->iov_len <= 0) { 3539*7c478bd9Sstevel@tonic-gate error = EINVAL; 3540*7c478bd9Sstevel@tonic-gate goto out; 3541*7c478bd9Sstevel@tonic-gate } 3542*7c478bd9Sstevel@tonic-gate 3543*7c478bd9Sstevel@tonic-gate /* 3544*7c478bd9Sstevel@tonic-gate * Large Files: When we come here we are guaranteed that 3545*7c478bd9Sstevel@tonic-gate * uio_offset can be used safely. The high word is zero. 3546*7c478bd9Sstevel@tonic-gate */ 3547*7c478bd9Sstevel@tonic-gate 3548*7c478bd9Sstevel@tonic-gate ufsvfsp = ip->i_ufsvfs; 3549*7c478bd9Sstevel@tonic-gate error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_READDIR_MASK); 3550*7c478bd9Sstevel@tonic-gate if (error) 3551*7c478bd9Sstevel@tonic-gate goto out; 3552*7c478bd9Sstevel@tonic-gate 3553*7c478bd9Sstevel@tonic-gate iovp = uiop->uio_iov; 3554*7c478bd9Sstevel@tonic-gate total_bytes_wanted = iovp->iov_len; 3555*7c478bd9Sstevel@tonic-gate 3556*7c478bd9Sstevel@tonic-gate /* Large Files: directory files should not be "large" */ 3557*7c478bd9Sstevel@tonic-gate 3558*7c478bd9Sstevel@tonic-gate ASSERT(ip->i_size <= MAXOFF32_T); 3559*7c478bd9Sstevel@tonic-gate 3560*7c478bd9Sstevel@tonic-gate /* Force offset to be valid (to guard against bogus lseek() values) */ 3561*7c478bd9Sstevel@tonic-gate offset = (uint_t)uiop->uio_offset & ~(DIRBLKSIZ - 1); 3562*7c478bd9Sstevel@tonic-gate 3563*7c478bd9Sstevel@tonic-gate /* Quit if at end of file or link count of zero (posix) */ 3564*7c478bd9Sstevel@tonic-gate if (offset >= (uint_t)ip->i_size || ip->i_nlink <= 0) { 3565*7c478bd9Sstevel@tonic-gate if (eofp) 3566*7c478bd9Sstevel@tonic-gate *eofp = 1; 3567*7c478bd9Sstevel@tonic-gate error = 0; 3568*7c478bd9Sstevel@tonic-gate goto unlock; 3569*7c478bd9Sstevel@tonic-gate } 3570*7c478bd9Sstevel@tonic-gate 3571*7c478bd9Sstevel@tonic-gate /* 3572*7c478bd9Sstevel@tonic-gate * Get space to change directory entries into fs independent format. 3573*7c478bd9Sstevel@tonic-gate * Do fast alloc for the most commonly used-request size (filesystem 3574*7c478bd9Sstevel@tonic-gate * block size). 3575*7c478bd9Sstevel@tonic-gate */ 3576*7c478bd9Sstevel@tonic-gate if (uiop->uio_segflg != UIO_SYSSPACE || uiop->uio_iovcnt != 1) { 3577*7c478bd9Sstevel@tonic-gate bufsize = total_bytes_wanted; 3578*7c478bd9Sstevel@tonic-gate outbuf = kmem_alloc(bufsize, KM_SLEEP); 3579*7c478bd9Sstevel@tonic-gate odp = (struct dirent64 *)outbuf; 3580*7c478bd9Sstevel@tonic-gate } else { 3581*7c478bd9Sstevel@tonic-gate bufsize = total_bytes_wanted; 3582*7c478bd9Sstevel@tonic-gate odp = (struct dirent64 *)iovp->iov_base; 3583*7c478bd9Sstevel@tonic-gate } 3584*7c478bd9Sstevel@tonic-gate 3585*7c478bd9Sstevel@tonic-gate nextblk: 3586*7c478bd9Sstevel@tonic-gate bytes_wanted = total_bytes_wanted; 3587*7c478bd9Sstevel@tonic-gate 3588*7c478bd9Sstevel@tonic-gate /* Truncate request to file size */ 3589*7c478bd9Sstevel@tonic-gate if (offset + bytes_wanted > (int)ip->i_size) 3590*7c478bd9Sstevel@tonic-gate bytes_wanted = (int)(ip->i_size - offset); 3591*7c478bd9Sstevel@tonic-gate 3592*7c478bd9Sstevel@tonic-gate /* Comply with MAXBSIZE boundary restrictions of fbread() */ 3593*7c478bd9Sstevel@tonic-gate if ((offset & MAXBOFFSET) + bytes_wanted > MAXBSIZE) 3594*7c478bd9Sstevel@tonic-gate bytes_wanted = MAXBSIZE - (offset & MAXBOFFSET); 3595*7c478bd9Sstevel@tonic-gate 3596*7c478bd9Sstevel@tonic-gate /* 3597*7c478bd9Sstevel@tonic-gate * Read in the next chunk. 3598*7c478bd9Sstevel@tonic-gate * We are still holding the i_rwlock. 3599*7c478bd9Sstevel@tonic-gate */ 3600*7c478bd9Sstevel@tonic-gate error = fbread(vp, (offset_t)offset, bytes_wanted, S_OTHER, &fbp); 3601*7c478bd9Sstevel@tonic-gate 3602*7c478bd9Sstevel@tonic-gate if (error) 3603*7c478bd9Sstevel@tonic-gate goto update_inode; 3604*7c478bd9Sstevel@tonic-gate if (!ULOCKFS_IS_NOIACC(ITOUL(ip)) && (ip->i_fs->fs_ronly == 0) && 3605*7c478bd9Sstevel@tonic-gate (!ufsvfsp->vfs_noatime)) { 3606*7c478bd9Sstevel@tonic-gate ip->i_flag |= IACC; 3607*7c478bd9Sstevel@tonic-gate } 3608*7c478bd9Sstevel@tonic-gate incount = 0; 3609*7c478bd9Sstevel@tonic-gate idp = (struct direct *)fbp->fb_addr; 3610*7c478bd9Sstevel@tonic-gate if (idp->d_ino == 0 && idp->d_reclen == 0 && 3611*7c478bd9Sstevel@tonic-gate idp->d_namlen == 0) { 3612*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "ufs_readdir: bad dir, inumber = %llu, " 3613*7c478bd9Sstevel@tonic-gate "fs = %s\n", 3614*7c478bd9Sstevel@tonic-gate (u_longlong_t)ip->i_number, ufsvfsp->vfs_fs->fs_fsmnt); 3615*7c478bd9Sstevel@tonic-gate fbrelse(fbp, S_OTHER); 3616*7c478bd9Sstevel@tonic-gate error = ENXIO; 3617*7c478bd9Sstevel@tonic-gate goto update_inode; 3618*7c478bd9Sstevel@tonic-gate } 3619*7c478bd9Sstevel@tonic-gate /* Transform to file-system independent format */ 3620*7c478bd9Sstevel@tonic-gate while (incount < bytes_wanted) { 3621*7c478bd9Sstevel@tonic-gate /* 3622*7c478bd9Sstevel@tonic-gate * If the current directory entry is mangled, then skip 3623*7c478bd9Sstevel@tonic-gate * to the next block. It would be nice to set the FSBAD 3624*7c478bd9Sstevel@tonic-gate * flag in the super-block so that a fsck is forced on 3625*7c478bd9Sstevel@tonic-gate * next reboot, but locking is a problem. 3626*7c478bd9Sstevel@tonic-gate */ 3627*7c478bd9Sstevel@tonic-gate if (idp->d_reclen & 0x3) { 3628*7c478bd9Sstevel@tonic-gate offset = (offset + DIRBLKSIZ) & ~(DIRBLKSIZ-1); 3629*7c478bd9Sstevel@tonic-gate break; 3630*7c478bd9Sstevel@tonic-gate } 3631*7c478bd9Sstevel@tonic-gate 3632*7c478bd9Sstevel@tonic-gate /* Skip to requested offset and skip empty entries */ 3633*7c478bd9Sstevel@tonic-gate if (idp->d_ino != 0 && offset >= (uint_t)uiop->uio_offset) { 3634*7c478bd9Sstevel@tonic-gate ushort_t this_reclen = 3635*7c478bd9Sstevel@tonic-gate DIRENT64_RECLEN(idp->d_namlen); 3636*7c478bd9Sstevel@tonic-gate /* Buffer too small for any entries */ 3637*7c478bd9Sstevel@tonic-gate if (!outcount && this_reclen > bufsize) { 3638*7c478bd9Sstevel@tonic-gate fbrelse(fbp, S_OTHER); 3639*7c478bd9Sstevel@tonic-gate error = EINVAL; 3640*7c478bd9Sstevel@tonic-gate goto update_inode; 3641*7c478bd9Sstevel@tonic-gate } 3642*7c478bd9Sstevel@tonic-gate /* If would overrun the buffer, quit */ 3643*7c478bd9Sstevel@tonic-gate if (outcount + this_reclen > bufsize) { 3644*7c478bd9Sstevel@tonic-gate break; 3645*7c478bd9Sstevel@tonic-gate } 3646*7c478bd9Sstevel@tonic-gate /* Take this entry */ 3647*7c478bd9Sstevel@tonic-gate odp->d_ino = (ino64_t)idp->d_ino; 3648*7c478bd9Sstevel@tonic-gate odp->d_reclen = (ushort_t)this_reclen; 3649*7c478bd9Sstevel@tonic-gate odp->d_off = (offset_t)(offset + idp->d_reclen); 3650*7c478bd9Sstevel@tonic-gate 3651*7c478bd9Sstevel@tonic-gate /* use strncpy(9f) to zero out uninitialized bytes */ 3652*7c478bd9Sstevel@tonic-gate 3653*7c478bd9Sstevel@tonic-gate ASSERT(strlen(idp->d_name) + 1 <= 3654*7c478bd9Sstevel@tonic-gate DIRENT64_NAMELEN(this_reclen)); 3655*7c478bd9Sstevel@tonic-gate (void) strncpy(odp->d_name, idp->d_name, 3656*7c478bd9Sstevel@tonic-gate DIRENT64_NAMELEN(this_reclen)); 3657*7c478bd9Sstevel@tonic-gate outcount += odp->d_reclen; 3658*7c478bd9Sstevel@tonic-gate odp = (struct dirent64 *)((intptr_t)odp + 3659*7c478bd9Sstevel@tonic-gate odp->d_reclen); 3660*7c478bd9Sstevel@tonic-gate ASSERT(outcount <= bufsize); 3661*7c478bd9Sstevel@tonic-gate } 3662*7c478bd9Sstevel@tonic-gate if (idp->d_reclen) { 3663*7c478bd9Sstevel@tonic-gate incount += idp->d_reclen; 3664*7c478bd9Sstevel@tonic-gate offset += idp->d_reclen; 3665*7c478bd9Sstevel@tonic-gate idp = (struct direct *)((intptr_t)idp + idp->d_reclen); 3666*7c478bd9Sstevel@tonic-gate } else { 3667*7c478bd9Sstevel@tonic-gate offset = (offset + DIRBLKSIZ) & ~(DIRBLKSIZ-1); 3668*7c478bd9Sstevel@tonic-gate break; 3669*7c478bd9Sstevel@tonic-gate } 3670*7c478bd9Sstevel@tonic-gate } 3671*7c478bd9Sstevel@tonic-gate /* Release the chunk */ 3672*7c478bd9Sstevel@tonic-gate fbrelse(fbp, S_OTHER); 3673*7c478bd9Sstevel@tonic-gate 3674*7c478bd9Sstevel@tonic-gate /* Read whole block, but got no entries, read another if not eof */ 3675*7c478bd9Sstevel@tonic-gate 3676*7c478bd9Sstevel@tonic-gate /* 3677*7c478bd9Sstevel@tonic-gate * Large Files: casting i_size to int here is not a problem 3678*7c478bd9Sstevel@tonic-gate * because directory sizes are always less than MAXOFF32_T. 3679*7c478bd9Sstevel@tonic-gate * See assertion above. 3680*7c478bd9Sstevel@tonic-gate */ 3681*7c478bd9Sstevel@tonic-gate 3682*7c478bd9Sstevel@tonic-gate if (offset < (int)ip->i_size && !outcount) 3683*7c478bd9Sstevel@tonic-gate goto nextblk; 3684*7c478bd9Sstevel@tonic-gate 3685*7c478bd9Sstevel@tonic-gate /* Copy out the entry data */ 3686*7c478bd9Sstevel@tonic-gate if (uiop->uio_segflg == UIO_SYSSPACE && uiop->uio_iovcnt == 1) { 3687*7c478bd9Sstevel@tonic-gate iovp->iov_base += outcount; 3688*7c478bd9Sstevel@tonic-gate iovp->iov_len -= outcount; 3689*7c478bd9Sstevel@tonic-gate uiop->uio_resid -= outcount; 3690*7c478bd9Sstevel@tonic-gate uiop->uio_offset = offset; 3691*7c478bd9Sstevel@tonic-gate } else if ((error = uiomove(outbuf, (long)outcount, UIO_READ, 3692*7c478bd9Sstevel@tonic-gate uiop)) == 0) 3693*7c478bd9Sstevel@tonic-gate uiop->uio_offset = offset; 3694*7c478bd9Sstevel@tonic-gate update_inode: 3695*7c478bd9Sstevel@tonic-gate ITIMES(ip); 3696*7c478bd9Sstevel@tonic-gate if (uiop->uio_segflg != UIO_SYSSPACE || uiop->uio_iovcnt != 1) 3697*7c478bd9Sstevel@tonic-gate kmem_free(outbuf, bufsize); 3698*7c478bd9Sstevel@tonic-gate 3699*7c478bd9Sstevel@tonic-gate if (eofp && error == 0) 3700*7c478bd9Sstevel@tonic-gate *eofp = (uiop->uio_offset >= (int)ip->i_size); 3701*7c478bd9Sstevel@tonic-gate unlock: 3702*7c478bd9Sstevel@tonic-gate if (ulp) { 3703*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 3704*7c478bd9Sstevel@tonic-gate } 3705*7c478bd9Sstevel@tonic-gate out: 3706*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_READDIR_END, 3707*7c478bd9Sstevel@tonic-gate "ufs_readdir_end:vp %p error %d", vp, error); 3708*7c478bd9Sstevel@tonic-gate return (error); 3709*7c478bd9Sstevel@tonic-gate } 3710*7c478bd9Sstevel@tonic-gate 3711*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 3712*7c478bd9Sstevel@tonic-gate static int 3713*7c478bd9Sstevel@tonic-gate ufs_symlink( 3714*7c478bd9Sstevel@tonic-gate struct vnode *dvp, /* ptr to parent dir vnode */ 3715*7c478bd9Sstevel@tonic-gate char *linkname, /* name of symbolic link */ 3716*7c478bd9Sstevel@tonic-gate struct vattr *vap, /* attributes */ 3717*7c478bd9Sstevel@tonic-gate char *target, /* target path */ 3718*7c478bd9Sstevel@tonic-gate struct cred *cr) /* user credentials */ 3719*7c478bd9Sstevel@tonic-gate { 3720*7c478bd9Sstevel@tonic-gate struct inode *ip, *dip = VTOI(dvp); 3721*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp = dip->i_ufsvfs; 3722*7c478bd9Sstevel@tonic-gate struct ulockfs *ulp; 3723*7c478bd9Sstevel@tonic-gate int error; 3724*7c478bd9Sstevel@tonic-gate int issync; 3725*7c478bd9Sstevel@tonic-gate int trans_size; 3726*7c478bd9Sstevel@tonic-gate int residual; 3727*7c478bd9Sstevel@tonic-gate int ioflag; 3728*7c478bd9Sstevel@tonic-gate int retry = 1; 3729*7c478bd9Sstevel@tonic-gate 3730*7c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_UFS, TR_UFS_SYMLINK_START, 3731*7c478bd9Sstevel@tonic-gate "ufs_symlink_start:dvp %p", dvp); 3732*7c478bd9Sstevel@tonic-gate 3733*7c478bd9Sstevel@tonic-gate /* 3734*7c478bd9Sstevel@tonic-gate * No symlinks in attrdirs at this time 3735*7c478bd9Sstevel@tonic-gate */ 3736*7c478bd9Sstevel@tonic-gate if ((VTOI(dvp)->i_mode & IFMT) == IFATTRDIR) 3737*7c478bd9Sstevel@tonic-gate return (EINVAL); 3738*7c478bd9Sstevel@tonic-gate 3739*7c478bd9Sstevel@tonic-gate again: 3740*7c478bd9Sstevel@tonic-gate ip = (struct inode *)NULL; 3741*7c478bd9Sstevel@tonic-gate vap->va_type = VLNK; 3742*7c478bd9Sstevel@tonic-gate vap->va_rdev = 0; 3743*7c478bd9Sstevel@tonic-gate 3744*7c478bd9Sstevel@tonic-gate error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_SYMLINK_MASK); 3745*7c478bd9Sstevel@tonic-gate if (error) 3746*7c478bd9Sstevel@tonic-gate goto out; 3747*7c478bd9Sstevel@tonic-gate 3748*7c478bd9Sstevel@tonic-gate if (ulp) 3749*7c478bd9Sstevel@tonic-gate TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_SYMLINK, 3750*7c478bd9Sstevel@tonic-gate trans_size = (int)TOP_SYMLINK_SIZE(dip)); 3751*7c478bd9Sstevel@tonic-gate 3752*7c478bd9Sstevel@tonic-gate /* 3753*7c478bd9Sstevel@tonic-gate * We must create the inode before the directory entry, to avoid 3754*7c478bd9Sstevel@tonic-gate * racing with readlink(). ufs_dirmakeinode requires that we 3755*7c478bd9Sstevel@tonic-gate * hold the quota lock as reader, and directory locks as writer. 3756*7c478bd9Sstevel@tonic-gate */ 3757*7c478bd9Sstevel@tonic-gate 3758*7c478bd9Sstevel@tonic-gate rw_enter(&dip->i_rwlock, RW_WRITER); 3759*7c478bd9Sstevel@tonic-gate rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 3760*7c478bd9Sstevel@tonic-gate rw_enter(&dip->i_contents, RW_WRITER); 3761*7c478bd9Sstevel@tonic-gate 3762*7c478bd9Sstevel@tonic-gate /* 3763*7c478bd9Sstevel@tonic-gate * Suppress any out of inodes messages if we will retry on 3764*7c478bd9Sstevel@tonic-gate * ENOSP 3765*7c478bd9Sstevel@tonic-gate */ 3766*7c478bd9Sstevel@tonic-gate if (retry) 3767*7c478bd9Sstevel@tonic-gate dip->i_flag |= IQUIET; 3768*7c478bd9Sstevel@tonic-gate 3769*7c478bd9Sstevel@tonic-gate error = ufs_dirmakeinode(dip, &ip, vap, DE_SYMLINK, cr); 3770*7c478bd9Sstevel@tonic-gate 3771*7c478bd9Sstevel@tonic-gate dip->i_flag &= ~IQUIET; 3772*7c478bd9Sstevel@tonic-gate 3773*7c478bd9Sstevel@tonic-gate rw_exit(&dip->i_contents); 3774*7c478bd9Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 3775*7c478bd9Sstevel@tonic-gate rw_exit(&dip->i_rwlock); 3776*7c478bd9Sstevel@tonic-gate 3777*7c478bd9Sstevel@tonic-gate if (error) 3778*7c478bd9Sstevel@tonic-gate goto unlock; 3779*7c478bd9Sstevel@tonic-gate 3780*7c478bd9Sstevel@tonic-gate /* 3781*7c478bd9Sstevel@tonic-gate * OK. The inode has been created. Write out the data of the 3782*7c478bd9Sstevel@tonic-gate * symbolic link. Since symbolic links are metadata, and should 3783*7c478bd9Sstevel@tonic-gate * remain consistent across a system crash, we need to force the 3784*7c478bd9Sstevel@tonic-gate * data out synchronously. 3785*7c478bd9Sstevel@tonic-gate * 3786*7c478bd9Sstevel@tonic-gate * (This is a change from the semantics in earlier releases, which 3787*7c478bd9Sstevel@tonic-gate * only created symbolic links synchronously if the semi-documented 3788*7c478bd9Sstevel@tonic-gate * 'syncdir' option was set, or if we were being invoked by the NFS 3789*7c478bd9Sstevel@tonic-gate * server, which requires symbolic links to be created synchronously.) 3790*7c478bd9Sstevel@tonic-gate * 3791*7c478bd9Sstevel@tonic-gate * We need to pass in a pointer for the residual length; otherwise 3792*7c478bd9Sstevel@tonic-gate * ufs_rdwri() will always return EIO if it can't write the data, 3793*7c478bd9Sstevel@tonic-gate * even if the error was really ENOSPC or EDQUOT. 3794*7c478bd9Sstevel@tonic-gate */ 3795*7c478bd9Sstevel@tonic-gate 3796*7c478bd9Sstevel@tonic-gate ioflag = FWRITE | FDSYNC; 3797*7c478bd9Sstevel@tonic-gate residual = 0; 3798*7c478bd9Sstevel@tonic-gate 3799*7c478bd9Sstevel@tonic-gate rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 3800*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 3801*7c478bd9Sstevel@tonic-gate 3802*7c478bd9Sstevel@tonic-gate /* 3803*7c478bd9Sstevel@tonic-gate * Suppress file system full messages if we will retry 3804*7c478bd9Sstevel@tonic-gate */ 3805*7c478bd9Sstevel@tonic-gate if (retry) 3806*7c478bd9Sstevel@tonic-gate ip->i_flag |= IQUIET; 3807*7c478bd9Sstevel@tonic-gate 3808*7c478bd9Sstevel@tonic-gate error = ufs_rdwri(UIO_WRITE, ioflag, ip, target, strlen(target), 3809*7c478bd9Sstevel@tonic-gate (offset_t)0, UIO_SYSSPACE, &residual, cr); 3810*7c478bd9Sstevel@tonic-gate 3811*7c478bd9Sstevel@tonic-gate ip->i_flag &= ~IQUIET; 3812*7c478bd9Sstevel@tonic-gate 3813*7c478bd9Sstevel@tonic-gate if (error) { 3814*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 3815*7c478bd9Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 3816*7c478bd9Sstevel@tonic-gate goto remove; 3817*7c478bd9Sstevel@tonic-gate } 3818*7c478bd9Sstevel@tonic-gate 3819*7c478bd9Sstevel@tonic-gate /* 3820*7c478bd9Sstevel@tonic-gate * If the link's data is small enough, we can cache it in the inode. 3821*7c478bd9Sstevel@tonic-gate * This is a "fast symbolic link". We don't use the first direct 3822*7c478bd9Sstevel@tonic-gate * block because that's actually used to point at the symbolic link's 3823*7c478bd9Sstevel@tonic-gate * contents on disk; but we know that none of the other direct or 3824*7c478bd9Sstevel@tonic-gate * indirect blocks can be used because symbolic links are restricted 3825*7c478bd9Sstevel@tonic-gate * to be smaller than a file system block. 3826*7c478bd9Sstevel@tonic-gate */ 3827*7c478bd9Sstevel@tonic-gate 3828*7c478bd9Sstevel@tonic-gate ASSERT(MAXPATHLEN <= VBSIZE(ITOV(ip))); 3829*7c478bd9Sstevel@tonic-gate 3830*7c478bd9Sstevel@tonic-gate if (ip->i_size > 0 && ip->i_size <= FSL_SIZE) { 3831*7c478bd9Sstevel@tonic-gate if (kcopy(target, &ip->i_db[1], ip->i_size) == 0) { 3832*7c478bd9Sstevel@tonic-gate ip->i_flag |= IFASTSYMLNK; 3833*7c478bd9Sstevel@tonic-gate } else { 3834*7c478bd9Sstevel@tonic-gate int i; 3835*7c478bd9Sstevel@tonic-gate /* error, clear garbage left behind */ 3836*7c478bd9Sstevel@tonic-gate for (i = 1; i < NDADDR; i++) 3837*7c478bd9Sstevel@tonic-gate ip->i_db[i] = 0; 3838*7c478bd9Sstevel@tonic-gate for (i = 0; i < NIADDR; i++) 3839*7c478bd9Sstevel@tonic-gate ip->i_ib[i] = 0; 3840*7c478bd9Sstevel@tonic-gate } 3841*7c478bd9Sstevel@tonic-gate } 3842*7c478bd9Sstevel@tonic-gate 3843*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 3844*7c478bd9Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 3845*7c478bd9Sstevel@tonic-gate 3846*7c478bd9Sstevel@tonic-gate /* 3847*7c478bd9Sstevel@tonic-gate * OK. We've successfully created the symbolic link. All that 3848*7c478bd9Sstevel@tonic-gate * remains is to insert it into the appropriate directory. 3849*7c478bd9Sstevel@tonic-gate */ 3850*7c478bd9Sstevel@tonic-gate 3851*7c478bd9Sstevel@tonic-gate rw_enter(&dip->i_rwlock, RW_WRITER); 3852*7c478bd9Sstevel@tonic-gate error = ufs_direnter_lr(dip, linkname, DE_SYMLINK, NULL, ip, cr, NULL); 3853*7c478bd9Sstevel@tonic-gate rw_exit(&dip->i_rwlock); 3854*7c478bd9Sstevel@tonic-gate 3855*7c478bd9Sstevel@tonic-gate /* 3856*7c478bd9Sstevel@tonic-gate * Fall through into remove-on-error code. We're either done, or we 3857*7c478bd9Sstevel@tonic-gate * need to remove the inode (if we couldn't insert it). 3858*7c478bd9Sstevel@tonic-gate */ 3859*7c478bd9Sstevel@tonic-gate 3860*7c478bd9Sstevel@tonic-gate remove: 3861*7c478bd9Sstevel@tonic-gate if (error && (ip != NULL)) { 3862*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 3863*7c478bd9Sstevel@tonic-gate ip->i_nlink--; 3864*7c478bd9Sstevel@tonic-gate ip->i_flag |= ICHG; 3865*7c478bd9Sstevel@tonic-gate ip->i_seq++; 3866*7c478bd9Sstevel@tonic-gate ufs_setreclaim(ip); 3867*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 3868*7c478bd9Sstevel@tonic-gate } 3869*7c478bd9Sstevel@tonic-gate 3870*7c478bd9Sstevel@tonic-gate unlock: 3871*7c478bd9Sstevel@tonic-gate if (ip != NULL) 3872*7c478bd9Sstevel@tonic-gate VN_RELE(ITOV(ip)); 3873*7c478bd9Sstevel@tonic-gate 3874*7c478bd9Sstevel@tonic-gate if (ulp) { 3875*7c478bd9Sstevel@tonic-gate int terr = 0; 3876*7c478bd9Sstevel@tonic-gate 3877*7c478bd9Sstevel@tonic-gate TRANS_END_CSYNC(ufsvfsp, terr, issync, TOP_SYMLINK, 3878*7c478bd9Sstevel@tonic-gate trans_size); 3879*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 3880*7c478bd9Sstevel@tonic-gate if (error == 0) 3881*7c478bd9Sstevel@tonic-gate error = terr; 3882*7c478bd9Sstevel@tonic-gate } 3883*7c478bd9Sstevel@tonic-gate 3884*7c478bd9Sstevel@tonic-gate /* 3885*7c478bd9Sstevel@tonic-gate * We may have failed due to lack of an inode or of a block to 3886*7c478bd9Sstevel@tonic-gate * store the target in. Try flushing the delete queue to free 3887*7c478bd9Sstevel@tonic-gate * logically-available things up and try again. 3888*7c478bd9Sstevel@tonic-gate */ 3889*7c478bd9Sstevel@tonic-gate if ((error == ENOSPC) && retry && TRANS_ISTRANS(ufsvfsp)) { 3890*7c478bd9Sstevel@tonic-gate ufs_delete_drain_wait(ufsvfsp, 1); 3891*7c478bd9Sstevel@tonic-gate retry = 0; 3892*7c478bd9Sstevel@tonic-gate goto again; 3893*7c478bd9Sstevel@tonic-gate } 3894*7c478bd9Sstevel@tonic-gate 3895*7c478bd9Sstevel@tonic-gate out: 3896*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_SYMLINK_END, 3897*7c478bd9Sstevel@tonic-gate "ufs_symlink_end:dvp %p error %d", dvp, error); 3898*7c478bd9Sstevel@tonic-gate return (error); 3899*7c478bd9Sstevel@tonic-gate } 3900*7c478bd9Sstevel@tonic-gate 3901*7c478bd9Sstevel@tonic-gate /* 3902*7c478bd9Sstevel@tonic-gate * Ufs specific routine used to do ufs io. 3903*7c478bd9Sstevel@tonic-gate */ 3904*7c478bd9Sstevel@tonic-gate int 3905*7c478bd9Sstevel@tonic-gate ufs_rdwri(enum uio_rw rw, int ioflag, struct inode *ip, caddr_t base, 3906*7c478bd9Sstevel@tonic-gate ssize_t len, offset_t offset, enum uio_seg seg, int *aresid, 3907*7c478bd9Sstevel@tonic-gate struct cred *cr) 3908*7c478bd9Sstevel@tonic-gate { 3909*7c478bd9Sstevel@tonic-gate struct uio auio; 3910*7c478bd9Sstevel@tonic-gate struct iovec aiov; 3911*7c478bd9Sstevel@tonic-gate int error; 3912*7c478bd9Sstevel@tonic-gate 3913*7c478bd9Sstevel@tonic-gate ASSERT(RW_LOCK_HELD(&ip->i_contents)); 3914*7c478bd9Sstevel@tonic-gate 3915*7c478bd9Sstevel@tonic-gate bzero((caddr_t)&auio, sizeof (uio_t)); 3916*7c478bd9Sstevel@tonic-gate bzero((caddr_t)&aiov, sizeof (iovec_t)); 3917*7c478bd9Sstevel@tonic-gate 3918*7c478bd9Sstevel@tonic-gate aiov.iov_base = base; 3919*7c478bd9Sstevel@tonic-gate aiov.iov_len = len; 3920*7c478bd9Sstevel@tonic-gate auio.uio_iov = &aiov; 3921*7c478bd9Sstevel@tonic-gate auio.uio_iovcnt = 1; 3922*7c478bd9Sstevel@tonic-gate auio.uio_loffset = offset; 3923*7c478bd9Sstevel@tonic-gate auio.uio_segflg = (short)seg; 3924*7c478bd9Sstevel@tonic-gate auio.uio_resid = len; 3925*7c478bd9Sstevel@tonic-gate 3926*7c478bd9Sstevel@tonic-gate if (rw == UIO_WRITE) { 3927*7c478bd9Sstevel@tonic-gate auio.uio_fmode = FWRITE; 3928*7c478bd9Sstevel@tonic-gate auio.uio_extflg = UIO_COPY_DEFAULT; 3929*7c478bd9Sstevel@tonic-gate auio.uio_llimit = curproc->p_fsz_ctl; 3930*7c478bd9Sstevel@tonic-gate error = wrip(ip, &auio, ioflag, cr); 3931*7c478bd9Sstevel@tonic-gate } else { 3932*7c478bd9Sstevel@tonic-gate auio.uio_fmode = FREAD; 3933*7c478bd9Sstevel@tonic-gate auio.uio_extflg = UIO_COPY_CACHED; 3934*7c478bd9Sstevel@tonic-gate auio.uio_llimit = MAXOFFSET_T; 3935*7c478bd9Sstevel@tonic-gate error = rdip(ip, &auio, ioflag, cr); 3936*7c478bd9Sstevel@tonic-gate } 3937*7c478bd9Sstevel@tonic-gate 3938*7c478bd9Sstevel@tonic-gate if (aresid) { 3939*7c478bd9Sstevel@tonic-gate *aresid = auio.uio_resid; 3940*7c478bd9Sstevel@tonic-gate } else if (auio.uio_resid) { 3941*7c478bd9Sstevel@tonic-gate error = EIO; 3942*7c478bd9Sstevel@tonic-gate } 3943*7c478bd9Sstevel@tonic-gate return (error); 3944*7c478bd9Sstevel@tonic-gate } 3945*7c478bd9Sstevel@tonic-gate 3946*7c478bd9Sstevel@tonic-gate static int 3947*7c478bd9Sstevel@tonic-gate ufs_fid(vp, fidp) 3948*7c478bd9Sstevel@tonic-gate struct vnode *vp; 3949*7c478bd9Sstevel@tonic-gate struct fid *fidp; 3950*7c478bd9Sstevel@tonic-gate { 3951*7c478bd9Sstevel@tonic-gate struct ufid *ufid; 3952*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 3953*7c478bd9Sstevel@tonic-gate 3954*7c478bd9Sstevel@tonic-gate if (ip->i_ufsvfs == NULL) 3955*7c478bd9Sstevel@tonic-gate return (EIO); 3956*7c478bd9Sstevel@tonic-gate 3957*7c478bd9Sstevel@tonic-gate if (fidp->fid_len < (sizeof (struct ufid) - sizeof (ushort_t))) { 3958*7c478bd9Sstevel@tonic-gate fidp->fid_len = sizeof (struct ufid) - sizeof (ushort_t); 3959*7c478bd9Sstevel@tonic-gate return (ENOSPC); 3960*7c478bd9Sstevel@tonic-gate } 3961*7c478bd9Sstevel@tonic-gate 3962*7c478bd9Sstevel@tonic-gate ufid = (struct ufid *)fidp; 3963*7c478bd9Sstevel@tonic-gate bzero((char *)ufid, sizeof (struct ufid)); 3964*7c478bd9Sstevel@tonic-gate ufid->ufid_len = sizeof (struct ufid) - sizeof (ushort_t); 3965*7c478bd9Sstevel@tonic-gate ufid->ufid_ino = ip->i_number; 3966*7c478bd9Sstevel@tonic-gate ufid->ufid_gen = ip->i_gen; 3967*7c478bd9Sstevel@tonic-gate 3968*7c478bd9Sstevel@tonic-gate return (0); 3969*7c478bd9Sstevel@tonic-gate } 3970*7c478bd9Sstevel@tonic-gate 3971*7c478bd9Sstevel@tonic-gate /* ARGSUSED2 */ 3972*7c478bd9Sstevel@tonic-gate static int 3973*7c478bd9Sstevel@tonic-gate ufs_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp) 3974*7c478bd9Sstevel@tonic-gate { 3975*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 3976*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp; 3977*7c478bd9Sstevel@tonic-gate int forcedirectio; 3978*7c478bd9Sstevel@tonic-gate 3979*7c478bd9Sstevel@tonic-gate /* 3980*7c478bd9Sstevel@tonic-gate * Read case is easy. 3981*7c478bd9Sstevel@tonic-gate */ 3982*7c478bd9Sstevel@tonic-gate if (!write_lock) { 3983*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_rwlock, RW_READER); 3984*7c478bd9Sstevel@tonic-gate return (V_WRITELOCK_FALSE); 3985*7c478bd9Sstevel@tonic-gate } 3986*7c478bd9Sstevel@tonic-gate 3987*7c478bd9Sstevel@tonic-gate /* 3988*7c478bd9Sstevel@tonic-gate * Caller has requested a writer lock, but that inhibits any 3989*7c478bd9Sstevel@tonic-gate * concurrency in the VOPs that follow. Acquire the lock shared 3990*7c478bd9Sstevel@tonic-gate * and defer exclusive access until it is known to be needed in 3991*7c478bd9Sstevel@tonic-gate * other VOP handlers. Some cases can be determined here. 3992*7c478bd9Sstevel@tonic-gate */ 3993*7c478bd9Sstevel@tonic-gate 3994*7c478bd9Sstevel@tonic-gate /* 3995*7c478bd9Sstevel@tonic-gate * If directio is not set, there is no chance of concurrency, 3996*7c478bd9Sstevel@tonic-gate * so just acquire the lock exclusive. Beware of a forced 3997*7c478bd9Sstevel@tonic-gate * unmount before looking at the mount option. 3998*7c478bd9Sstevel@tonic-gate */ 3999*7c478bd9Sstevel@tonic-gate ufsvfsp = ip->i_ufsvfs; 4000*7c478bd9Sstevel@tonic-gate forcedirectio = ufsvfsp ? ufsvfsp->vfs_forcedirectio : 0; 4001*7c478bd9Sstevel@tonic-gate if (!(ip->i_flag & IDIRECTIO || forcedirectio) || 4002*7c478bd9Sstevel@tonic-gate !ufs_allow_shared_writes) { 4003*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_rwlock, RW_WRITER); 4004*7c478bd9Sstevel@tonic-gate return (V_WRITELOCK_TRUE); 4005*7c478bd9Sstevel@tonic-gate } 4006*7c478bd9Sstevel@tonic-gate 4007*7c478bd9Sstevel@tonic-gate /* 4008*7c478bd9Sstevel@tonic-gate * Mandatory locking forces acquiring i_rwlock exclusive. 4009*7c478bd9Sstevel@tonic-gate */ 4010*7c478bd9Sstevel@tonic-gate if (MANDLOCK(vp, ip->i_mode)) { 4011*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_rwlock, RW_WRITER); 4012*7c478bd9Sstevel@tonic-gate return (V_WRITELOCK_TRUE); 4013*7c478bd9Sstevel@tonic-gate } 4014*7c478bd9Sstevel@tonic-gate 4015*7c478bd9Sstevel@tonic-gate /* 4016*7c478bd9Sstevel@tonic-gate * Acquire the lock shared in case a concurrent write follows. 4017*7c478bd9Sstevel@tonic-gate * Mandatory locking could have become enabled before the lock 4018*7c478bd9Sstevel@tonic-gate * was acquired. Re-check and upgrade if needed. 4019*7c478bd9Sstevel@tonic-gate */ 4020*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_rwlock, RW_READER); 4021*7c478bd9Sstevel@tonic-gate if (MANDLOCK(vp, ip->i_mode)) { 4022*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_rwlock); 4023*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_rwlock, RW_WRITER); 4024*7c478bd9Sstevel@tonic-gate return (V_WRITELOCK_TRUE); 4025*7c478bd9Sstevel@tonic-gate } 4026*7c478bd9Sstevel@tonic-gate return (V_WRITELOCK_FALSE); 4027*7c478bd9Sstevel@tonic-gate } 4028*7c478bd9Sstevel@tonic-gate 4029*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 4030*7c478bd9Sstevel@tonic-gate static void 4031*7c478bd9Sstevel@tonic-gate ufs_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp) 4032*7c478bd9Sstevel@tonic-gate { 4033*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 4034*7c478bd9Sstevel@tonic-gate 4035*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_rwlock); 4036*7c478bd9Sstevel@tonic-gate } 4037*7c478bd9Sstevel@tonic-gate 4038*7c478bd9Sstevel@tonic-gate /* ARGSUSED */ 4039*7c478bd9Sstevel@tonic-gate static int 4040*7c478bd9Sstevel@tonic-gate ufs_seek(struct vnode *vp, offset_t ooff, offset_t *noffp) 4041*7c478bd9Sstevel@tonic-gate { 4042*7c478bd9Sstevel@tonic-gate return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 4043*7c478bd9Sstevel@tonic-gate } 4044*7c478bd9Sstevel@tonic-gate 4045*7c478bd9Sstevel@tonic-gate /* ARGSUSED */ 4046*7c478bd9Sstevel@tonic-gate static int 4047*7c478bd9Sstevel@tonic-gate ufs_frlock(struct vnode *vp, int cmd, struct flock64 *bfp, int flag, 4048*7c478bd9Sstevel@tonic-gate offset_t offset, struct flk_callback *flk_cbp, struct cred *cr) 4049*7c478bd9Sstevel@tonic-gate { 4050*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 4051*7c478bd9Sstevel@tonic-gate 4052*7c478bd9Sstevel@tonic-gate if (ip->i_ufsvfs == NULL) 4053*7c478bd9Sstevel@tonic-gate return (EIO); 4054*7c478bd9Sstevel@tonic-gate 4055*7c478bd9Sstevel@tonic-gate /* 4056*7c478bd9Sstevel@tonic-gate * If file is being mapped, disallow frlock. 4057*7c478bd9Sstevel@tonic-gate * XXX I am not holding tlock while checking i_mapcnt because the 4058*7c478bd9Sstevel@tonic-gate * current locking strategy drops all locks before calling fs_frlock. 4059*7c478bd9Sstevel@tonic-gate * So, mapcnt could change before we enter fs_frlock making is 4060*7c478bd9Sstevel@tonic-gate * meaningless to have held tlock in the first place. 4061*7c478bd9Sstevel@tonic-gate */ 4062*7c478bd9Sstevel@tonic-gate if (ip->i_mapcnt > 0 && MANDLOCK(vp, ip->i_mode)) 4063*7c478bd9Sstevel@tonic-gate return (EAGAIN); 4064*7c478bd9Sstevel@tonic-gate return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr)); 4065*7c478bd9Sstevel@tonic-gate } 4066*7c478bd9Sstevel@tonic-gate 4067*7c478bd9Sstevel@tonic-gate /* ARGSUSED */ 4068*7c478bd9Sstevel@tonic-gate static int 4069*7c478bd9Sstevel@tonic-gate ufs_space( 4070*7c478bd9Sstevel@tonic-gate struct vnode *vp, 4071*7c478bd9Sstevel@tonic-gate int cmd, 4072*7c478bd9Sstevel@tonic-gate struct flock64 *bfp, 4073*7c478bd9Sstevel@tonic-gate int flag, 4074*7c478bd9Sstevel@tonic-gate offset_t offset, 4075*7c478bd9Sstevel@tonic-gate cred_t *cr, 4076*7c478bd9Sstevel@tonic-gate caller_context_t *ct) 4077*7c478bd9Sstevel@tonic-gate { 4078*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp = VTOI(vp)->i_ufsvfs; 4079*7c478bd9Sstevel@tonic-gate struct ulockfs *ulp; 4080*7c478bd9Sstevel@tonic-gate int error; 4081*7c478bd9Sstevel@tonic-gate 4082*7c478bd9Sstevel@tonic-gate error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_SPACE_MASK); 4083*7c478bd9Sstevel@tonic-gate if (error) 4084*7c478bd9Sstevel@tonic-gate return (error); 4085*7c478bd9Sstevel@tonic-gate 4086*7c478bd9Sstevel@tonic-gate 4087*7c478bd9Sstevel@tonic-gate if (cmd != F_FREESP) 4088*7c478bd9Sstevel@tonic-gate error = EINVAL; 4089*7c478bd9Sstevel@tonic-gate else if ((error = convoff(vp, bfp, 0, offset)) == 0) 4090*7c478bd9Sstevel@tonic-gate error = ufs_freesp(vp, bfp, flag, cr); 4091*7c478bd9Sstevel@tonic-gate 4092*7c478bd9Sstevel@tonic-gate if (ulp) 4093*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 4094*7c478bd9Sstevel@tonic-gate return (error); 4095*7c478bd9Sstevel@tonic-gate } 4096*7c478bd9Sstevel@tonic-gate 4097*7c478bd9Sstevel@tonic-gate /* 4098*7c478bd9Sstevel@tonic-gate * Used to determine if read ahead should be done. Also used to 4099*7c478bd9Sstevel@tonic-gate * to determine when write back occurs. 4100*7c478bd9Sstevel@tonic-gate */ 4101*7c478bd9Sstevel@tonic-gate #define CLUSTSZ(ip) ((ip)->i_ufsvfs->vfs_ioclustsz) 4102*7c478bd9Sstevel@tonic-gate 4103*7c478bd9Sstevel@tonic-gate /* 4104*7c478bd9Sstevel@tonic-gate * A faster version of ufs_getpage. 4105*7c478bd9Sstevel@tonic-gate * 4106*7c478bd9Sstevel@tonic-gate * We optimize by inlining the pvn_getpages iterator, eliminating 4107*7c478bd9Sstevel@tonic-gate * calls to bmap_read if file doesn't have UFS holes, and avoiding 4108*7c478bd9Sstevel@tonic-gate * the overhead of page_exists(). 4109*7c478bd9Sstevel@tonic-gate * 4110*7c478bd9Sstevel@tonic-gate * When files has UFS_HOLES and ufs_getpage is called with S_READ, 4111*7c478bd9Sstevel@tonic-gate * we set *protp to PROT_READ to avoid calling bmap_read. This approach 4112*7c478bd9Sstevel@tonic-gate * victimizes performance when a file with UFS holes is faulted 4113*7c478bd9Sstevel@tonic-gate * first in the S_READ mode, and then in the S_WRITE mode. We will get 4114*7c478bd9Sstevel@tonic-gate * two MMU faults in this case. 4115*7c478bd9Sstevel@tonic-gate * 4116*7c478bd9Sstevel@tonic-gate * XXX - the inode fields which control the sequential mode are not 4117*7c478bd9Sstevel@tonic-gate * protected by any mutex. The read ahead will act wild if 4118*7c478bd9Sstevel@tonic-gate * multiple processes will access the file concurrently and 4119*7c478bd9Sstevel@tonic-gate * some of them in sequential mode. One particulary bad case 4120*7c478bd9Sstevel@tonic-gate * is if another thread will change the value of i_nextrio between 4121*7c478bd9Sstevel@tonic-gate * the time this thread tests the i_nextrio value and then reads it 4122*7c478bd9Sstevel@tonic-gate * again to use it as the offset for the read ahead. 4123*7c478bd9Sstevel@tonic-gate */ 4124*7c478bd9Sstevel@tonic-gate static int 4125*7c478bd9Sstevel@tonic-gate ufs_getpage(struct vnode *vp, offset_t off, size_t len, uint_t *protp, 4126*7c478bd9Sstevel@tonic-gate page_t *plarr[], size_t plsz, struct seg *seg, caddr_t addr, 4127*7c478bd9Sstevel@tonic-gate enum seg_rw rw, struct cred *cr) 4128*7c478bd9Sstevel@tonic-gate { 4129*7c478bd9Sstevel@tonic-gate u_offset_t uoff = (u_offset_t)off; /* type conversion */ 4130*7c478bd9Sstevel@tonic-gate u_offset_t pgoff; 4131*7c478bd9Sstevel@tonic-gate u_offset_t eoff; 4132*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 4133*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 4134*7c478bd9Sstevel@tonic-gate struct fs *fs; 4135*7c478bd9Sstevel@tonic-gate struct ulockfs *ulp; 4136*7c478bd9Sstevel@tonic-gate page_t **pl; 4137*7c478bd9Sstevel@tonic-gate caddr_t pgaddr; 4138*7c478bd9Sstevel@tonic-gate krw_t rwtype; 4139*7c478bd9Sstevel@tonic-gate int err; 4140*7c478bd9Sstevel@tonic-gate int has_holes; 4141*7c478bd9Sstevel@tonic-gate int beyond_eof; 4142*7c478bd9Sstevel@tonic-gate int seqmode; 4143*7c478bd9Sstevel@tonic-gate int pgsize = PAGESIZE; 4144*7c478bd9Sstevel@tonic-gate int dolock; 4145*7c478bd9Sstevel@tonic-gate int do_qlock; 4146*7c478bd9Sstevel@tonic-gate int trans_size; 4147*7c478bd9Sstevel@tonic-gate 4148*7c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_UFS, TR_UFS_GETPAGE_START, 4149*7c478bd9Sstevel@tonic-gate "ufs_getpage_start:vp %p", vp); 4150*7c478bd9Sstevel@tonic-gate 4151*7c478bd9Sstevel@tonic-gate ASSERT((uoff & PAGEOFFSET) == 0); 4152*7c478bd9Sstevel@tonic-gate 4153*7c478bd9Sstevel@tonic-gate if (protp) 4154*7c478bd9Sstevel@tonic-gate *protp = PROT_ALL; 4155*7c478bd9Sstevel@tonic-gate 4156*7c478bd9Sstevel@tonic-gate /* 4157*7c478bd9Sstevel@tonic-gate * Obey the lockfs protocol 4158*7c478bd9Sstevel@tonic-gate */ 4159*7c478bd9Sstevel@tonic-gate err = ufs_lockfs_begin_getpage(ufsvfsp, &ulp, seg, 4160*7c478bd9Sstevel@tonic-gate rw == S_READ || rw == S_EXEC, protp); 4161*7c478bd9Sstevel@tonic-gate if (err) 4162*7c478bd9Sstevel@tonic-gate goto out; 4163*7c478bd9Sstevel@tonic-gate 4164*7c478bd9Sstevel@tonic-gate fs = ufsvfsp->vfs_fs; 4165*7c478bd9Sstevel@tonic-gate 4166*7c478bd9Sstevel@tonic-gate if (ulp && (rw == S_CREATE || rw == S_WRITE) && 4167*7c478bd9Sstevel@tonic-gate !(vp->v_flag & VISSWAP)) { 4168*7c478bd9Sstevel@tonic-gate /* 4169*7c478bd9Sstevel@tonic-gate * Try to start a transaction, will return if blocking is 4170*7c478bd9Sstevel@tonic-gate * expected to occur and the address space is not the 4171*7c478bd9Sstevel@tonic-gate * kernel address space. 4172*7c478bd9Sstevel@tonic-gate */ 4173*7c478bd9Sstevel@tonic-gate trans_size = TOP_GETPAGE_SIZE(ip); 4174*7c478bd9Sstevel@tonic-gate if (seg->s_as != &kas) { 4175*7c478bd9Sstevel@tonic-gate TRANS_TRY_BEGIN_ASYNC(ufsvfsp, TOP_GETPAGE, 4176*7c478bd9Sstevel@tonic-gate trans_size, err) 4177*7c478bd9Sstevel@tonic-gate if (err == EWOULDBLOCK) { 4178*7c478bd9Sstevel@tonic-gate /* 4179*7c478bd9Sstevel@tonic-gate * Use EDEADLK here because the VM code 4180*7c478bd9Sstevel@tonic-gate * can normally never see this error. 4181*7c478bd9Sstevel@tonic-gate */ 4182*7c478bd9Sstevel@tonic-gate err = EDEADLK; 4183*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 4184*7c478bd9Sstevel@tonic-gate goto out; 4185*7c478bd9Sstevel@tonic-gate } 4186*7c478bd9Sstevel@tonic-gate } else { 4187*7c478bd9Sstevel@tonic-gate TRANS_BEGIN_ASYNC(ufsvfsp, TOP_GETPAGE, trans_size); 4188*7c478bd9Sstevel@tonic-gate } 4189*7c478bd9Sstevel@tonic-gate } 4190*7c478bd9Sstevel@tonic-gate 4191*7c478bd9Sstevel@tonic-gate if (vp->v_flag & VNOMAP) { 4192*7c478bd9Sstevel@tonic-gate err = ENOSYS; 4193*7c478bd9Sstevel@tonic-gate goto unlock; 4194*7c478bd9Sstevel@tonic-gate } 4195*7c478bd9Sstevel@tonic-gate 4196*7c478bd9Sstevel@tonic-gate seqmode = ip->i_nextr == uoff && rw != S_CREATE; 4197*7c478bd9Sstevel@tonic-gate 4198*7c478bd9Sstevel@tonic-gate rwtype = RW_READER; /* start as a reader */ 4199*7c478bd9Sstevel@tonic-gate dolock = (rw_owner(&ip->i_contents) != curthread); 4200*7c478bd9Sstevel@tonic-gate /* 4201*7c478bd9Sstevel@tonic-gate * If this thread owns the lock, i.e., this thread grabbed it 4202*7c478bd9Sstevel@tonic-gate * as writer somewhere above, then we don't need to grab the 4203*7c478bd9Sstevel@tonic-gate * lock as reader in this routine. 4204*7c478bd9Sstevel@tonic-gate */ 4205*7c478bd9Sstevel@tonic-gate do_qlock = (rw_owner(&ufsvfsp->vfs_dqrwlock) != curthread); 4206*7c478bd9Sstevel@tonic-gate 4207*7c478bd9Sstevel@tonic-gate retrylock: 4208*7c478bd9Sstevel@tonic-gate if (dolock) { 4209*7c478bd9Sstevel@tonic-gate /* 4210*7c478bd9Sstevel@tonic-gate * Grab the quota lock if we need to call 4211*7c478bd9Sstevel@tonic-gate * bmap_write() below (with i_contents as writer). 4212*7c478bd9Sstevel@tonic-gate */ 4213*7c478bd9Sstevel@tonic-gate if (do_qlock && rwtype == RW_WRITER) 4214*7c478bd9Sstevel@tonic-gate rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 4215*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, rwtype); 4216*7c478bd9Sstevel@tonic-gate } 4217*7c478bd9Sstevel@tonic-gate 4218*7c478bd9Sstevel@tonic-gate /* 4219*7c478bd9Sstevel@tonic-gate * We may be getting called as a side effect of a bmap using 4220*7c478bd9Sstevel@tonic-gate * fbread() when the blocks might be being allocated and the 4221*7c478bd9Sstevel@tonic-gate * size has not yet been up'ed. In this case we want to be 4222*7c478bd9Sstevel@tonic-gate * able to return zero pages if we get back UFS_HOLE from 4223*7c478bd9Sstevel@tonic-gate * calling bmap for a non write case here. We also might have 4224*7c478bd9Sstevel@tonic-gate * to read some frags from the disk into a page if we are 4225*7c478bd9Sstevel@tonic-gate * extending the number of frags for a given lbn in bmap(). 4226*7c478bd9Sstevel@tonic-gate * Large Files: The read of i_size here is atomic because 4227*7c478bd9Sstevel@tonic-gate * i_contents is held here. If dolock is zero, the lock 4228*7c478bd9Sstevel@tonic-gate * is held in bmap routines. 4229*7c478bd9Sstevel@tonic-gate */ 4230*7c478bd9Sstevel@tonic-gate beyond_eof = uoff + len > ip->i_size + PAGEOFFSET; 4231*7c478bd9Sstevel@tonic-gate if (beyond_eof && seg != segkmap) { 4232*7c478bd9Sstevel@tonic-gate if (dolock) { 4233*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 4234*7c478bd9Sstevel@tonic-gate if (do_qlock && rwtype == RW_WRITER) 4235*7c478bd9Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 4236*7c478bd9Sstevel@tonic-gate } 4237*7c478bd9Sstevel@tonic-gate err = EFAULT; 4238*7c478bd9Sstevel@tonic-gate goto unlock; 4239*7c478bd9Sstevel@tonic-gate } 4240*7c478bd9Sstevel@tonic-gate 4241*7c478bd9Sstevel@tonic-gate /* 4242*7c478bd9Sstevel@tonic-gate * Must hold i_contents lock throughout the call to pvn_getpages 4243*7c478bd9Sstevel@tonic-gate * since locked pages are returned from each call to ufs_getapage. 4244*7c478bd9Sstevel@tonic-gate * Must *not* return locked pages and then try for contents lock 4245*7c478bd9Sstevel@tonic-gate * due to lock ordering requirements (inode > page) 4246*7c478bd9Sstevel@tonic-gate */ 4247*7c478bd9Sstevel@tonic-gate 4248*7c478bd9Sstevel@tonic-gate has_holes = bmap_has_holes(ip); 4249*7c478bd9Sstevel@tonic-gate 4250*7c478bd9Sstevel@tonic-gate if ((rw == S_WRITE || rw == S_CREATE) && has_holes && !beyond_eof) { 4251*7c478bd9Sstevel@tonic-gate int blk_size; 4252*7c478bd9Sstevel@tonic-gate u_offset_t offset; 4253*7c478bd9Sstevel@tonic-gate 4254*7c478bd9Sstevel@tonic-gate /* 4255*7c478bd9Sstevel@tonic-gate * We must acquire the RW_WRITER lock in order to 4256*7c478bd9Sstevel@tonic-gate * call bmap_write(). 4257*7c478bd9Sstevel@tonic-gate */ 4258*7c478bd9Sstevel@tonic-gate if (dolock && rwtype == RW_READER) { 4259*7c478bd9Sstevel@tonic-gate rwtype = RW_WRITER; 4260*7c478bd9Sstevel@tonic-gate 4261*7c478bd9Sstevel@tonic-gate /* 4262*7c478bd9Sstevel@tonic-gate * Grab the quota lock before 4263*7c478bd9Sstevel@tonic-gate * upgrading i_contents, but if we can't grab it 4264*7c478bd9Sstevel@tonic-gate * don't wait here due to lock order: 4265*7c478bd9Sstevel@tonic-gate * vfs_dqrwlock > i_contents. 4266*7c478bd9Sstevel@tonic-gate */ 4267*7c478bd9Sstevel@tonic-gate if (do_qlock && rw_tryenter(&ufsvfsp->vfs_dqrwlock, 4268*7c478bd9Sstevel@tonic-gate RW_READER) == 0) { 4269*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 4270*7c478bd9Sstevel@tonic-gate goto retrylock; 4271*7c478bd9Sstevel@tonic-gate } 4272*7c478bd9Sstevel@tonic-gate if (!rw_tryupgrade(&ip->i_contents)) { 4273*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 4274*7c478bd9Sstevel@tonic-gate if (do_qlock) 4275*7c478bd9Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 4276*7c478bd9Sstevel@tonic-gate goto retrylock; 4277*7c478bd9Sstevel@tonic-gate } 4278*7c478bd9Sstevel@tonic-gate } 4279*7c478bd9Sstevel@tonic-gate 4280*7c478bd9Sstevel@tonic-gate /* 4281*7c478bd9Sstevel@tonic-gate * May be allocating disk blocks for holes here as 4282*7c478bd9Sstevel@tonic-gate * a result of mmap faults. write(2) does the bmap_write 4283*7c478bd9Sstevel@tonic-gate * in rdip/wrip, not here. We are not dealing with frags 4284*7c478bd9Sstevel@tonic-gate * in this case. 4285*7c478bd9Sstevel@tonic-gate */ 4286*7c478bd9Sstevel@tonic-gate /* 4287*7c478bd9Sstevel@tonic-gate * Large Files: We cast fs_bmask field to offset_t 4288*7c478bd9Sstevel@tonic-gate * just as we do for MAXBMASK because uoff is a 64-bit 4289*7c478bd9Sstevel@tonic-gate * data type. fs_bmask will still be a 32-bit type 4290*7c478bd9Sstevel@tonic-gate * as we cannot change any ondisk data structures. 4291*7c478bd9Sstevel@tonic-gate */ 4292*7c478bd9Sstevel@tonic-gate 4293*7c478bd9Sstevel@tonic-gate offset = uoff & (offset_t)fs->fs_bmask; 4294*7c478bd9Sstevel@tonic-gate while (offset < uoff + len) { 4295*7c478bd9Sstevel@tonic-gate blk_size = (int)blksize(fs, ip, lblkno(fs, offset)); 4296*7c478bd9Sstevel@tonic-gate err = bmap_write(ip, offset, blk_size, 0, cr); 4297*7c478bd9Sstevel@tonic-gate if (ip->i_flag & (ICHG|IUPD)) 4298*7c478bd9Sstevel@tonic-gate ip->i_seq++; 4299*7c478bd9Sstevel@tonic-gate if (err) 4300*7c478bd9Sstevel@tonic-gate goto update_inode; 4301*7c478bd9Sstevel@tonic-gate offset += blk_size; /* XXX - make this contig */ 4302*7c478bd9Sstevel@tonic-gate } 4303*7c478bd9Sstevel@tonic-gate } 4304*7c478bd9Sstevel@tonic-gate 4305*7c478bd9Sstevel@tonic-gate /* 4306*7c478bd9Sstevel@tonic-gate * Can be a reader from now on. 4307*7c478bd9Sstevel@tonic-gate */ 4308*7c478bd9Sstevel@tonic-gate if (dolock && rwtype == RW_WRITER) { 4309*7c478bd9Sstevel@tonic-gate rw_downgrade(&ip->i_contents); 4310*7c478bd9Sstevel@tonic-gate /* 4311*7c478bd9Sstevel@tonic-gate * We can release vfs_dqrwlock early so do it, but make 4312*7c478bd9Sstevel@tonic-gate * sure we don't try to release it again at the bottom. 4313*7c478bd9Sstevel@tonic-gate */ 4314*7c478bd9Sstevel@tonic-gate if (do_qlock) { 4315*7c478bd9Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 4316*7c478bd9Sstevel@tonic-gate do_qlock = 0; 4317*7c478bd9Sstevel@tonic-gate } 4318*7c478bd9Sstevel@tonic-gate } 4319*7c478bd9Sstevel@tonic-gate 4320*7c478bd9Sstevel@tonic-gate /* 4321*7c478bd9Sstevel@tonic-gate * We remove PROT_WRITE in cases when the file has UFS holes 4322*7c478bd9Sstevel@tonic-gate * because we don't want to call bmap_read() to check each 4323*7c478bd9Sstevel@tonic-gate * page if it is backed with a disk block. 4324*7c478bd9Sstevel@tonic-gate */ 4325*7c478bd9Sstevel@tonic-gate if (protp && has_holes && rw != S_WRITE && rw != S_CREATE) 4326*7c478bd9Sstevel@tonic-gate *protp &= ~PROT_WRITE; 4327*7c478bd9Sstevel@tonic-gate 4328*7c478bd9Sstevel@tonic-gate err = 0; 4329*7c478bd9Sstevel@tonic-gate 4330*7c478bd9Sstevel@tonic-gate /* 4331*7c478bd9Sstevel@tonic-gate * The loop looks up pages in the range [off, off + len). 4332*7c478bd9Sstevel@tonic-gate * For each page, we first check if we should initiate an asynchronous 4333*7c478bd9Sstevel@tonic-gate * read ahead before we call page_lookup (we may sleep in page_lookup 4334*7c478bd9Sstevel@tonic-gate * for a previously initiated disk read). 4335*7c478bd9Sstevel@tonic-gate */ 4336*7c478bd9Sstevel@tonic-gate eoff = (uoff + len); 4337*7c478bd9Sstevel@tonic-gate for (pgoff = uoff, pgaddr = addr, pl = plarr; 4338*7c478bd9Sstevel@tonic-gate pgoff < eoff; /* empty */) { 4339*7c478bd9Sstevel@tonic-gate page_t *pp; 4340*7c478bd9Sstevel@tonic-gate u_offset_t nextrio; 4341*7c478bd9Sstevel@tonic-gate se_t se; 4342*7c478bd9Sstevel@tonic-gate int retval; 4343*7c478bd9Sstevel@tonic-gate 4344*7c478bd9Sstevel@tonic-gate se = ((rw == S_CREATE || rw == S_OTHER) ? SE_EXCL : SE_SHARED); 4345*7c478bd9Sstevel@tonic-gate 4346*7c478bd9Sstevel@tonic-gate /* Handle async getpage (faultahead) */ 4347*7c478bd9Sstevel@tonic-gate if (plarr == NULL) { 4348*7c478bd9Sstevel@tonic-gate ip->i_nextrio = pgoff; 4349*7c478bd9Sstevel@tonic-gate (void) ufs_getpage_ra(vp, pgoff, seg, pgaddr); 4350*7c478bd9Sstevel@tonic-gate pgoff += pgsize; 4351*7c478bd9Sstevel@tonic-gate pgaddr += pgsize; 4352*7c478bd9Sstevel@tonic-gate continue; 4353*7c478bd9Sstevel@tonic-gate } 4354*7c478bd9Sstevel@tonic-gate /* 4355*7c478bd9Sstevel@tonic-gate * Check if we should initiate read ahead of next cluster. 4356*7c478bd9Sstevel@tonic-gate * We call page_exists only when we need to confirm that 4357*7c478bd9Sstevel@tonic-gate * we have the current page before we initiate the read ahead. 4358*7c478bd9Sstevel@tonic-gate */ 4359*7c478bd9Sstevel@tonic-gate nextrio = ip->i_nextrio; 4360*7c478bd9Sstevel@tonic-gate if (seqmode && 4361*7c478bd9Sstevel@tonic-gate pgoff + CLUSTSZ(ip) >= nextrio && pgoff <= nextrio && 4362*7c478bd9Sstevel@tonic-gate nextrio < ip->i_size && page_exists(vp, pgoff)) { 4363*7c478bd9Sstevel@tonic-gate retval = ufs_getpage_ra(vp, pgoff, seg, pgaddr); 4364*7c478bd9Sstevel@tonic-gate /* 4365*7c478bd9Sstevel@tonic-gate * We always read ahead the next cluster of data 4366*7c478bd9Sstevel@tonic-gate * starting from i_nextrio. If the page (vp,nextrio) 4367*7c478bd9Sstevel@tonic-gate * is actually in core at this point, the routine 4368*7c478bd9Sstevel@tonic-gate * ufs_getpage_ra() will stop pre-fetching data 4369*7c478bd9Sstevel@tonic-gate * until we read that page in a synchronized manner 4370*7c478bd9Sstevel@tonic-gate * through ufs_getpage_miss(). So, we should increase 4371*7c478bd9Sstevel@tonic-gate * i_nextrio if the page (vp, nextrio) exists. 4372*7c478bd9Sstevel@tonic-gate */ 4373*7c478bd9Sstevel@tonic-gate if ((retval == 0) && page_exists(vp, nextrio)) { 4374*7c478bd9Sstevel@tonic-gate ip->i_nextrio = nextrio + pgsize; 4375*7c478bd9Sstevel@tonic-gate } 4376*7c478bd9Sstevel@tonic-gate } 4377*7c478bd9Sstevel@tonic-gate 4378*7c478bd9Sstevel@tonic-gate if ((pp = page_lookup(vp, pgoff, se)) != NULL) { 4379*7c478bd9Sstevel@tonic-gate /* 4380*7c478bd9Sstevel@tonic-gate * We found the page in the page cache. 4381*7c478bd9Sstevel@tonic-gate */ 4382*7c478bd9Sstevel@tonic-gate *pl++ = pp; 4383*7c478bd9Sstevel@tonic-gate pgoff += pgsize; 4384*7c478bd9Sstevel@tonic-gate pgaddr += pgsize; 4385*7c478bd9Sstevel@tonic-gate len -= pgsize; 4386*7c478bd9Sstevel@tonic-gate plsz -= pgsize; 4387*7c478bd9Sstevel@tonic-gate } else { 4388*7c478bd9Sstevel@tonic-gate /* 4389*7c478bd9Sstevel@tonic-gate * We have to create the page, or read it from disk. 4390*7c478bd9Sstevel@tonic-gate */ 4391*7c478bd9Sstevel@tonic-gate if (err = ufs_getpage_miss(vp, pgoff, len, seg, pgaddr, 4392*7c478bd9Sstevel@tonic-gate pl, plsz, rw, seqmode)) 4393*7c478bd9Sstevel@tonic-gate goto error; 4394*7c478bd9Sstevel@tonic-gate 4395*7c478bd9Sstevel@tonic-gate while (*pl != NULL) { 4396*7c478bd9Sstevel@tonic-gate pl++; 4397*7c478bd9Sstevel@tonic-gate pgoff += pgsize; 4398*7c478bd9Sstevel@tonic-gate pgaddr += pgsize; 4399*7c478bd9Sstevel@tonic-gate len -= pgsize; 4400*7c478bd9Sstevel@tonic-gate plsz -= pgsize; 4401*7c478bd9Sstevel@tonic-gate } 4402*7c478bd9Sstevel@tonic-gate } 4403*7c478bd9Sstevel@tonic-gate } 4404*7c478bd9Sstevel@tonic-gate 4405*7c478bd9Sstevel@tonic-gate /* 4406*7c478bd9Sstevel@tonic-gate * Return pages up to plsz if they are in the page cache. 4407*7c478bd9Sstevel@tonic-gate * We cannot return pages if there is a chance that they are 4408*7c478bd9Sstevel@tonic-gate * backed with a UFS hole and rw is S_WRITE or S_CREATE. 4409*7c478bd9Sstevel@tonic-gate */ 4410*7c478bd9Sstevel@tonic-gate if (plarr && !(has_holes && (rw == S_WRITE || rw == S_CREATE))) { 4411*7c478bd9Sstevel@tonic-gate 4412*7c478bd9Sstevel@tonic-gate ASSERT((protp == NULL) || 4413*7c478bd9Sstevel@tonic-gate !(has_holes && (*protp & PROT_WRITE))); 4414*7c478bd9Sstevel@tonic-gate 4415*7c478bd9Sstevel@tonic-gate eoff = pgoff + plsz; 4416*7c478bd9Sstevel@tonic-gate while (pgoff < eoff) { 4417*7c478bd9Sstevel@tonic-gate page_t *pp; 4418*7c478bd9Sstevel@tonic-gate 4419*7c478bd9Sstevel@tonic-gate if ((pp = page_lookup_nowait(vp, pgoff, 4420*7c478bd9Sstevel@tonic-gate SE_SHARED)) == NULL) 4421*7c478bd9Sstevel@tonic-gate break; 4422*7c478bd9Sstevel@tonic-gate 4423*7c478bd9Sstevel@tonic-gate *pl++ = pp; 4424*7c478bd9Sstevel@tonic-gate pgoff += pgsize; 4425*7c478bd9Sstevel@tonic-gate plsz -= pgsize; 4426*7c478bd9Sstevel@tonic-gate } 4427*7c478bd9Sstevel@tonic-gate } 4428*7c478bd9Sstevel@tonic-gate 4429*7c478bd9Sstevel@tonic-gate if (plarr) 4430*7c478bd9Sstevel@tonic-gate *pl = NULL; /* Terminate page list */ 4431*7c478bd9Sstevel@tonic-gate ip->i_nextr = pgoff; 4432*7c478bd9Sstevel@tonic-gate 4433*7c478bd9Sstevel@tonic-gate error: 4434*7c478bd9Sstevel@tonic-gate if (err && plarr) { 4435*7c478bd9Sstevel@tonic-gate /* 4436*7c478bd9Sstevel@tonic-gate * Release any pages we have locked. 4437*7c478bd9Sstevel@tonic-gate */ 4438*7c478bd9Sstevel@tonic-gate while (pl > &plarr[0]) 4439*7c478bd9Sstevel@tonic-gate page_unlock(*--pl); 4440*7c478bd9Sstevel@tonic-gate 4441*7c478bd9Sstevel@tonic-gate plarr[0] = NULL; 4442*7c478bd9Sstevel@tonic-gate } 4443*7c478bd9Sstevel@tonic-gate 4444*7c478bd9Sstevel@tonic-gate update_inode: 4445*7c478bd9Sstevel@tonic-gate /* 4446*7c478bd9Sstevel@tonic-gate * If the inode is not already marked for IACC (in rdip() for read) 4447*7c478bd9Sstevel@tonic-gate * and the inode is not marked for no access time update (in wrip() 4448*7c478bd9Sstevel@tonic-gate * for write) then update the inode access time and mod time now. 4449*7c478bd9Sstevel@tonic-gate */ 4450*7c478bd9Sstevel@tonic-gate if ((ip->i_flag & (IACC | INOACC)) == 0) { 4451*7c478bd9Sstevel@tonic-gate if ((rw != S_OTHER) && (ip->i_mode & IFMT) != IFDIR) { 4452*7c478bd9Sstevel@tonic-gate if (!ULOCKFS_IS_NOIACC(ITOUL(ip)) && 4453*7c478bd9Sstevel@tonic-gate (fs->fs_ronly == 0) && 4454*7c478bd9Sstevel@tonic-gate (!ufsvfsp->vfs_noatime)) { 4455*7c478bd9Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 4456*7c478bd9Sstevel@tonic-gate ip->i_flag |= IACC; 4457*7c478bd9Sstevel@tonic-gate ITIMES_NOLOCK(ip); 4458*7c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 4459*7c478bd9Sstevel@tonic-gate } 4460*7c478bd9Sstevel@tonic-gate } 4461*7c478bd9Sstevel@tonic-gate } 4462*7c478bd9Sstevel@tonic-gate 4463*7c478bd9Sstevel@tonic-gate if (dolock) { 4464*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 4465*7c478bd9Sstevel@tonic-gate if (do_qlock && rwtype == RW_WRITER) 4466*7c478bd9Sstevel@tonic-gate rw_exit(&ufsvfsp->vfs_dqrwlock); 4467*7c478bd9Sstevel@tonic-gate } 4468*7c478bd9Sstevel@tonic-gate 4469*7c478bd9Sstevel@tonic-gate unlock: 4470*7c478bd9Sstevel@tonic-gate if (ulp) { 4471*7c478bd9Sstevel@tonic-gate if ((rw == S_CREATE || rw == S_WRITE) && 4472*7c478bd9Sstevel@tonic-gate !(vp->v_flag & VISSWAP)) { 4473*7c478bd9Sstevel@tonic-gate TRANS_END_ASYNC(ufsvfsp, TOP_GETPAGE, trans_size); 4474*7c478bd9Sstevel@tonic-gate } 4475*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 4476*7c478bd9Sstevel@tonic-gate } 4477*7c478bd9Sstevel@tonic-gate out: 4478*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_GETPAGE_END, 4479*7c478bd9Sstevel@tonic-gate "ufs_getpage_end:vp %p error %d", vp, err); 4480*7c478bd9Sstevel@tonic-gate return (err); 4481*7c478bd9Sstevel@tonic-gate } 4482*7c478bd9Sstevel@tonic-gate 4483*7c478bd9Sstevel@tonic-gate /* 4484*7c478bd9Sstevel@tonic-gate * ufs_getpage_miss is called when ufs_getpage missed the page in the page 4485*7c478bd9Sstevel@tonic-gate * cache. The page is either read from the disk, or it's created. 4486*7c478bd9Sstevel@tonic-gate * A page is created (without disk read) if rw == S_CREATE, or if 4487*7c478bd9Sstevel@tonic-gate * the page is not backed with a real disk block (UFS hole). 4488*7c478bd9Sstevel@tonic-gate */ 4489*7c478bd9Sstevel@tonic-gate /* ARGSUSED */ 4490*7c478bd9Sstevel@tonic-gate static int 4491*7c478bd9Sstevel@tonic-gate ufs_getpage_miss(struct vnode *vp, u_offset_t off, size_t len, struct seg *seg, 4492*7c478bd9Sstevel@tonic-gate caddr_t addr, page_t *pl[], size_t plsz, enum seg_rw rw, int seq) 4493*7c478bd9Sstevel@tonic-gate { 4494*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 4495*7c478bd9Sstevel@tonic-gate page_t *pp; 4496*7c478bd9Sstevel@tonic-gate daddr_t bn; 4497*7c478bd9Sstevel@tonic-gate size_t io_len; 4498*7c478bd9Sstevel@tonic-gate int crpage; 4499*7c478bd9Sstevel@tonic-gate int err; 4500*7c478bd9Sstevel@tonic-gate int contig; 4501*7c478bd9Sstevel@tonic-gate int bsize = ip->i_fs->fs_bsize; 4502*7c478bd9Sstevel@tonic-gate 4503*7c478bd9Sstevel@tonic-gate /* 4504*7c478bd9Sstevel@tonic-gate * Figure out whether the page can be created, or must be 4505*7c478bd9Sstevel@tonic-gate * must be read from the disk. 4506*7c478bd9Sstevel@tonic-gate */ 4507*7c478bd9Sstevel@tonic-gate if (rw == S_CREATE) 4508*7c478bd9Sstevel@tonic-gate crpage = 1; 4509*7c478bd9Sstevel@tonic-gate else { 4510*7c478bd9Sstevel@tonic-gate contig = 0; 4511*7c478bd9Sstevel@tonic-gate if (err = bmap_read(ip, off, &bn, &contig)) 4512*7c478bd9Sstevel@tonic-gate return (err); 4513*7c478bd9Sstevel@tonic-gate crpage = (bn == UFS_HOLE); 4514*7c478bd9Sstevel@tonic-gate } 4515*7c478bd9Sstevel@tonic-gate 4516*7c478bd9Sstevel@tonic-gate if (crpage) { 4517*7c478bd9Sstevel@tonic-gate if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT, seg, 4518*7c478bd9Sstevel@tonic-gate addr)) == NULL) { 4519*7c478bd9Sstevel@tonic-gate return (ufs_fault(vp, 4520*7c478bd9Sstevel@tonic-gate "ufs_getpage_miss: page_create == NULL")); 4521*7c478bd9Sstevel@tonic-gate } 4522*7c478bd9Sstevel@tonic-gate 4523*7c478bd9Sstevel@tonic-gate if (rw != S_CREATE) 4524*7c478bd9Sstevel@tonic-gate pagezero(pp, 0, PAGESIZE); 4525*7c478bd9Sstevel@tonic-gate io_len = PAGESIZE; 4526*7c478bd9Sstevel@tonic-gate } else { 4527*7c478bd9Sstevel@tonic-gate u_offset_t io_off; 4528*7c478bd9Sstevel@tonic-gate uint_t xlen; 4529*7c478bd9Sstevel@tonic-gate struct buf *bp; 4530*7c478bd9Sstevel@tonic-gate ufsvfs_t *ufsvfsp = ip->i_ufsvfs; 4531*7c478bd9Sstevel@tonic-gate 4532*7c478bd9Sstevel@tonic-gate /* 4533*7c478bd9Sstevel@tonic-gate * If access is not in sequential order, we read from disk 4534*7c478bd9Sstevel@tonic-gate * in bsize units. 4535*7c478bd9Sstevel@tonic-gate * 4536*7c478bd9Sstevel@tonic-gate * We limit the size of the transfer to bsize if we are reading 4537*7c478bd9Sstevel@tonic-gate * from the beginning of the file. Note in this situation we 4538*7c478bd9Sstevel@tonic-gate * will hedge our bets and initiate an async read ahead of 4539*7c478bd9Sstevel@tonic-gate * the second block. 4540*7c478bd9Sstevel@tonic-gate */ 4541*7c478bd9Sstevel@tonic-gate if (!seq || off == 0) 4542*7c478bd9Sstevel@tonic-gate contig = MIN(contig, bsize); 4543*7c478bd9Sstevel@tonic-gate 4544*7c478bd9Sstevel@tonic-gate pp = pvn_read_kluster(vp, off, seg, addr, &io_off, 4545*7c478bd9Sstevel@tonic-gate &io_len, off, contig, 0); 4546*7c478bd9Sstevel@tonic-gate 4547*7c478bd9Sstevel@tonic-gate /* 4548*7c478bd9Sstevel@tonic-gate * Some other thread has entered the page. 4549*7c478bd9Sstevel@tonic-gate * ufs_getpage will retry page_lookup. 4550*7c478bd9Sstevel@tonic-gate */ 4551*7c478bd9Sstevel@tonic-gate if (pp == NULL) { 4552*7c478bd9Sstevel@tonic-gate pl[0] = NULL; 4553*7c478bd9Sstevel@tonic-gate return (0); 4554*7c478bd9Sstevel@tonic-gate } 4555*7c478bd9Sstevel@tonic-gate 4556*7c478bd9Sstevel@tonic-gate /* 4557*7c478bd9Sstevel@tonic-gate * Zero part of the page which we are not 4558*7c478bd9Sstevel@tonic-gate * going to read from the disk. 4559*7c478bd9Sstevel@tonic-gate */ 4560*7c478bd9Sstevel@tonic-gate xlen = io_len & PAGEOFFSET; 4561*7c478bd9Sstevel@tonic-gate if (xlen != 0) 4562*7c478bd9Sstevel@tonic-gate pagezero(pp->p_prev, xlen, PAGESIZE - xlen); 4563*7c478bd9Sstevel@tonic-gate 4564*7c478bd9Sstevel@tonic-gate bp = pageio_setup(pp, io_len, ip->i_devvp, B_READ); 4565*7c478bd9Sstevel@tonic-gate bp->b_edev = ip->i_dev; 4566*7c478bd9Sstevel@tonic-gate bp->b_dev = cmpdev(ip->i_dev); 4567*7c478bd9Sstevel@tonic-gate bp->b_blkno = bn; 4568*7c478bd9Sstevel@tonic-gate bp->b_un.b_addr = (caddr_t)0; 4569*7c478bd9Sstevel@tonic-gate bp->b_file = ip->i_vnode; 4570*7c478bd9Sstevel@tonic-gate bp->b_offset = off; 4571*7c478bd9Sstevel@tonic-gate 4572*7c478bd9Sstevel@tonic-gate if (ufsvfsp->vfs_log) { 4573*7c478bd9Sstevel@tonic-gate lufs_read_strategy(ufsvfsp->vfs_log, bp); 4574*7c478bd9Sstevel@tonic-gate } else if (ufsvfsp->vfs_snapshot) { 4575*7c478bd9Sstevel@tonic-gate fssnap_strategy(&ufsvfsp->vfs_snapshot, bp); 4576*7c478bd9Sstevel@tonic-gate } else { 4577*7c478bd9Sstevel@tonic-gate ufsvfsp->vfs_iotstamp = lbolt; 4578*7c478bd9Sstevel@tonic-gate ub.ub_getpages.value.ul++; 4579*7c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 4580*7c478bd9Sstevel@tonic-gate lwp_stat_update(LWP_STAT_INBLK, 1); 4581*7c478bd9Sstevel@tonic-gate } 4582*7c478bd9Sstevel@tonic-gate 4583*7c478bd9Sstevel@tonic-gate ip->i_nextrio = off + ((io_len + PAGESIZE - 1) & PAGEMASK); 4584*7c478bd9Sstevel@tonic-gate 4585*7c478bd9Sstevel@tonic-gate /* 4586*7c478bd9Sstevel@tonic-gate * If the file access is sequential, initiate read ahead 4587*7c478bd9Sstevel@tonic-gate * of the next cluster. 4588*7c478bd9Sstevel@tonic-gate */ 4589*7c478bd9Sstevel@tonic-gate if (seq && ip->i_nextrio < ip->i_size) 4590*7c478bd9Sstevel@tonic-gate (void) ufs_getpage_ra(vp, off, seg, addr); 4591*7c478bd9Sstevel@tonic-gate err = biowait(bp); 4592*7c478bd9Sstevel@tonic-gate pageio_done(bp); 4593*7c478bd9Sstevel@tonic-gate 4594*7c478bd9Sstevel@tonic-gate if (err) { 4595*7c478bd9Sstevel@tonic-gate pvn_read_done(pp, B_ERROR); 4596*7c478bd9Sstevel@tonic-gate return (err); 4597*7c478bd9Sstevel@tonic-gate } 4598*7c478bd9Sstevel@tonic-gate } 4599*7c478bd9Sstevel@tonic-gate 4600*7c478bd9Sstevel@tonic-gate pvn_plist_init(pp, pl, plsz, off, io_len, rw); 4601*7c478bd9Sstevel@tonic-gate return (0); 4602*7c478bd9Sstevel@tonic-gate } 4603*7c478bd9Sstevel@tonic-gate 4604*7c478bd9Sstevel@tonic-gate /* 4605*7c478bd9Sstevel@tonic-gate * Read ahead a cluster from the disk. Returns the length in bytes. 4606*7c478bd9Sstevel@tonic-gate */ 4607*7c478bd9Sstevel@tonic-gate static int 4608*7c478bd9Sstevel@tonic-gate ufs_getpage_ra(struct vnode *vp, u_offset_t off, struct seg *seg, caddr_t addr) 4609*7c478bd9Sstevel@tonic-gate { 4610*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 4611*7c478bd9Sstevel@tonic-gate page_t *pp; 4612*7c478bd9Sstevel@tonic-gate u_offset_t io_off = ip->i_nextrio; 4613*7c478bd9Sstevel@tonic-gate ufsvfs_t *ufsvfsp; 4614*7c478bd9Sstevel@tonic-gate caddr_t addr2 = addr + (io_off - off); 4615*7c478bd9Sstevel@tonic-gate struct buf *bp; 4616*7c478bd9Sstevel@tonic-gate daddr_t bn; 4617*7c478bd9Sstevel@tonic-gate size_t io_len; 4618*7c478bd9Sstevel@tonic-gate int contig; 4619*7c478bd9Sstevel@tonic-gate int xlen; 4620*7c478bd9Sstevel@tonic-gate int bsize = ip->i_fs->fs_bsize; 4621*7c478bd9Sstevel@tonic-gate 4622*7c478bd9Sstevel@tonic-gate /* 4623*7c478bd9Sstevel@tonic-gate * If the directio advisory is in effect on this file, 4624*7c478bd9Sstevel@tonic-gate * then do not do buffered read ahead. Read ahead makes 4625*7c478bd9Sstevel@tonic-gate * it more difficult on threads using directio as they 4626*7c478bd9Sstevel@tonic-gate * will be forced to flush the pages from this vnode. 4627*7c478bd9Sstevel@tonic-gate */ 4628*7c478bd9Sstevel@tonic-gate if ((ufsvfsp = ip->i_ufsvfs) == NULL) 4629*7c478bd9Sstevel@tonic-gate return (0); 4630*7c478bd9Sstevel@tonic-gate if (ip->i_flag & IDIRECTIO || ufsvfsp->vfs_forcedirectio) 4631*7c478bd9Sstevel@tonic-gate return (0); 4632*7c478bd9Sstevel@tonic-gate 4633*7c478bd9Sstevel@tonic-gate /* 4634*7c478bd9Sstevel@tonic-gate * Is this test needed? 4635*7c478bd9Sstevel@tonic-gate */ 4636*7c478bd9Sstevel@tonic-gate if (addr2 >= seg->s_base + seg->s_size) 4637*7c478bd9Sstevel@tonic-gate return (0); 4638*7c478bd9Sstevel@tonic-gate 4639*7c478bd9Sstevel@tonic-gate contig = 0; 4640*7c478bd9Sstevel@tonic-gate if (bmap_read(ip, io_off, &bn, &contig) != 0 || bn == UFS_HOLE) 4641*7c478bd9Sstevel@tonic-gate return (0); 4642*7c478bd9Sstevel@tonic-gate 4643*7c478bd9Sstevel@tonic-gate /* 4644*7c478bd9Sstevel@tonic-gate * Limit the transfer size to bsize if this is the 2nd block. 4645*7c478bd9Sstevel@tonic-gate */ 4646*7c478bd9Sstevel@tonic-gate if (io_off == (u_offset_t)bsize) 4647*7c478bd9Sstevel@tonic-gate contig = MIN(contig, bsize); 4648*7c478bd9Sstevel@tonic-gate 4649*7c478bd9Sstevel@tonic-gate if ((pp = pvn_read_kluster(vp, io_off, seg, addr2, &io_off, 4650*7c478bd9Sstevel@tonic-gate &io_len, io_off, contig, 1)) == NULL) 4651*7c478bd9Sstevel@tonic-gate return (0); 4652*7c478bd9Sstevel@tonic-gate 4653*7c478bd9Sstevel@tonic-gate /* 4654*7c478bd9Sstevel@tonic-gate * Zero part of page which we are not going to read from disk 4655*7c478bd9Sstevel@tonic-gate */ 4656*7c478bd9Sstevel@tonic-gate if ((xlen = (io_len & PAGEOFFSET)) > 0) 4657*7c478bd9Sstevel@tonic-gate pagezero(pp->p_prev, xlen, PAGESIZE - xlen); 4658*7c478bd9Sstevel@tonic-gate 4659*7c478bd9Sstevel@tonic-gate ip->i_nextrio = (io_off + io_len + PAGESIZE - 1) & PAGEMASK; 4660*7c478bd9Sstevel@tonic-gate 4661*7c478bd9Sstevel@tonic-gate bp = pageio_setup(pp, io_len, ip->i_devvp, B_READ | B_ASYNC); 4662*7c478bd9Sstevel@tonic-gate bp->b_edev = ip->i_dev; 4663*7c478bd9Sstevel@tonic-gate bp->b_dev = cmpdev(ip->i_dev); 4664*7c478bd9Sstevel@tonic-gate bp->b_blkno = bn; 4665*7c478bd9Sstevel@tonic-gate bp->b_un.b_addr = (caddr_t)0; 4666*7c478bd9Sstevel@tonic-gate bp->b_file = ip->i_vnode; 4667*7c478bd9Sstevel@tonic-gate bp->b_offset = off; 4668*7c478bd9Sstevel@tonic-gate 4669*7c478bd9Sstevel@tonic-gate if (ufsvfsp->vfs_log) { 4670*7c478bd9Sstevel@tonic-gate lufs_read_strategy(ufsvfsp->vfs_log, bp); 4671*7c478bd9Sstevel@tonic-gate } else if (ufsvfsp->vfs_snapshot) { 4672*7c478bd9Sstevel@tonic-gate fssnap_strategy(&ufsvfsp->vfs_snapshot, bp); 4673*7c478bd9Sstevel@tonic-gate } else { 4674*7c478bd9Sstevel@tonic-gate ufsvfsp->vfs_iotstamp = lbolt; 4675*7c478bd9Sstevel@tonic-gate ub.ub_getras.value.ul++; 4676*7c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 4677*7c478bd9Sstevel@tonic-gate lwp_stat_update(LWP_STAT_INBLK, 1); 4678*7c478bd9Sstevel@tonic-gate } 4679*7c478bd9Sstevel@tonic-gate 4680*7c478bd9Sstevel@tonic-gate return (io_len); 4681*7c478bd9Sstevel@tonic-gate } 4682*7c478bd9Sstevel@tonic-gate 4683*7c478bd9Sstevel@tonic-gate int ufs_delay = 1; 4684*7c478bd9Sstevel@tonic-gate /* 4685*7c478bd9Sstevel@tonic-gate * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE, B_ASYNC} 4686*7c478bd9Sstevel@tonic-gate * 4687*7c478bd9Sstevel@tonic-gate * LMXXX - the inode really ought to contain a pointer to one of these 4688*7c478bd9Sstevel@tonic-gate * async args. Stuff gunk in there and just hand the whole mess off. 4689*7c478bd9Sstevel@tonic-gate * This would replace i_delaylen, i_delayoff. 4690*7c478bd9Sstevel@tonic-gate */ 4691*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 4692*7c478bd9Sstevel@tonic-gate static int 4693*7c478bd9Sstevel@tonic-gate ufs_putpage(struct vnode *vp, offset_t off, size_t len, int flags, 4694*7c478bd9Sstevel@tonic-gate struct cred *cr) 4695*7c478bd9Sstevel@tonic-gate { 4696*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 4697*7c478bd9Sstevel@tonic-gate int err = 0; 4698*7c478bd9Sstevel@tonic-gate 4699*7c478bd9Sstevel@tonic-gate if (vp->v_count == 0) { 4700*7c478bd9Sstevel@tonic-gate return (ufs_fault(vp, "ufs_putpage: bad v_count == 0")); 4701*7c478bd9Sstevel@tonic-gate } 4702*7c478bd9Sstevel@tonic-gate 4703*7c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_UFS, TR_UFS_PUTPAGE_START, 4704*7c478bd9Sstevel@tonic-gate "ufs_putpage_start:vp %p", vp); 4705*7c478bd9Sstevel@tonic-gate 4706*7c478bd9Sstevel@tonic-gate /* 4707*7c478bd9Sstevel@tonic-gate * XXX - Why should this check be made here? 4708*7c478bd9Sstevel@tonic-gate */ 4709*7c478bd9Sstevel@tonic-gate if (vp->v_flag & VNOMAP) { 4710*7c478bd9Sstevel@tonic-gate err = ENOSYS; 4711*7c478bd9Sstevel@tonic-gate goto errout; 4712*7c478bd9Sstevel@tonic-gate } 4713*7c478bd9Sstevel@tonic-gate 4714*7c478bd9Sstevel@tonic-gate if (ip->i_ufsvfs == NULL) { 4715*7c478bd9Sstevel@tonic-gate err = EIO; 4716*7c478bd9Sstevel@tonic-gate goto errout; 4717*7c478bd9Sstevel@tonic-gate } 4718*7c478bd9Sstevel@tonic-gate 4719*7c478bd9Sstevel@tonic-gate if (flags & B_ASYNC) { 4720*7c478bd9Sstevel@tonic-gate if (ufs_delay && len && 4721*7c478bd9Sstevel@tonic-gate (flags & ~(B_ASYNC|B_DONTNEED|B_FREE)) == 0) { 4722*7c478bd9Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 4723*7c478bd9Sstevel@tonic-gate /* 4724*7c478bd9Sstevel@tonic-gate * If nobody stalled, start a new cluster. 4725*7c478bd9Sstevel@tonic-gate */ 4726*7c478bd9Sstevel@tonic-gate if (ip->i_delaylen == 0) { 4727*7c478bd9Sstevel@tonic-gate ip->i_delayoff = off; 4728*7c478bd9Sstevel@tonic-gate ip->i_delaylen = len; 4729*7c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 4730*7c478bd9Sstevel@tonic-gate goto errout; 4731*7c478bd9Sstevel@tonic-gate } 4732*7c478bd9Sstevel@tonic-gate /* 4733*7c478bd9Sstevel@tonic-gate * If we have a full cluster or they are not contig, 4734*7c478bd9Sstevel@tonic-gate * then push last cluster and start over. 4735*7c478bd9Sstevel@tonic-gate */ 4736*7c478bd9Sstevel@tonic-gate if (ip->i_delaylen >= CLUSTSZ(ip) || 4737*7c478bd9Sstevel@tonic-gate ip->i_delayoff + ip->i_delaylen != off) { 4738*7c478bd9Sstevel@tonic-gate u_offset_t doff; 4739*7c478bd9Sstevel@tonic-gate size_t dlen; 4740*7c478bd9Sstevel@tonic-gate 4741*7c478bd9Sstevel@tonic-gate doff = ip->i_delayoff; 4742*7c478bd9Sstevel@tonic-gate dlen = ip->i_delaylen; 4743*7c478bd9Sstevel@tonic-gate ip->i_delayoff = off; 4744*7c478bd9Sstevel@tonic-gate ip->i_delaylen = len; 4745*7c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 4746*7c478bd9Sstevel@tonic-gate err = ufs_putpages(vp, doff, dlen, 4747*7c478bd9Sstevel@tonic-gate flags, cr); 4748*7c478bd9Sstevel@tonic-gate /* LMXXX - flags are new val, not old */ 4749*7c478bd9Sstevel@tonic-gate goto errout; 4750*7c478bd9Sstevel@tonic-gate } 4751*7c478bd9Sstevel@tonic-gate /* 4752*7c478bd9Sstevel@tonic-gate * There is something there, it's not full, and 4753*7c478bd9Sstevel@tonic-gate * it is contig. 4754*7c478bd9Sstevel@tonic-gate */ 4755*7c478bd9Sstevel@tonic-gate ip->i_delaylen += len; 4756*7c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 4757*7c478bd9Sstevel@tonic-gate goto errout; 4758*7c478bd9Sstevel@tonic-gate } 4759*7c478bd9Sstevel@tonic-gate /* 4760*7c478bd9Sstevel@tonic-gate * Must have weird flags or we are not clustering. 4761*7c478bd9Sstevel@tonic-gate */ 4762*7c478bd9Sstevel@tonic-gate } 4763*7c478bd9Sstevel@tonic-gate 4764*7c478bd9Sstevel@tonic-gate err = ufs_putpages(vp, off, len, flags, cr); 4765*7c478bd9Sstevel@tonic-gate 4766*7c478bd9Sstevel@tonic-gate errout: 4767*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_PUTPAGE_END, 4768*7c478bd9Sstevel@tonic-gate "ufs_putpage_end:vp %p error %d", vp, err); 4769*7c478bd9Sstevel@tonic-gate return (err); 4770*7c478bd9Sstevel@tonic-gate } 4771*7c478bd9Sstevel@tonic-gate 4772*7c478bd9Sstevel@tonic-gate /* 4773*7c478bd9Sstevel@tonic-gate * If len == 0, do from off to EOF. 4774*7c478bd9Sstevel@tonic-gate * 4775*7c478bd9Sstevel@tonic-gate * The normal cases should be len == 0 & off == 0 (entire vp list), 4776*7c478bd9Sstevel@tonic-gate * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE 4777*7c478bd9Sstevel@tonic-gate * (from pageout). 4778*7c478bd9Sstevel@tonic-gate */ 4779*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 4780*7c478bd9Sstevel@tonic-gate static int 4781*7c478bd9Sstevel@tonic-gate ufs_putpages( 4782*7c478bd9Sstevel@tonic-gate struct vnode *vp, 4783*7c478bd9Sstevel@tonic-gate offset_t off, 4784*7c478bd9Sstevel@tonic-gate size_t len, 4785*7c478bd9Sstevel@tonic-gate int flags, 4786*7c478bd9Sstevel@tonic-gate struct cred *cr) 4787*7c478bd9Sstevel@tonic-gate { 4788*7c478bd9Sstevel@tonic-gate u_offset_t io_off; 4789*7c478bd9Sstevel@tonic-gate u_offset_t eoff; 4790*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 4791*7c478bd9Sstevel@tonic-gate page_t *pp; 4792*7c478bd9Sstevel@tonic-gate size_t io_len; 4793*7c478bd9Sstevel@tonic-gate int err = 0; 4794*7c478bd9Sstevel@tonic-gate int dolock; 4795*7c478bd9Sstevel@tonic-gate 4796*7c478bd9Sstevel@tonic-gate if (vp->v_count == 0) 4797*7c478bd9Sstevel@tonic-gate return (ufs_fault(vp, "ufs_putpages: v_count == 0")); 4798*7c478bd9Sstevel@tonic-gate /* 4799*7c478bd9Sstevel@tonic-gate * Acquire the readers/write inode lock before locking 4800*7c478bd9Sstevel@tonic-gate * any pages in this inode. 4801*7c478bd9Sstevel@tonic-gate * The inode lock is held during i/o. 4802*7c478bd9Sstevel@tonic-gate */ 4803*7c478bd9Sstevel@tonic-gate if (len == 0) { 4804*7c478bd9Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 4805*7c478bd9Sstevel@tonic-gate ip->i_delayoff = ip->i_delaylen = 0; 4806*7c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 4807*7c478bd9Sstevel@tonic-gate } 4808*7c478bd9Sstevel@tonic-gate dolock = (rw_owner(&ip->i_contents) != curthread); 4809*7c478bd9Sstevel@tonic-gate if (dolock) { 4810*7c478bd9Sstevel@tonic-gate /* 4811*7c478bd9Sstevel@tonic-gate * Must synchronize this thread and any possible thread 4812*7c478bd9Sstevel@tonic-gate * operating in the window of vulnerability in wrip(). 4813*7c478bd9Sstevel@tonic-gate * It is dangerous to allow both a thread doing a putpage 4814*7c478bd9Sstevel@tonic-gate * and a thread writing, so serialize them. The exception 4815*7c478bd9Sstevel@tonic-gate * is when the thread in wrip() does something which causes 4816*7c478bd9Sstevel@tonic-gate * a putpage operation. Then, the thread must be allowed 4817*7c478bd9Sstevel@tonic-gate * to continue. It may encounter a bmap_read problem in 4818*7c478bd9Sstevel@tonic-gate * ufs_putapage, but that is handled in ufs_putapage. 4819*7c478bd9Sstevel@tonic-gate * Allow async writers to proceed, we don't want to block 4820*7c478bd9Sstevel@tonic-gate * the pageout daemon. 4821*7c478bd9Sstevel@tonic-gate */ 4822*7c478bd9Sstevel@tonic-gate if (ip->i_writer == curthread) 4823*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER); 4824*7c478bd9Sstevel@tonic-gate else { 4825*7c478bd9Sstevel@tonic-gate for (;;) { 4826*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER); 4827*7c478bd9Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 4828*7c478bd9Sstevel@tonic-gate /* 4829*7c478bd9Sstevel@tonic-gate * If there is no thread in the critical 4830*7c478bd9Sstevel@tonic-gate * section of wrip(), then proceed. 4831*7c478bd9Sstevel@tonic-gate * Otherwise, wait until there isn't one. 4832*7c478bd9Sstevel@tonic-gate */ 4833*7c478bd9Sstevel@tonic-gate if (ip->i_writer == NULL) { 4834*7c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 4835*7c478bd9Sstevel@tonic-gate break; 4836*7c478bd9Sstevel@tonic-gate } 4837*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 4838*7c478bd9Sstevel@tonic-gate /* 4839*7c478bd9Sstevel@tonic-gate * Bounce async writers when we have a writer 4840*7c478bd9Sstevel@tonic-gate * working on this file so we don't deadlock 4841*7c478bd9Sstevel@tonic-gate * the pageout daemon. 4842*7c478bd9Sstevel@tonic-gate */ 4843*7c478bd9Sstevel@tonic-gate if (flags & B_ASYNC) { 4844*7c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 4845*7c478bd9Sstevel@tonic-gate return (0); 4846*7c478bd9Sstevel@tonic-gate } 4847*7c478bd9Sstevel@tonic-gate cv_wait(&ip->i_wrcv, &ip->i_tlock); 4848*7c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 4849*7c478bd9Sstevel@tonic-gate } 4850*7c478bd9Sstevel@tonic-gate } 4851*7c478bd9Sstevel@tonic-gate } 4852*7c478bd9Sstevel@tonic-gate 4853*7c478bd9Sstevel@tonic-gate if (!vn_has_cached_data(vp)) { 4854*7c478bd9Sstevel@tonic-gate if (dolock) 4855*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 4856*7c478bd9Sstevel@tonic-gate return (0); 4857*7c478bd9Sstevel@tonic-gate } 4858*7c478bd9Sstevel@tonic-gate 4859*7c478bd9Sstevel@tonic-gate if (len == 0) { 4860*7c478bd9Sstevel@tonic-gate /* 4861*7c478bd9Sstevel@tonic-gate * Search the entire vp list for pages >= off. 4862*7c478bd9Sstevel@tonic-gate */ 4863*7c478bd9Sstevel@tonic-gate err = pvn_vplist_dirty(vp, (u_offset_t)off, ufs_putapage, 4864*7c478bd9Sstevel@tonic-gate flags, cr); 4865*7c478bd9Sstevel@tonic-gate } else { 4866*7c478bd9Sstevel@tonic-gate /* 4867*7c478bd9Sstevel@tonic-gate * Loop over all offsets in the range looking for 4868*7c478bd9Sstevel@tonic-gate * pages to deal with. 4869*7c478bd9Sstevel@tonic-gate */ 4870*7c478bd9Sstevel@tonic-gate if ((eoff = blkroundup(ip->i_fs, ip->i_size)) != 0) 4871*7c478bd9Sstevel@tonic-gate eoff = MIN(off + len, eoff); 4872*7c478bd9Sstevel@tonic-gate else 4873*7c478bd9Sstevel@tonic-gate eoff = off + len; 4874*7c478bd9Sstevel@tonic-gate 4875*7c478bd9Sstevel@tonic-gate for (io_off = off; io_off < eoff; io_off += io_len) { 4876*7c478bd9Sstevel@tonic-gate /* 4877*7c478bd9Sstevel@tonic-gate * If we are not invalidating, synchronously 4878*7c478bd9Sstevel@tonic-gate * freeing or writing pages, use the routine 4879*7c478bd9Sstevel@tonic-gate * page_lookup_nowait() to prevent reclaiming 4880*7c478bd9Sstevel@tonic-gate * them from the free list. 4881*7c478bd9Sstevel@tonic-gate */ 4882*7c478bd9Sstevel@tonic-gate if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) { 4883*7c478bd9Sstevel@tonic-gate pp = page_lookup(vp, io_off, 4884*7c478bd9Sstevel@tonic-gate (flags & (B_INVAL | B_FREE)) ? 4885*7c478bd9Sstevel@tonic-gate SE_EXCL : SE_SHARED); 4886*7c478bd9Sstevel@tonic-gate } else { 4887*7c478bd9Sstevel@tonic-gate pp = page_lookup_nowait(vp, io_off, 4888*7c478bd9Sstevel@tonic-gate (flags & B_FREE) ? SE_EXCL : SE_SHARED); 4889*7c478bd9Sstevel@tonic-gate } 4890*7c478bd9Sstevel@tonic-gate 4891*7c478bd9Sstevel@tonic-gate if (pp == NULL || pvn_getdirty(pp, flags) == 0) 4892*7c478bd9Sstevel@tonic-gate io_len = PAGESIZE; 4893*7c478bd9Sstevel@tonic-gate else { 4894*7c478bd9Sstevel@tonic-gate u_offset_t *io_offp = &io_off; 4895*7c478bd9Sstevel@tonic-gate 4896*7c478bd9Sstevel@tonic-gate err = ufs_putapage(vp, pp, io_offp, &io_len, 4897*7c478bd9Sstevel@tonic-gate flags, cr); 4898*7c478bd9Sstevel@tonic-gate if (err != 0) 4899*7c478bd9Sstevel@tonic-gate break; 4900*7c478bd9Sstevel@tonic-gate /* 4901*7c478bd9Sstevel@tonic-gate * "io_off" and "io_len" are returned as 4902*7c478bd9Sstevel@tonic-gate * the range of pages we actually wrote. 4903*7c478bd9Sstevel@tonic-gate * This allows us to skip ahead more quickly 4904*7c478bd9Sstevel@tonic-gate * since several pages may've been dealt 4905*7c478bd9Sstevel@tonic-gate * with by this iteration of the loop. 4906*7c478bd9Sstevel@tonic-gate */ 4907*7c478bd9Sstevel@tonic-gate } 4908*7c478bd9Sstevel@tonic-gate } 4909*7c478bd9Sstevel@tonic-gate } 4910*7c478bd9Sstevel@tonic-gate if (err == 0 && off == 0 && (len == 0 || len >= ip->i_size)) { 4911*7c478bd9Sstevel@tonic-gate /* 4912*7c478bd9Sstevel@tonic-gate * We have just sync'ed back all the pages on 4913*7c478bd9Sstevel@tonic-gate * the inode, turn off the IMODTIME flag. 4914*7c478bd9Sstevel@tonic-gate */ 4915*7c478bd9Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 4916*7c478bd9Sstevel@tonic-gate ip->i_flag &= ~IMODTIME; 4917*7c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 4918*7c478bd9Sstevel@tonic-gate } 4919*7c478bd9Sstevel@tonic-gate if (dolock) 4920*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 4921*7c478bd9Sstevel@tonic-gate return (err); 4922*7c478bd9Sstevel@tonic-gate } 4923*7c478bd9Sstevel@tonic-gate 4924*7c478bd9Sstevel@tonic-gate static void 4925*7c478bd9Sstevel@tonic-gate ufs_iodone(buf_t *bp) 4926*7c478bd9Sstevel@tonic-gate { 4927*7c478bd9Sstevel@tonic-gate struct inode *ip; 4928*7c478bd9Sstevel@tonic-gate 4929*7c478bd9Sstevel@tonic-gate ASSERT((bp->b_pages->p_vnode != NULL) && !(bp->b_flags & B_READ)); 4930*7c478bd9Sstevel@tonic-gate 4931*7c478bd9Sstevel@tonic-gate bp->b_iodone = NULL; 4932*7c478bd9Sstevel@tonic-gate 4933*7c478bd9Sstevel@tonic-gate ip = VTOI(bp->b_pages->p_vnode); 4934*7c478bd9Sstevel@tonic-gate 4935*7c478bd9Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 4936*7c478bd9Sstevel@tonic-gate if (ip->i_writes >= ufs_LW) { 4937*7c478bd9Sstevel@tonic-gate if ((ip->i_writes -= bp->b_bcount) <= ufs_LW) 4938*7c478bd9Sstevel@tonic-gate if (ufs_WRITES) 4939*7c478bd9Sstevel@tonic-gate cv_broadcast(&ip->i_wrcv); /* wake all up */ 4940*7c478bd9Sstevel@tonic-gate } else { 4941*7c478bd9Sstevel@tonic-gate ip->i_writes -= bp->b_bcount; 4942*7c478bd9Sstevel@tonic-gate } 4943*7c478bd9Sstevel@tonic-gate 4944*7c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 4945*7c478bd9Sstevel@tonic-gate iodone(bp); 4946*7c478bd9Sstevel@tonic-gate } 4947*7c478bd9Sstevel@tonic-gate 4948*7c478bd9Sstevel@tonic-gate /* 4949*7c478bd9Sstevel@tonic-gate * Write out a single page, possibly klustering adjacent 4950*7c478bd9Sstevel@tonic-gate * dirty pages. The inode lock must be held. 4951*7c478bd9Sstevel@tonic-gate * 4952*7c478bd9Sstevel@tonic-gate * LMXXX - bsize < pagesize not done. 4953*7c478bd9Sstevel@tonic-gate */ 4954*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 4955*7c478bd9Sstevel@tonic-gate int 4956*7c478bd9Sstevel@tonic-gate ufs_putapage( 4957*7c478bd9Sstevel@tonic-gate struct vnode *vp, 4958*7c478bd9Sstevel@tonic-gate page_t *pp, 4959*7c478bd9Sstevel@tonic-gate u_offset_t *offp, 4960*7c478bd9Sstevel@tonic-gate size_t *lenp, /* return values */ 4961*7c478bd9Sstevel@tonic-gate int flags, 4962*7c478bd9Sstevel@tonic-gate struct cred *cr) 4963*7c478bd9Sstevel@tonic-gate { 4964*7c478bd9Sstevel@tonic-gate u_offset_t io_off; 4965*7c478bd9Sstevel@tonic-gate u_offset_t off; 4966*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 4967*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 4968*7c478bd9Sstevel@tonic-gate struct fs *fs; 4969*7c478bd9Sstevel@tonic-gate struct buf *bp; 4970*7c478bd9Sstevel@tonic-gate size_t io_len; 4971*7c478bd9Sstevel@tonic-gate daddr_t bn; 4972*7c478bd9Sstevel@tonic-gate int err; 4973*7c478bd9Sstevel@tonic-gate int contig; 4974*7c478bd9Sstevel@tonic-gate 4975*7c478bd9Sstevel@tonic-gate ASSERT(RW_LOCK_HELD(&ip->i_contents)); 4976*7c478bd9Sstevel@tonic-gate 4977*7c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_UFS, TR_UFS_PUTAPAGE_START, 4978*7c478bd9Sstevel@tonic-gate "ufs_putapage_start:vp %p", vp); 4979*7c478bd9Sstevel@tonic-gate 4980*7c478bd9Sstevel@tonic-gate if (ufsvfsp == NULL) { 4981*7c478bd9Sstevel@tonic-gate err = EIO; 4982*7c478bd9Sstevel@tonic-gate goto out_trace; 4983*7c478bd9Sstevel@tonic-gate } 4984*7c478bd9Sstevel@tonic-gate 4985*7c478bd9Sstevel@tonic-gate fs = ip->i_fs; 4986*7c478bd9Sstevel@tonic-gate ASSERT(fs->fs_ronly == 0); 4987*7c478bd9Sstevel@tonic-gate 4988*7c478bd9Sstevel@tonic-gate /* 4989*7c478bd9Sstevel@tonic-gate * If the modified time on the inode has not already been 4990*7c478bd9Sstevel@tonic-gate * set elsewhere (e.g. for write/setattr) we set the time now. 4991*7c478bd9Sstevel@tonic-gate * This gives us approximate modified times for mmap'ed files 4992*7c478bd9Sstevel@tonic-gate * which are modified via stores in the user address space. 4993*7c478bd9Sstevel@tonic-gate */ 4994*7c478bd9Sstevel@tonic-gate if ((ip->i_flag & IMODTIME) == 0) { 4995*7c478bd9Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 4996*7c478bd9Sstevel@tonic-gate ip->i_flag |= IUPD; 4997*7c478bd9Sstevel@tonic-gate ip->i_seq++; 4998*7c478bd9Sstevel@tonic-gate ITIMES_NOLOCK(ip); 4999*7c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 5000*7c478bd9Sstevel@tonic-gate } 5001*7c478bd9Sstevel@tonic-gate 5002*7c478bd9Sstevel@tonic-gate /* 5003*7c478bd9Sstevel@tonic-gate * Align the request to a block boundry (for old file systems), 5004*7c478bd9Sstevel@tonic-gate * and go ask bmap() how contiguous things are for this file. 5005*7c478bd9Sstevel@tonic-gate */ 5006*7c478bd9Sstevel@tonic-gate off = pp->p_offset & (offset_t)fs->fs_bmask; /* block align it */ 5007*7c478bd9Sstevel@tonic-gate contig = 0; 5008*7c478bd9Sstevel@tonic-gate err = bmap_read(ip, off, &bn, &contig); 5009*7c478bd9Sstevel@tonic-gate if (err) 5010*7c478bd9Sstevel@tonic-gate goto out; 5011*7c478bd9Sstevel@tonic-gate if (bn == UFS_HOLE) { /* putpage never allocates */ 5012*7c478bd9Sstevel@tonic-gate /* 5013*7c478bd9Sstevel@tonic-gate * logging device is in error mode; simply return EIO 5014*7c478bd9Sstevel@tonic-gate */ 5015*7c478bd9Sstevel@tonic-gate if (TRANS_ISERROR(ufsvfsp)) { 5016*7c478bd9Sstevel@tonic-gate err = EIO; 5017*7c478bd9Sstevel@tonic-gate goto out; 5018*7c478bd9Sstevel@tonic-gate } 5019*7c478bd9Sstevel@tonic-gate /* 5020*7c478bd9Sstevel@tonic-gate * Oops, the thread in the window in wrip() did some 5021*7c478bd9Sstevel@tonic-gate * sort of operation which caused a putpage in the bad 5022*7c478bd9Sstevel@tonic-gate * range. In this case, just return an error which will 5023*7c478bd9Sstevel@tonic-gate * cause the software modified bit on the page to set 5024*7c478bd9Sstevel@tonic-gate * and the page will get written out again later. 5025*7c478bd9Sstevel@tonic-gate */ 5026*7c478bd9Sstevel@tonic-gate if (ip->i_writer == curthread) { 5027*7c478bd9Sstevel@tonic-gate err = EIO; 5028*7c478bd9Sstevel@tonic-gate goto out; 5029*7c478bd9Sstevel@tonic-gate } 5030*7c478bd9Sstevel@tonic-gate /* 5031*7c478bd9Sstevel@tonic-gate * If the pager is trying to push a page in the bad range 5032*7c478bd9Sstevel@tonic-gate * just tell him to try again later when things are better. 5033*7c478bd9Sstevel@tonic-gate */ 5034*7c478bd9Sstevel@tonic-gate if (flags & B_ASYNC) { 5035*7c478bd9Sstevel@tonic-gate err = EAGAIN; 5036*7c478bd9Sstevel@tonic-gate goto out; 5037*7c478bd9Sstevel@tonic-gate } 5038*7c478bd9Sstevel@tonic-gate err = ufs_fault(ITOV(ip), "ufs_putapage: bn == UFS_HOLE"); 5039*7c478bd9Sstevel@tonic-gate goto out; 5040*7c478bd9Sstevel@tonic-gate } 5041*7c478bd9Sstevel@tonic-gate 5042*7c478bd9Sstevel@tonic-gate /* 5043*7c478bd9Sstevel@tonic-gate * Take the length (of contiguous bytes) passed back from bmap() 5044*7c478bd9Sstevel@tonic-gate * and _try_ and get a set of pages covering that extent. 5045*7c478bd9Sstevel@tonic-gate */ 5046*7c478bd9Sstevel@tonic-gate pp = pvn_write_kluster(vp, pp, &io_off, &io_len, off, contig, flags); 5047*7c478bd9Sstevel@tonic-gate 5048*7c478bd9Sstevel@tonic-gate /* 5049*7c478bd9Sstevel@tonic-gate * May have run out of memory and not clustered backwards. 5050*7c478bd9Sstevel@tonic-gate * off p_offset 5051*7c478bd9Sstevel@tonic-gate * [ pp - 1 ][ pp ] 5052*7c478bd9Sstevel@tonic-gate * [ block ] 5053*7c478bd9Sstevel@tonic-gate * We told bmap off, so we have to adjust the bn accordingly. 5054*7c478bd9Sstevel@tonic-gate */ 5055*7c478bd9Sstevel@tonic-gate if (io_off > off) { 5056*7c478bd9Sstevel@tonic-gate bn += btod(io_off - off); 5057*7c478bd9Sstevel@tonic-gate contig -= (io_off - off); 5058*7c478bd9Sstevel@tonic-gate } 5059*7c478bd9Sstevel@tonic-gate 5060*7c478bd9Sstevel@tonic-gate /* 5061*7c478bd9Sstevel@tonic-gate * bmap was carefull to tell us the right size so use that. 5062*7c478bd9Sstevel@tonic-gate * There might be unallocated frags at the end. 5063*7c478bd9Sstevel@tonic-gate * LMXXX - bzero the end of the page? We must be writing after EOF. 5064*7c478bd9Sstevel@tonic-gate */ 5065*7c478bd9Sstevel@tonic-gate if (io_len > contig) { 5066*7c478bd9Sstevel@tonic-gate ASSERT(io_len - contig < fs->fs_bsize); 5067*7c478bd9Sstevel@tonic-gate io_len -= (io_len - contig); 5068*7c478bd9Sstevel@tonic-gate } 5069*7c478bd9Sstevel@tonic-gate 5070*7c478bd9Sstevel@tonic-gate /* 5071*7c478bd9Sstevel@tonic-gate * Handle the case where we are writing the last page after EOF. 5072*7c478bd9Sstevel@tonic-gate * 5073*7c478bd9Sstevel@tonic-gate * XXX - just a patch for i-mt3. 5074*7c478bd9Sstevel@tonic-gate */ 5075*7c478bd9Sstevel@tonic-gate if (io_len == 0) { 5076*7c478bd9Sstevel@tonic-gate ASSERT(pp->p_offset >= (u_offset_t)(roundup(ip->i_size, 5077*7c478bd9Sstevel@tonic-gate PAGESIZE))); 5078*7c478bd9Sstevel@tonic-gate io_len = PAGESIZE; 5079*7c478bd9Sstevel@tonic-gate } 5080*7c478bd9Sstevel@tonic-gate 5081*7c478bd9Sstevel@tonic-gate bp = pageio_setup(pp, io_len, ip->i_devvp, B_WRITE | flags); 5082*7c478bd9Sstevel@tonic-gate 5083*7c478bd9Sstevel@tonic-gate ULOCKFS_SET_MOD(ITOUL(ip)); 5084*7c478bd9Sstevel@tonic-gate 5085*7c478bd9Sstevel@tonic-gate bp->b_edev = ip->i_dev; 5086*7c478bd9Sstevel@tonic-gate bp->b_dev = cmpdev(ip->i_dev); 5087*7c478bd9Sstevel@tonic-gate bp->b_blkno = bn; 5088*7c478bd9Sstevel@tonic-gate bp->b_un.b_addr = (caddr_t)0; 5089*7c478bd9Sstevel@tonic-gate bp->b_file = ip->i_vnode; 5090*7c478bd9Sstevel@tonic-gate 5091*7c478bd9Sstevel@tonic-gate if (TRANS_ISTRANS(ufsvfsp)) { 5092*7c478bd9Sstevel@tonic-gate if ((ip->i_mode & IFMT) == IFSHAD) { 5093*7c478bd9Sstevel@tonic-gate TRANS_BUF(ufsvfsp, 0, io_len, bp, DT_SHAD); 5094*7c478bd9Sstevel@tonic-gate } else if (ufsvfsp->vfs_qinod == ip) { 5095*7c478bd9Sstevel@tonic-gate TRANS_DELTA(ufsvfsp, ldbtob(bn), bp->b_bcount, DT_QR, 5096*7c478bd9Sstevel@tonic-gate 0, 0); 5097*7c478bd9Sstevel@tonic-gate } 5098*7c478bd9Sstevel@tonic-gate } 5099*7c478bd9Sstevel@tonic-gate 5100*7c478bd9Sstevel@tonic-gate /* write throttle */ 5101*7c478bd9Sstevel@tonic-gate 5102*7c478bd9Sstevel@tonic-gate ASSERT(bp->b_iodone == NULL); 5103*7c478bd9Sstevel@tonic-gate bp->b_iodone = (int (*)())ufs_iodone; 5104*7c478bd9Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 5105*7c478bd9Sstevel@tonic-gate ip->i_writes += bp->b_bcount; 5106*7c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 5107*7c478bd9Sstevel@tonic-gate 5108*7c478bd9Sstevel@tonic-gate if (bp->b_flags & B_ASYNC) { 5109*7c478bd9Sstevel@tonic-gate if (ufsvfsp->vfs_log) { 5110*7c478bd9Sstevel@tonic-gate lufs_write_strategy(ufsvfsp->vfs_log, bp); 5111*7c478bd9Sstevel@tonic-gate } else if (ufsvfsp->vfs_snapshot) { 5112*7c478bd9Sstevel@tonic-gate fssnap_strategy(&ufsvfsp->vfs_snapshot, bp); 5113*7c478bd9Sstevel@tonic-gate } else { 5114*7c478bd9Sstevel@tonic-gate ufsvfsp->vfs_iotstamp = lbolt; 5115*7c478bd9Sstevel@tonic-gate ub.ub_putasyncs.value.ul++; 5116*7c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 5117*7c478bd9Sstevel@tonic-gate lwp_stat_update(LWP_STAT_OUBLK, 1); 5118*7c478bd9Sstevel@tonic-gate } 5119*7c478bd9Sstevel@tonic-gate } else { 5120*7c478bd9Sstevel@tonic-gate if (ufsvfsp->vfs_log) { 5121*7c478bd9Sstevel@tonic-gate lufs_write_strategy(ufsvfsp->vfs_log, bp); 5122*7c478bd9Sstevel@tonic-gate } else if (ufsvfsp->vfs_snapshot) { 5123*7c478bd9Sstevel@tonic-gate fssnap_strategy(&ufsvfsp->vfs_snapshot, bp); 5124*7c478bd9Sstevel@tonic-gate } else { 5125*7c478bd9Sstevel@tonic-gate ufsvfsp->vfs_iotstamp = lbolt; 5126*7c478bd9Sstevel@tonic-gate ub.ub_putsyncs.value.ul++; 5127*7c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 5128*7c478bd9Sstevel@tonic-gate lwp_stat_update(LWP_STAT_OUBLK, 1); 5129*7c478bd9Sstevel@tonic-gate } 5130*7c478bd9Sstevel@tonic-gate err = biowait(bp); 5131*7c478bd9Sstevel@tonic-gate pageio_done(bp); 5132*7c478bd9Sstevel@tonic-gate pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags); 5133*7c478bd9Sstevel@tonic-gate } 5134*7c478bd9Sstevel@tonic-gate 5135*7c478bd9Sstevel@tonic-gate pp = NULL; 5136*7c478bd9Sstevel@tonic-gate 5137*7c478bd9Sstevel@tonic-gate out: 5138*7c478bd9Sstevel@tonic-gate if (err != 0 && pp != NULL) 5139*7c478bd9Sstevel@tonic-gate pvn_write_done(pp, B_ERROR | B_WRITE | flags); 5140*7c478bd9Sstevel@tonic-gate 5141*7c478bd9Sstevel@tonic-gate if (offp) 5142*7c478bd9Sstevel@tonic-gate *offp = io_off; 5143*7c478bd9Sstevel@tonic-gate if (lenp) 5144*7c478bd9Sstevel@tonic-gate *lenp = io_len; 5145*7c478bd9Sstevel@tonic-gate out_trace: 5146*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_PUTAPAGE_END, 5147*7c478bd9Sstevel@tonic-gate "ufs_putapage_end:vp %p error %d", vp, err); 5148*7c478bd9Sstevel@tonic-gate return (err); 5149*7c478bd9Sstevel@tonic-gate } 5150*7c478bd9Sstevel@tonic-gate 5151*7c478bd9Sstevel@tonic-gate /* ARGSUSED */ 5152*7c478bd9Sstevel@tonic-gate static int 5153*7c478bd9Sstevel@tonic-gate ufs_map(struct vnode *vp, 5154*7c478bd9Sstevel@tonic-gate offset_t off, 5155*7c478bd9Sstevel@tonic-gate struct as *as, 5156*7c478bd9Sstevel@tonic-gate caddr_t *addrp, 5157*7c478bd9Sstevel@tonic-gate size_t len, 5158*7c478bd9Sstevel@tonic-gate uchar_t prot, 5159*7c478bd9Sstevel@tonic-gate uchar_t maxprot, 5160*7c478bd9Sstevel@tonic-gate uint_t flags, 5161*7c478bd9Sstevel@tonic-gate struct cred *cr) 5162*7c478bd9Sstevel@tonic-gate { 5163*7c478bd9Sstevel@tonic-gate struct segvn_crargs vn_a; 5164*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp = VTOI(vp)->i_ufsvfs; 5165*7c478bd9Sstevel@tonic-gate struct ulockfs *ulp; 5166*7c478bd9Sstevel@tonic-gate int error; 5167*7c478bd9Sstevel@tonic-gate 5168*7c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_UFS, TR_UFS_MAP_START, 5169*7c478bd9Sstevel@tonic-gate "ufs_map_start:vp %p", vp); 5170*7c478bd9Sstevel@tonic-gate 5171*7c478bd9Sstevel@tonic-gate error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_MAP_MASK); 5172*7c478bd9Sstevel@tonic-gate if (error) 5173*7c478bd9Sstevel@tonic-gate goto out; 5174*7c478bd9Sstevel@tonic-gate 5175*7c478bd9Sstevel@tonic-gate if (vp->v_flag & VNOMAP) { 5176*7c478bd9Sstevel@tonic-gate error = ENOSYS; 5177*7c478bd9Sstevel@tonic-gate goto unlock; 5178*7c478bd9Sstevel@tonic-gate } 5179*7c478bd9Sstevel@tonic-gate 5180*7c478bd9Sstevel@tonic-gate if (off < (offset_t)0 || (offset_t)(off + len) < (offset_t)0) { 5181*7c478bd9Sstevel@tonic-gate error = ENXIO; 5182*7c478bd9Sstevel@tonic-gate goto unlock; 5183*7c478bd9Sstevel@tonic-gate } 5184*7c478bd9Sstevel@tonic-gate 5185*7c478bd9Sstevel@tonic-gate if (vp->v_type != VREG) { 5186*7c478bd9Sstevel@tonic-gate error = ENODEV; 5187*7c478bd9Sstevel@tonic-gate goto unlock; 5188*7c478bd9Sstevel@tonic-gate } 5189*7c478bd9Sstevel@tonic-gate 5190*7c478bd9Sstevel@tonic-gate /* 5191*7c478bd9Sstevel@tonic-gate * If file is being locked, disallow mapping. 5192*7c478bd9Sstevel@tonic-gate */ 5193*7c478bd9Sstevel@tonic-gate if (vn_has_mandatory_locks(vp, VTOI(vp)->i_mode)) { 5194*7c478bd9Sstevel@tonic-gate error = EAGAIN; 5195*7c478bd9Sstevel@tonic-gate goto unlock; 5196*7c478bd9Sstevel@tonic-gate } 5197*7c478bd9Sstevel@tonic-gate 5198*7c478bd9Sstevel@tonic-gate as_rangelock(as); 5199*7c478bd9Sstevel@tonic-gate if ((flags & MAP_FIXED) == 0) { 5200*7c478bd9Sstevel@tonic-gate map_addr(addrp, len, off, 1, flags); 5201*7c478bd9Sstevel@tonic-gate if (*addrp == NULL) { 5202*7c478bd9Sstevel@tonic-gate as_rangeunlock(as); 5203*7c478bd9Sstevel@tonic-gate error = ENOMEM; 5204*7c478bd9Sstevel@tonic-gate goto unlock; 5205*7c478bd9Sstevel@tonic-gate } 5206*7c478bd9Sstevel@tonic-gate } else { 5207*7c478bd9Sstevel@tonic-gate /* 5208*7c478bd9Sstevel@tonic-gate * User specified address - blow away any previous mappings 5209*7c478bd9Sstevel@tonic-gate */ 5210*7c478bd9Sstevel@tonic-gate (void) as_unmap(as, *addrp, len); 5211*7c478bd9Sstevel@tonic-gate } 5212*7c478bd9Sstevel@tonic-gate 5213*7c478bd9Sstevel@tonic-gate vn_a.vp = vp; 5214*7c478bd9Sstevel@tonic-gate vn_a.offset = (u_offset_t)off; 5215*7c478bd9Sstevel@tonic-gate vn_a.type = flags & MAP_TYPE; 5216*7c478bd9Sstevel@tonic-gate vn_a.prot = prot; 5217*7c478bd9Sstevel@tonic-gate vn_a.maxprot = maxprot; 5218*7c478bd9Sstevel@tonic-gate vn_a.cred = cr; 5219*7c478bd9Sstevel@tonic-gate vn_a.amp = NULL; 5220*7c478bd9Sstevel@tonic-gate vn_a.flags = flags & ~MAP_TYPE; 5221*7c478bd9Sstevel@tonic-gate vn_a.szc = 0; 5222*7c478bd9Sstevel@tonic-gate vn_a.lgrp_mem_policy_flags = 0; 5223*7c478bd9Sstevel@tonic-gate 5224*7c478bd9Sstevel@tonic-gate error = as_map(as, *addrp, len, segvn_create, &vn_a); 5225*7c478bd9Sstevel@tonic-gate as_rangeunlock(as); 5226*7c478bd9Sstevel@tonic-gate 5227*7c478bd9Sstevel@tonic-gate unlock: 5228*7c478bd9Sstevel@tonic-gate if (ulp) { 5229*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 5230*7c478bd9Sstevel@tonic-gate } 5231*7c478bd9Sstevel@tonic-gate out: 5232*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_UFS, TR_UFS_MAP_END, 5233*7c478bd9Sstevel@tonic-gate "ufs_map_end:vp %p error %d", vp, error); 5234*7c478bd9Sstevel@tonic-gate return (error); 5235*7c478bd9Sstevel@tonic-gate } 5236*7c478bd9Sstevel@tonic-gate 5237*7c478bd9Sstevel@tonic-gate /* ARGSUSED */ 5238*7c478bd9Sstevel@tonic-gate static int 5239*7c478bd9Sstevel@tonic-gate ufs_addmap(struct vnode *vp, 5240*7c478bd9Sstevel@tonic-gate offset_t off, 5241*7c478bd9Sstevel@tonic-gate struct as *as, 5242*7c478bd9Sstevel@tonic-gate caddr_t addr, 5243*7c478bd9Sstevel@tonic-gate size_t len, 5244*7c478bd9Sstevel@tonic-gate uchar_t prot, 5245*7c478bd9Sstevel@tonic-gate uchar_t maxprot, 5246*7c478bd9Sstevel@tonic-gate uint_t flags, 5247*7c478bd9Sstevel@tonic-gate struct cred *cr) 5248*7c478bd9Sstevel@tonic-gate { 5249*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 5250*7c478bd9Sstevel@tonic-gate 5251*7c478bd9Sstevel@tonic-gate if (vp->v_flag & VNOMAP) { 5252*7c478bd9Sstevel@tonic-gate return (ENOSYS); 5253*7c478bd9Sstevel@tonic-gate } 5254*7c478bd9Sstevel@tonic-gate 5255*7c478bd9Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 5256*7c478bd9Sstevel@tonic-gate ip->i_mapcnt += btopr(len); 5257*7c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 5258*7c478bd9Sstevel@tonic-gate return (0); 5259*7c478bd9Sstevel@tonic-gate } 5260*7c478bd9Sstevel@tonic-gate 5261*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 5262*7c478bd9Sstevel@tonic-gate static int 5263*7c478bd9Sstevel@tonic-gate ufs_delmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr, 5264*7c478bd9Sstevel@tonic-gate size_t len, uint_t prot, uint_t maxprot, uint_t flags, 5265*7c478bd9Sstevel@tonic-gate struct cred *cr) 5266*7c478bd9Sstevel@tonic-gate { 5267*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 5268*7c478bd9Sstevel@tonic-gate 5269*7c478bd9Sstevel@tonic-gate if (vp->v_flag & VNOMAP) { 5270*7c478bd9Sstevel@tonic-gate return (ENOSYS); 5271*7c478bd9Sstevel@tonic-gate } 5272*7c478bd9Sstevel@tonic-gate 5273*7c478bd9Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 5274*7c478bd9Sstevel@tonic-gate ip->i_mapcnt -= btopr(len); /* Count released mappings */ 5275*7c478bd9Sstevel@tonic-gate ASSERT(ip->i_mapcnt >= 0); 5276*7c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 5277*7c478bd9Sstevel@tonic-gate return (0); 5278*7c478bd9Sstevel@tonic-gate } 5279*7c478bd9Sstevel@tonic-gate /* 5280*7c478bd9Sstevel@tonic-gate * Return the answer requested to poll() for non-device files 5281*7c478bd9Sstevel@tonic-gate */ 5282*7c478bd9Sstevel@tonic-gate struct pollhead ufs_pollhd; 5283*7c478bd9Sstevel@tonic-gate 5284*7c478bd9Sstevel@tonic-gate /* ARGSUSED */ 5285*7c478bd9Sstevel@tonic-gate int 5286*7c478bd9Sstevel@tonic-gate ufs_poll(vnode_t *vp, short ev, int any, short *revp, struct pollhead **phpp) 5287*7c478bd9Sstevel@tonic-gate { 5288*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp; 5289*7c478bd9Sstevel@tonic-gate 5290*7c478bd9Sstevel@tonic-gate *revp = 0; 5291*7c478bd9Sstevel@tonic-gate ufsvfsp = VTOI(vp)->i_ufsvfs; 5292*7c478bd9Sstevel@tonic-gate 5293*7c478bd9Sstevel@tonic-gate if (!ufsvfsp) { 5294*7c478bd9Sstevel@tonic-gate *revp = POLLHUP; 5295*7c478bd9Sstevel@tonic-gate goto out; 5296*7c478bd9Sstevel@tonic-gate } 5297*7c478bd9Sstevel@tonic-gate 5298*7c478bd9Sstevel@tonic-gate if (ULOCKFS_IS_HLOCK(&ufsvfsp->vfs_ulockfs) || 5299*7c478bd9Sstevel@tonic-gate ULOCKFS_IS_ELOCK(&ufsvfsp->vfs_ulockfs)) { 5300*7c478bd9Sstevel@tonic-gate *revp |= POLLERR; 5301*7c478bd9Sstevel@tonic-gate 5302*7c478bd9Sstevel@tonic-gate } else { 5303*7c478bd9Sstevel@tonic-gate if ((ev & POLLOUT) && !ufsvfsp->vfs_fs->fs_ronly && 5304*7c478bd9Sstevel@tonic-gate !ULOCKFS_IS_WLOCK(&ufsvfsp->vfs_ulockfs)) 5305*7c478bd9Sstevel@tonic-gate *revp |= POLLOUT; 5306*7c478bd9Sstevel@tonic-gate 5307*7c478bd9Sstevel@tonic-gate if ((ev & POLLWRBAND) && !ufsvfsp->vfs_fs->fs_ronly && 5308*7c478bd9Sstevel@tonic-gate !ULOCKFS_IS_WLOCK(&ufsvfsp->vfs_ulockfs)) 5309*7c478bd9Sstevel@tonic-gate *revp |= POLLWRBAND; 5310*7c478bd9Sstevel@tonic-gate 5311*7c478bd9Sstevel@tonic-gate if (ev & POLLIN) 5312*7c478bd9Sstevel@tonic-gate *revp |= POLLIN; 5313*7c478bd9Sstevel@tonic-gate 5314*7c478bd9Sstevel@tonic-gate if (ev & POLLRDNORM) 5315*7c478bd9Sstevel@tonic-gate *revp |= POLLRDNORM; 5316*7c478bd9Sstevel@tonic-gate 5317*7c478bd9Sstevel@tonic-gate if (ev & POLLRDBAND) 5318*7c478bd9Sstevel@tonic-gate *revp |= POLLRDBAND; 5319*7c478bd9Sstevel@tonic-gate } 5320*7c478bd9Sstevel@tonic-gate 5321*7c478bd9Sstevel@tonic-gate if ((ev & POLLPRI) && (*revp & (POLLERR|POLLHUP))) 5322*7c478bd9Sstevel@tonic-gate *revp |= POLLPRI; 5323*7c478bd9Sstevel@tonic-gate out: 5324*7c478bd9Sstevel@tonic-gate *phpp = !any && !*revp ? &ufs_pollhd : (struct pollhead *)NULL; 5325*7c478bd9Sstevel@tonic-gate 5326*7c478bd9Sstevel@tonic-gate return (0); 5327*7c478bd9Sstevel@tonic-gate } 5328*7c478bd9Sstevel@tonic-gate 5329*7c478bd9Sstevel@tonic-gate /* ARGSUSED */ 5330*7c478bd9Sstevel@tonic-gate static int 5331*7c478bd9Sstevel@tonic-gate ufs_l_pathconf(struct vnode *vp, int cmd, ulong_t *valp, struct cred *cr) 5332*7c478bd9Sstevel@tonic-gate { 5333*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp = VTOI(vp)->i_ufsvfs; 5334*7c478bd9Sstevel@tonic-gate struct ulockfs *ulp = NULL; 5335*7c478bd9Sstevel@tonic-gate struct inode *sip = NULL; 5336*7c478bd9Sstevel@tonic-gate int error; 5337*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 5338*7c478bd9Sstevel@tonic-gate int issync; 5339*7c478bd9Sstevel@tonic-gate 5340*7c478bd9Sstevel@tonic-gate error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_PATHCONF_MASK); 5341*7c478bd9Sstevel@tonic-gate if (error) 5342*7c478bd9Sstevel@tonic-gate return (error); 5343*7c478bd9Sstevel@tonic-gate 5344*7c478bd9Sstevel@tonic-gate switch (cmd) { 5345*7c478bd9Sstevel@tonic-gate /* 5346*7c478bd9Sstevel@tonic-gate * Have to handle _PC_NAME_MAX here, because the normal way 5347*7c478bd9Sstevel@tonic-gate * [fs_pathconf() -> VOP_STATVFS() -> ufs_statvfs()] 5348*7c478bd9Sstevel@tonic-gate * results in a lock ordering reversal between 5349*7c478bd9Sstevel@tonic-gate * ufs_lockfs_{begin,end}() and 5350*7c478bd9Sstevel@tonic-gate * ufs_thread_{suspend,continue}(). 5351*7c478bd9Sstevel@tonic-gate * 5352*7c478bd9Sstevel@tonic-gate * Keep in sync with ufs_statvfs(). 5353*7c478bd9Sstevel@tonic-gate */ 5354*7c478bd9Sstevel@tonic-gate case _PC_NAME_MAX: 5355*7c478bd9Sstevel@tonic-gate *valp = MAXNAMLEN; 5356*7c478bd9Sstevel@tonic-gate break; 5357*7c478bd9Sstevel@tonic-gate 5358*7c478bd9Sstevel@tonic-gate case _PC_FILESIZEBITS: 5359*7c478bd9Sstevel@tonic-gate if (ufsvfsp->vfs_lfflags & UFS_LARGEFILES) 5360*7c478bd9Sstevel@tonic-gate *valp = UFS_FILESIZE_BITS; 5361*7c478bd9Sstevel@tonic-gate else 5362*7c478bd9Sstevel@tonic-gate *valp = 32; 5363*7c478bd9Sstevel@tonic-gate break; 5364*7c478bd9Sstevel@tonic-gate 5365*7c478bd9Sstevel@tonic-gate case _PC_XATTR_EXISTS: 5366*7c478bd9Sstevel@tonic-gate if (vp->v_vfsp->vfs_flag & VFS_XATTR) { 5367*7c478bd9Sstevel@tonic-gate 5368*7c478bd9Sstevel@tonic-gate error = ufs_xattr_getattrdir(vp, &sip, LOOKUP_XATTR, 5369*7c478bd9Sstevel@tonic-gate cr); 5370*7c478bd9Sstevel@tonic-gate if (error == 0 && sip != NULL) { 5371*7c478bd9Sstevel@tonic-gate /* Start transaction */ 5372*7c478bd9Sstevel@tonic-gate if (ulp) { 5373*7c478bd9Sstevel@tonic-gate TRANS_BEGIN_CSYNC(ufsvfsp, issync, 5374*7c478bd9Sstevel@tonic-gate TOP_RMDIR, TOP_RMDIR_SIZE); 5375*7c478bd9Sstevel@tonic-gate } 5376*7c478bd9Sstevel@tonic-gate /* 5377*7c478bd9Sstevel@tonic-gate * Is directory empty 5378*7c478bd9Sstevel@tonic-gate */ 5379*7c478bd9Sstevel@tonic-gate rw_enter(&sip->i_rwlock, RW_WRITER); 5380*7c478bd9Sstevel@tonic-gate rw_enter(&sip->i_contents, RW_WRITER); 5381*7c478bd9Sstevel@tonic-gate if (ufs_xattrdirempty(sip, 5382*7c478bd9Sstevel@tonic-gate sip->i_number, CRED())) { 5383*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 5384*7c478bd9Sstevel@tonic-gate ufs_unhook_shadow(ip, sip); 5385*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 5386*7c478bd9Sstevel@tonic-gate 5387*7c478bd9Sstevel@tonic-gate *valp = 0; 5388*7c478bd9Sstevel@tonic-gate 5389*7c478bd9Sstevel@tonic-gate } else 5390*7c478bd9Sstevel@tonic-gate *valp = 1; 5391*7c478bd9Sstevel@tonic-gate rw_exit(&sip->i_contents); 5392*7c478bd9Sstevel@tonic-gate rw_exit(&sip->i_rwlock); 5393*7c478bd9Sstevel@tonic-gate if (ulp) { 5394*7c478bd9Sstevel@tonic-gate TRANS_END_CSYNC(ufsvfsp, error, issync, 5395*7c478bd9Sstevel@tonic-gate TOP_RMDIR, TOP_RMDIR_SIZE); 5396*7c478bd9Sstevel@tonic-gate } 5397*7c478bd9Sstevel@tonic-gate VN_RELE(ITOV(sip)); 5398*7c478bd9Sstevel@tonic-gate } else if (error == ENOENT) { 5399*7c478bd9Sstevel@tonic-gate *valp = 0; 5400*7c478bd9Sstevel@tonic-gate error = 0; 5401*7c478bd9Sstevel@tonic-gate } 5402*7c478bd9Sstevel@tonic-gate } else { 5403*7c478bd9Sstevel@tonic-gate error = fs_pathconf(vp, cmd, valp, cr); 5404*7c478bd9Sstevel@tonic-gate } 5405*7c478bd9Sstevel@tonic-gate break; 5406*7c478bd9Sstevel@tonic-gate 5407*7c478bd9Sstevel@tonic-gate case _PC_ACL_ENABLED: 5408*7c478bd9Sstevel@tonic-gate *valp = _ACL_ACLENT_ENABLED; 5409*7c478bd9Sstevel@tonic-gate break; 5410*7c478bd9Sstevel@tonic-gate 5411*7c478bd9Sstevel@tonic-gate case _PC_MIN_HOLE_SIZE: 5412*7c478bd9Sstevel@tonic-gate *valp = (ulong_t)ip->i_fs->fs_bsize; 5413*7c478bd9Sstevel@tonic-gate break; 5414*7c478bd9Sstevel@tonic-gate 5415*7c478bd9Sstevel@tonic-gate default: 5416*7c478bd9Sstevel@tonic-gate error = fs_pathconf(vp, cmd, valp, cr); 5417*7c478bd9Sstevel@tonic-gate } 5418*7c478bd9Sstevel@tonic-gate 5419*7c478bd9Sstevel@tonic-gate if (ulp != NULL) { 5420*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 5421*7c478bd9Sstevel@tonic-gate } 5422*7c478bd9Sstevel@tonic-gate return (error); 5423*7c478bd9Sstevel@tonic-gate } 5424*7c478bd9Sstevel@tonic-gate 5425*7c478bd9Sstevel@tonic-gate int ufs_pageio_writes, ufs_pageio_reads; 5426*7c478bd9Sstevel@tonic-gate 5427*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 5428*7c478bd9Sstevel@tonic-gate static int 5429*7c478bd9Sstevel@tonic-gate ufs_pageio(struct vnode *vp, page_t *pp, u_offset_t io_off, size_t io_len, 5430*7c478bd9Sstevel@tonic-gate int flags, struct cred *cr) 5431*7c478bd9Sstevel@tonic-gate { 5432*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 5433*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp; 5434*7c478bd9Sstevel@tonic-gate page_t *npp = NULL, *opp = NULL, *cpp = pp; 5435*7c478bd9Sstevel@tonic-gate struct buf *bp; 5436*7c478bd9Sstevel@tonic-gate daddr_t bn; 5437*7c478bd9Sstevel@tonic-gate size_t done_len = 0, cur_len = 0; 5438*7c478bd9Sstevel@tonic-gate int err = 0; 5439*7c478bd9Sstevel@tonic-gate int contig = 0; 5440*7c478bd9Sstevel@tonic-gate int dolock; 5441*7c478bd9Sstevel@tonic-gate int vmpss = 0; 5442*7c478bd9Sstevel@tonic-gate 5443*7c478bd9Sstevel@tonic-gate if ((flags & B_READ) && pp != NULL && pp->p_vnode == vp && 5444*7c478bd9Sstevel@tonic-gate vp->v_mpssdata != NULL) { 5445*7c478bd9Sstevel@tonic-gate vmpss = 1; 5446*7c478bd9Sstevel@tonic-gate } 5447*7c478bd9Sstevel@tonic-gate 5448*7c478bd9Sstevel@tonic-gate dolock = (rw_owner(&ip->i_contents) != curthread); 5449*7c478bd9Sstevel@tonic-gate /* 5450*7c478bd9Sstevel@tonic-gate * We need a better check. Ideally, we would use another 5451*7c478bd9Sstevel@tonic-gate * vnodeops so that hlocked and forcibly unmounted file 5452*7c478bd9Sstevel@tonic-gate * systems would return EIO where appropriate and w/o the 5453*7c478bd9Sstevel@tonic-gate * need for these checks. 5454*7c478bd9Sstevel@tonic-gate */ 5455*7c478bd9Sstevel@tonic-gate if ((ufsvfsp = ip->i_ufsvfs) == NULL) 5456*7c478bd9Sstevel@tonic-gate return (EIO); 5457*7c478bd9Sstevel@tonic-gate 5458*7c478bd9Sstevel@tonic-gate if (dolock) { 5459*7c478bd9Sstevel@tonic-gate /* 5460*7c478bd9Sstevel@tonic-gate * segvn may call VOP_PAGEIO() instead of VOP_GETPAGE() to 5461*7c478bd9Sstevel@tonic-gate * handle a fault against a segment that maps vnode pages with 5462*7c478bd9Sstevel@tonic-gate * large mappings. Segvn creates pages and holds them locked 5463*7c478bd9Sstevel@tonic-gate * SE_EXCL during VOP_PAGEIO() call. In this case we have to 5464*7c478bd9Sstevel@tonic-gate * use rw_tryenter() to avoid a potential deadlock since in 5465*7c478bd9Sstevel@tonic-gate * lock order i_contents needs to be taken first. 5466*7c478bd9Sstevel@tonic-gate * Segvn will retry via VOP_GETPAGE() if VOP_PAGEIO() fails. 5467*7c478bd9Sstevel@tonic-gate */ 5468*7c478bd9Sstevel@tonic-gate if (!vmpss) { 5469*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER); 5470*7c478bd9Sstevel@tonic-gate } else if (!rw_tryenter(&ip->i_contents, RW_READER)) { 5471*7c478bd9Sstevel@tonic-gate return (EDEADLK); 5472*7c478bd9Sstevel@tonic-gate } 5473*7c478bd9Sstevel@tonic-gate } 5474*7c478bd9Sstevel@tonic-gate 5475*7c478bd9Sstevel@tonic-gate if (pp == NULL) { 5476*7c478bd9Sstevel@tonic-gate if (bmap_has_holes(ip)) { 5477*7c478bd9Sstevel@tonic-gate err = ENOSYS; 5478*7c478bd9Sstevel@tonic-gate } else { 5479*7c478bd9Sstevel@tonic-gate err = EINVAL; 5480*7c478bd9Sstevel@tonic-gate } 5481*7c478bd9Sstevel@tonic-gate if (dolock) 5482*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 5483*7c478bd9Sstevel@tonic-gate return (err); 5484*7c478bd9Sstevel@tonic-gate } 5485*7c478bd9Sstevel@tonic-gate 5486*7c478bd9Sstevel@tonic-gate /* 5487*7c478bd9Sstevel@tonic-gate * Break the io request into chunks, one for each contiguous 5488*7c478bd9Sstevel@tonic-gate * stretch of disk blocks in the target file. 5489*7c478bd9Sstevel@tonic-gate */ 5490*7c478bd9Sstevel@tonic-gate while (done_len < io_len) { 5491*7c478bd9Sstevel@tonic-gate ASSERT(cpp); 5492*7c478bd9Sstevel@tonic-gate contig = 0; 5493*7c478bd9Sstevel@tonic-gate if (err = bmap_read(ip, (u_offset_t)(io_off + done_len), 5494*7c478bd9Sstevel@tonic-gate &bn, &contig)) 5495*7c478bd9Sstevel@tonic-gate break; 5496*7c478bd9Sstevel@tonic-gate 5497*7c478bd9Sstevel@tonic-gate if (bn == UFS_HOLE) { /* No holey swapfiles */ 5498*7c478bd9Sstevel@tonic-gate if (vmpss) { 5499*7c478bd9Sstevel@tonic-gate err = EFAULT; 5500*7c478bd9Sstevel@tonic-gate break; 5501*7c478bd9Sstevel@tonic-gate } 5502*7c478bd9Sstevel@tonic-gate err = ufs_fault(ITOV(ip), "ufs_pageio: bn == UFS_HOLE"); 5503*7c478bd9Sstevel@tonic-gate break; 5504*7c478bd9Sstevel@tonic-gate } 5505*7c478bd9Sstevel@tonic-gate 5506*7c478bd9Sstevel@tonic-gate cur_len = MIN(io_len - done_len, contig); 5507*7c478bd9Sstevel@tonic-gate /* 5508*7c478bd9Sstevel@tonic-gate * Zero out a page beyond EOF, when the last block of 5509*7c478bd9Sstevel@tonic-gate * a file is a UFS fragment so that ufs_pageio() can be used 5510*7c478bd9Sstevel@tonic-gate * instead of ufs_getpage() to handle faults against 5511*7c478bd9Sstevel@tonic-gate * segvn segments that use large pages. 5512*7c478bd9Sstevel@tonic-gate */ 5513*7c478bd9Sstevel@tonic-gate page_list_break(&cpp, &npp, btopr(cur_len)); 5514*7c478bd9Sstevel@tonic-gate if ((flags & B_READ) && (cur_len & PAGEOFFSET)) { 5515*7c478bd9Sstevel@tonic-gate size_t xlen = cur_len & PAGEOFFSET; 5516*7c478bd9Sstevel@tonic-gate pagezero(cpp->p_prev, xlen, PAGESIZE - xlen); 5517*7c478bd9Sstevel@tonic-gate } 5518*7c478bd9Sstevel@tonic-gate 5519*7c478bd9Sstevel@tonic-gate bp = pageio_setup(cpp, cur_len, ip->i_devvp, flags); 5520*7c478bd9Sstevel@tonic-gate ASSERT(bp != NULL); 5521*7c478bd9Sstevel@tonic-gate 5522*7c478bd9Sstevel@tonic-gate bp->b_edev = ip->i_dev; 5523*7c478bd9Sstevel@tonic-gate bp->b_dev = cmpdev(ip->i_dev); 5524*7c478bd9Sstevel@tonic-gate bp->b_blkno = bn; 5525*7c478bd9Sstevel@tonic-gate bp->b_un.b_addr = (caddr_t)0; 5526*7c478bd9Sstevel@tonic-gate bp->b_file = ip->i_vnode; 5527*7c478bd9Sstevel@tonic-gate 5528*7c478bd9Sstevel@tonic-gate ufsvfsp->vfs_iotstamp = lbolt; 5529*7c478bd9Sstevel@tonic-gate ub.ub_pageios.value.ul++; 5530*7c478bd9Sstevel@tonic-gate if (ufsvfsp->vfs_snapshot) 5531*7c478bd9Sstevel@tonic-gate fssnap_strategy(&(ufsvfsp->vfs_snapshot), bp); 5532*7c478bd9Sstevel@tonic-gate else 5533*7c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 5534*7c478bd9Sstevel@tonic-gate 5535*7c478bd9Sstevel@tonic-gate if (flags & B_READ) 5536*7c478bd9Sstevel@tonic-gate ufs_pageio_reads++; 5537*7c478bd9Sstevel@tonic-gate else 5538*7c478bd9Sstevel@tonic-gate ufs_pageio_writes++; 5539*7c478bd9Sstevel@tonic-gate if (flags & B_READ) 5540*7c478bd9Sstevel@tonic-gate lwp_stat_update(LWP_STAT_INBLK, 1); 5541*7c478bd9Sstevel@tonic-gate else 5542*7c478bd9Sstevel@tonic-gate lwp_stat_update(LWP_STAT_OUBLK, 1); 5543*7c478bd9Sstevel@tonic-gate /* 5544*7c478bd9Sstevel@tonic-gate * If the request is not B_ASYNC, wait for i/o to complete 5545*7c478bd9Sstevel@tonic-gate * and re-assemble the page list to return to the caller. 5546*7c478bd9Sstevel@tonic-gate * If it is B_ASYNC we leave the page list in pieces and 5547*7c478bd9Sstevel@tonic-gate * cleanup() will dispose of them. 5548*7c478bd9Sstevel@tonic-gate */ 5549*7c478bd9Sstevel@tonic-gate if ((flags & B_ASYNC) == 0) { 5550*7c478bd9Sstevel@tonic-gate err = biowait(bp); 5551*7c478bd9Sstevel@tonic-gate pageio_done(bp); 5552*7c478bd9Sstevel@tonic-gate if (err) 5553*7c478bd9Sstevel@tonic-gate break; 5554*7c478bd9Sstevel@tonic-gate page_list_concat(&opp, &cpp); 5555*7c478bd9Sstevel@tonic-gate } 5556*7c478bd9Sstevel@tonic-gate cpp = npp; 5557*7c478bd9Sstevel@tonic-gate npp = NULL; 5558*7c478bd9Sstevel@tonic-gate if (flags & B_READ) 5559*7c478bd9Sstevel@tonic-gate cur_len = P2ROUNDUP_TYPED(cur_len, PAGESIZE, size_t); 5560*7c478bd9Sstevel@tonic-gate done_len += cur_len; 5561*7c478bd9Sstevel@tonic-gate } 5562*7c478bd9Sstevel@tonic-gate ASSERT(err || (cpp == NULL && npp == NULL && done_len == io_len)); 5563*7c478bd9Sstevel@tonic-gate if (err) { 5564*7c478bd9Sstevel@tonic-gate if (flags & B_ASYNC) { 5565*7c478bd9Sstevel@tonic-gate /* Cleanup unprocessed parts of list */ 5566*7c478bd9Sstevel@tonic-gate page_list_concat(&cpp, &npp); 5567*7c478bd9Sstevel@tonic-gate if (flags & B_READ) 5568*7c478bd9Sstevel@tonic-gate pvn_read_done(cpp, B_ERROR); 5569*7c478bd9Sstevel@tonic-gate else 5570*7c478bd9Sstevel@tonic-gate pvn_write_done(cpp, B_ERROR); 5571*7c478bd9Sstevel@tonic-gate } else { 5572*7c478bd9Sstevel@tonic-gate /* Re-assemble list and let caller clean up */ 5573*7c478bd9Sstevel@tonic-gate page_list_concat(&opp, &cpp); 5574*7c478bd9Sstevel@tonic-gate page_list_concat(&opp, &npp); 5575*7c478bd9Sstevel@tonic-gate } 5576*7c478bd9Sstevel@tonic-gate } 5577*7c478bd9Sstevel@tonic-gate if (dolock) 5578*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 5579*7c478bd9Sstevel@tonic-gate return (err); 5580*7c478bd9Sstevel@tonic-gate } 5581*7c478bd9Sstevel@tonic-gate 5582*7c478bd9Sstevel@tonic-gate /* 5583*7c478bd9Sstevel@tonic-gate * Called when the kernel is in a frozen state to dump data 5584*7c478bd9Sstevel@tonic-gate * directly to the device. It uses a private dump data structure, 5585*7c478bd9Sstevel@tonic-gate * set up by dump_ctl, to locate the correct disk block to which to dump. 5586*7c478bd9Sstevel@tonic-gate */ 5587*7c478bd9Sstevel@tonic-gate static int 5588*7c478bd9Sstevel@tonic-gate ufs_dump(vnode_t *vp, caddr_t addr, int ldbn, int dblks) 5589*7c478bd9Sstevel@tonic-gate { 5590*7c478bd9Sstevel@tonic-gate u_offset_t file_size; 5591*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 5592*7c478bd9Sstevel@tonic-gate struct fs *fs = ip->i_fs; 5593*7c478bd9Sstevel@tonic-gate daddr_t dbn, lfsbn; 5594*7c478bd9Sstevel@tonic-gate int disk_blks = fs->fs_bsize >> DEV_BSHIFT; 5595*7c478bd9Sstevel@tonic-gate int error = 0; 5596*7c478bd9Sstevel@tonic-gate int ndbs, nfsbs; 5597*7c478bd9Sstevel@tonic-gate 5598*7c478bd9Sstevel@tonic-gate /* 5599*7c478bd9Sstevel@tonic-gate * forced unmount case 5600*7c478bd9Sstevel@tonic-gate */ 5601*7c478bd9Sstevel@tonic-gate if (ip->i_ufsvfs == NULL) 5602*7c478bd9Sstevel@tonic-gate return (EIO); 5603*7c478bd9Sstevel@tonic-gate /* 5604*7c478bd9Sstevel@tonic-gate * Validate the inode that it has not been modified since 5605*7c478bd9Sstevel@tonic-gate * the dump structure is allocated. 5606*7c478bd9Sstevel@tonic-gate */ 5607*7c478bd9Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 5608*7c478bd9Sstevel@tonic-gate if ((dump_info == NULL) || 5609*7c478bd9Sstevel@tonic-gate (dump_info->ip != ip) || 5610*7c478bd9Sstevel@tonic-gate (dump_info->time.tv_sec != ip->i_mtime.tv_sec) || 5611*7c478bd9Sstevel@tonic-gate (dump_info->time.tv_usec != ip->i_mtime.tv_usec)) { 5612*7c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 5613*7c478bd9Sstevel@tonic-gate return (-1); 5614*7c478bd9Sstevel@tonic-gate } 5615*7c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 5616*7c478bd9Sstevel@tonic-gate 5617*7c478bd9Sstevel@tonic-gate /* 5618*7c478bd9Sstevel@tonic-gate * See that the file has room for this write 5619*7c478bd9Sstevel@tonic-gate */ 5620*7c478bd9Sstevel@tonic-gate UFS_GET_ISIZE(&file_size, ip); 5621*7c478bd9Sstevel@tonic-gate 5622*7c478bd9Sstevel@tonic-gate if (ldbtob((offset_t)(ldbn + dblks)) > file_size) 5623*7c478bd9Sstevel@tonic-gate return (ENOSPC); 5624*7c478bd9Sstevel@tonic-gate 5625*7c478bd9Sstevel@tonic-gate /* 5626*7c478bd9Sstevel@tonic-gate * Find the physical disk block numbers from the dump 5627*7c478bd9Sstevel@tonic-gate * private data structure directly and write out the data 5628*7c478bd9Sstevel@tonic-gate * in contiguous block lumps 5629*7c478bd9Sstevel@tonic-gate */ 5630*7c478bd9Sstevel@tonic-gate while (dblks > 0 && !error) { 5631*7c478bd9Sstevel@tonic-gate lfsbn = (daddr_t)lblkno(fs, ldbtob((offset_t)ldbn)); 5632*7c478bd9Sstevel@tonic-gate dbn = fsbtodb(fs, dump_info->dblk[lfsbn]) + ldbn % disk_blks; 5633*7c478bd9Sstevel@tonic-gate nfsbs = 1; 5634*7c478bd9Sstevel@tonic-gate ndbs = disk_blks - ldbn % disk_blks; 5635*7c478bd9Sstevel@tonic-gate while (ndbs < dblks && fsbtodb(fs, dump_info->dblk[lfsbn + 5636*7c478bd9Sstevel@tonic-gate nfsbs]) == dbn + ndbs) { 5637*7c478bd9Sstevel@tonic-gate nfsbs++; 5638*7c478bd9Sstevel@tonic-gate ndbs += disk_blks; 5639*7c478bd9Sstevel@tonic-gate } 5640*7c478bd9Sstevel@tonic-gate if (ndbs > dblks) 5641*7c478bd9Sstevel@tonic-gate ndbs = dblks; 5642*7c478bd9Sstevel@tonic-gate error = bdev_dump(ip->i_dev, addr, dbn, ndbs); 5643*7c478bd9Sstevel@tonic-gate addr += ldbtob((offset_t)ndbs); 5644*7c478bd9Sstevel@tonic-gate dblks -= ndbs; 5645*7c478bd9Sstevel@tonic-gate ldbn += ndbs; 5646*7c478bd9Sstevel@tonic-gate } 5647*7c478bd9Sstevel@tonic-gate return (error); 5648*7c478bd9Sstevel@tonic-gate 5649*7c478bd9Sstevel@tonic-gate } 5650*7c478bd9Sstevel@tonic-gate 5651*7c478bd9Sstevel@tonic-gate /* 5652*7c478bd9Sstevel@tonic-gate * Prepare the file system before and after the dump operation. 5653*7c478bd9Sstevel@tonic-gate * 5654*7c478bd9Sstevel@tonic-gate * action = DUMP_ALLOC: 5655*7c478bd9Sstevel@tonic-gate * Preparation before dump, allocate dump private data structure 5656*7c478bd9Sstevel@tonic-gate * to hold all the direct and indirect block info for dump. 5657*7c478bd9Sstevel@tonic-gate * 5658*7c478bd9Sstevel@tonic-gate * action = DUMP_FREE: 5659*7c478bd9Sstevel@tonic-gate * Clean up after dump, deallocate the dump private data structure. 5660*7c478bd9Sstevel@tonic-gate * 5661*7c478bd9Sstevel@tonic-gate * action = DUMP_SCAN: 5662*7c478bd9Sstevel@tonic-gate * Scan dump_info for *blkp DEV_BSIZE blocks of contig fs space; 5663*7c478bd9Sstevel@tonic-gate * if found, the starting file-relative DEV_BSIZE lbn is written 5664*7c478bd9Sstevel@tonic-gate * to *bklp; that lbn is intended for use with VOP_DUMP() 5665*7c478bd9Sstevel@tonic-gate */ 5666*7c478bd9Sstevel@tonic-gate static int 5667*7c478bd9Sstevel@tonic-gate ufs_dumpctl(vnode_t *vp, int action, int *blkp) 5668*7c478bd9Sstevel@tonic-gate { 5669*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 5670*7c478bd9Sstevel@tonic-gate ufsvfs_t *ufsvfsp = ip->i_ufsvfs; 5671*7c478bd9Sstevel@tonic-gate struct fs *fs; 5672*7c478bd9Sstevel@tonic-gate daddr32_t *dblk, *storeblk; 5673*7c478bd9Sstevel@tonic-gate daddr32_t *nextblk, *endblk; 5674*7c478bd9Sstevel@tonic-gate struct buf *bp; 5675*7c478bd9Sstevel@tonic-gate int i, entry, entries; 5676*7c478bd9Sstevel@tonic-gate int n, ncontig; 5677*7c478bd9Sstevel@tonic-gate 5678*7c478bd9Sstevel@tonic-gate /* 5679*7c478bd9Sstevel@tonic-gate * check for forced unmount 5680*7c478bd9Sstevel@tonic-gate */ 5681*7c478bd9Sstevel@tonic-gate if (ufsvfsp == NULL) 5682*7c478bd9Sstevel@tonic-gate return (EIO); 5683*7c478bd9Sstevel@tonic-gate 5684*7c478bd9Sstevel@tonic-gate if (action == DUMP_ALLOC) { 5685*7c478bd9Sstevel@tonic-gate /* 5686*7c478bd9Sstevel@tonic-gate * alloc and record dump_info 5687*7c478bd9Sstevel@tonic-gate */ 5688*7c478bd9Sstevel@tonic-gate if (dump_info != NULL) 5689*7c478bd9Sstevel@tonic-gate return (EINVAL); 5690*7c478bd9Sstevel@tonic-gate 5691*7c478bd9Sstevel@tonic-gate ASSERT(vp->v_type == VREG); 5692*7c478bd9Sstevel@tonic-gate fs = ufsvfsp->vfs_fs; 5693*7c478bd9Sstevel@tonic-gate 5694*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER); 5695*7c478bd9Sstevel@tonic-gate 5696*7c478bd9Sstevel@tonic-gate if (bmap_has_holes(ip)) { 5697*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 5698*7c478bd9Sstevel@tonic-gate return (EFAULT); 5699*7c478bd9Sstevel@tonic-gate } 5700*7c478bd9Sstevel@tonic-gate 5701*7c478bd9Sstevel@tonic-gate /* 5702*7c478bd9Sstevel@tonic-gate * calculate and allocate space needed according to i_size 5703*7c478bd9Sstevel@tonic-gate */ 5704*7c478bd9Sstevel@tonic-gate entries = (int)lblkno(fs, blkroundup(fs, ip->i_size)); 5705*7c478bd9Sstevel@tonic-gate if ((dump_info = (struct dump *) 5706*7c478bd9Sstevel@tonic-gate kmem_alloc(sizeof (struct dump) + 5707*7c478bd9Sstevel@tonic-gate (entries - 1) * sizeof (daddr32_t), KM_NOSLEEP)) == NULL) { 5708*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 5709*7c478bd9Sstevel@tonic-gate return (ENOMEM); 5710*7c478bd9Sstevel@tonic-gate } 5711*7c478bd9Sstevel@tonic-gate 5712*7c478bd9Sstevel@tonic-gate /* Start saving the info */ 5713*7c478bd9Sstevel@tonic-gate dump_info->fsbs = entries; 5714*7c478bd9Sstevel@tonic-gate dump_info->ip = ip; 5715*7c478bd9Sstevel@tonic-gate storeblk = &dump_info->dblk[0]; 5716*7c478bd9Sstevel@tonic-gate 5717*7c478bd9Sstevel@tonic-gate /* Direct Blocks */ 5718*7c478bd9Sstevel@tonic-gate for (entry = 0; entry < NDADDR && entry < entries; entry++) 5719*7c478bd9Sstevel@tonic-gate *storeblk++ = ip->i_db[entry]; 5720*7c478bd9Sstevel@tonic-gate 5721*7c478bd9Sstevel@tonic-gate /* Indirect Blocks */ 5722*7c478bd9Sstevel@tonic-gate for (i = 0; i < NIADDR; i++) { 5723*7c478bd9Sstevel@tonic-gate int error = 0; 5724*7c478bd9Sstevel@tonic-gate 5725*7c478bd9Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, 5726*7c478bd9Sstevel@tonic-gate ip->i_dev, fsbtodb(fs, ip->i_ib[i]), 5727*7c478bd9Sstevel@tonic-gate fs->fs_bsize); 5728*7c478bd9Sstevel@tonic-gate if (bp->b_flags & B_ERROR) 5729*7c478bd9Sstevel@tonic-gate error = EIO; 5730*7c478bd9Sstevel@tonic-gate else { 5731*7c478bd9Sstevel@tonic-gate dblk = bp->b_un.b_daddr; 5732*7c478bd9Sstevel@tonic-gate if ((storeblk = save_dblks(ip, ufsvfsp, 5733*7c478bd9Sstevel@tonic-gate storeblk, dblk, i, entries)) == NULL) 5734*7c478bd9Sstevel@tonic-gate error = EIO; 5735*7c478bd9Sstevel@tonic-gate } 5736*7c478bd9Sstevel@tonic-gate 5737*7c478bd9Sstevel@tonic-gate brelse(bp); 5738*7c478bd9Sstevel@tonic-gate 5739*7c478bd9Sstevel@tonic-gate if (error != 0) { 5740*7c478bd9Sstevel@tonic-gate kmem_free(dump_info, sizeof (struct dump) + 5741*7c478bd9Sstevel@tonic-gate (entries - 1) * sizeof (daddr32_t)); 5742*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 5743*7c478bd9Sstevel@tonic-gate dump_info = NULL; 5744*7c478bd9Sstevel@tonic-gate return (error); 5745*7c478bd9Sstevel@tonic-gate } 5746*7c478bd9Sstevel@tonic-gate } 5747*7c478bd9Sstevel@tonic-gate /* and time stamp the information */ 5748*7c478bd9Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 5749*7c478bd9Sstevel@tonic-gate dump_info->time = ip->i_mtime; 5750*7c478bd9Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 5751*7c478bd9Sstevel@tonic-gate 5752*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 5753*7c478bd9Sstevel@tonic-gate } else if (action == DUMP_FREE) { 5754*7c478bd9Sstevel@tonic-gate /* 5755*7c478bd9Sstevel@tonic-gate * free dump_info 5756*7c478bd9Sstevel@tonic-gate */ 5757*7c478bd9Sstevel@tonic-gate if (dump_info == NULL) 5758*7c478bd9Sstevel@tonic-gate return (EINVAL); 5759*7c478bd9Sstevel@tonic-gate entries = dump_info->fsbs - 1; 5760*7c478bd9Sstevel@tonic-gate kmem_free(dump_info, sizeof (struct dump) + 5761*7c478bd9Sstevel@tonic-gate entries * sizeof (daddr32_t)); 5762*7c478bd9Sstevel@tonic-gate dump_info = NULL; 5763*7c478bd9Sstevel@tonic-gate } else if (action == DUMP_SCAN) { 5764*7c478bd9Sstevel@tonic-gate /* 5765*7c478bd9Sstevel@tonic-gate * scan dump_info 5766*7c478bd9Sstevel@tonic-gate */ 5767*7c478bd9Sstevel@tonic-gate if (dump_info == NULL) 5768*7c478bd9Sstevel@tonic-gate return (EINVAL); 5769*7c478bd9Sstevel@tonic-gate 5770*7c478bd9Sstevel@tonic-gate dblk = dump_info->dblk; 5771*7c478bd9Sstevel@tonic-gate nextblk = dblk + 1; 5772*7c478bd9Sstevel@tonic-gate endblk = dblk + dump_info->fsbs - 1; 5773*7c478bd9Sstevel@tonic-gate fs = ufsvfsp->vfs_fs; 5774*7c478bd9Sstevel@tonic-gate ncontig = *blkp >> (fs->fs_bshift - DEV_BSHIFT); 5775*7c478bd9Sstevel@tonic-gate 5776*7c478bd9Sstevel@tonic-gate /* 5777*7c478bd9Sstevel@tonic-gate * scan dblk[] entries; contig fs space is found when: 5778*7c478bd9Sstevel@tonic-gate * ((current blkno + frags per block) == next blkno) 5779*7c478bd9Sstevel@tonic-gate */ 5780*7c478bd9Sstevel@tonic-gate n = 0; 5781*7c478bd9Sstevel@tonic-gate while (n < ncontig && dblk < endblk) { 5782*7c478bd9Sstevel@tonic-gate if ((*dblk + fs->fs_frag) == *nextblk) 5783*7c478bd9Sstevel@tonic-gate n++; 5784*7c478bd9Sstevel@tonic-gate else 5785*7c478bd9Sstevel@tonic-gate n = 0; 5786*7c478bd9Sstevel@tonic-gate dblk++; 5787*7c478bd9Sstevel@tonic-gate nextblk++; 5788*7c478bd9Sstevel@tonic-gate } 5789*7c478bd9Sstevel@tonic-gate 5790*7c478bd9Sstevel@tonic-gate /* 5791*7c478bd9Sstevel@tonic-gate * index is where size bytes of contig space begins; 5792*7c478bd9Sstevel@tonic-gate * conversion from index to the file's DEV_BSIZE lbn 5793*7c478bd9Sstevel@tonic-gate * is equivalent to: (index * fs_bsize) / DEV_BSIZE 5794*7c478bd9Sstevel@tonic-gate */ 5795*7c478bd9Sstevel@tonic-gate if (n == ncontig) { 5796*7c478bd9Sstevel@tonic-gate i = (dblk - dump_info->dblk) - ncontig; 5797*7c478bd9Sstevel@tonic-gate *blkp = i << (fs->fs_bshift - DEV_BSHIFT); 5798*7c478bd9Sstevel@tonic-gate } else 5799*7c478bd9Sstevel@tonic-gate return (EFAULT); 5800*7c478bd9Sstevel@tonic-gate } 5801*7c478bd9Sstevel@tonic-gate return (0); 5802*7c478bd9Sstevel@tonic-gate } 5803*7c478bd9Sstevel@tonic-gate 5804*7c478bd9Sstevel@tonic-gate /* 5805*7c478bd9Sstevel@tonic-gate * Recursive helper function for ufs_dumpctl(). It follows the indirect file 5806*7c478bd9Sstevel@tonic-gate * system blocks until it reaches the the disk block addresses, which are 5807*7c478bd9Sstevel@tonic-gate * then stored into the given buffer, storeblk. 5808*7c478bd9Sstevel@tonic-gate */ 5809*7c478bd9Sstevel@tonic-gate static daddr32_t * 5810*7c478bd9Sstevel@tonic-gate save_dblks(struct inode *ip, struct ufsvfs *ufsvfsp, daddr32_t *storeblk, 5811*7c478bd9Sstevel@tonic-gate daddr32_t *dblk, int level, int entries) 5812*7c478bd9Sstevel@tonic-gate { 5813*7c478bd9Sstevel@tonic-gate struct fs *fs = ufsvfsp->vfs_fs; 5814*7c478bd9Sstevel@tonic-gate struct buf *bp; 5815*7c478bd9Sstevel@tonic-gate int i; 5816*7c478bd9Sstevel@tonic-gate 5817*7c478bd9Sstevel@tonic-gate if (level == 0) { 5818*7c478bd9Sstevel@tonic-gate for (i = 0; i < NINDIR(fs); i++) { 5819*7c478bd9Sstevel@tonic-gate if (storeblk - dump_info->dblk >= entries) 5820*7c478bd9Sstevel@tonic-gate break; 5821*7c478bd9Sstevel@tonic-gate *storeblk++ = dblk[i]; 5822*7c478bd9Sstevel@tonic-gate } 5823*7c478bd9Sstevel@tonic-gate return (storeblk); 5824*7c478bd9Sstevel@tonic-gate } 5825*7c478bd9Sstevel@tonic-gate for (i = 0; i < NINDIR(fs); i++) { 5826*7c478bd9Sstevel@tonic-gate if (storeblk - dump_info->dblk >= entries) 5827*7c478bd9Sstevel@tonic-gate break; 5828*7c478bd9Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, 5829*7c478bd9Sstevel@tonic-gate ip->i_dev, fsbtodb(fs, dblk[i]), fs->fs_bsize); 5830*7c478bd9Sstevel@tonic-gate if (bp->b_flags & B_ERROR) { 5831*7c478bd9Sstevel@tonic-gate brelse(bp); 5832*7c478bd9Sstevel@tonic-gate return (NULL); 5833*7c478bd9Sstevel@tonic-gate } 5834*7c478bd9Sstevel@tonic-gate storeblk = save_dblks(ip, ufsvfsp, storeblk, bp->b_un.b_daddr, 5835*7c478bd9Sstevel@tonic-gate level - 1, entries); 5836*7c478bd9Sstevel@tonic-gate brelse(bp); 5837*7c478bd9Sstevel@tonic-gate 5838*7c478bd9Sstevel@tonic-gate if (storeblk == NULL) 5839*7c478bd9Sstevel@tonic-gate return (NULL); 5840*7c478bd9Sstevel@tonic-gate } 5841*7c478bd9Sstevel@tonic-gate return (storeblk); 5842*7c478bd9Sstevel@tonic-gate } 5843*7c478bd9Sstevel@tonic-gate 5844*7c478bd9Sstevel@tonic-gate /* ARGSUSED */ 5845*7c478bd9Sstevel@tonic-gate static int 5846*7c478bd9Sstevel@tonic-gate ufs_getsecattr(struct vnode *vp, vsecattr_t *vsap, int flag, 5847*7c478bd9Sstevel@tonic-gate struct cred *cr) 5848*7c478bd9Sstevel@tonic-gate { 5849*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 5850*7c478bd9Sstevel@tonic-gate struct ulockfs *ulp; 5851*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 5852*7c478bd9Sstevel@tonic-gate ulong_t vsa_mask = vsap->vsa_mask; 5853*7c478bd9Sstevel@tonic-gate int err = EINVAL; 5854*7c478bd9Sstevel@tonic-gate 5855*7c478bd9Sstevel@tonic-gate TRACE_3(TR_FAC_UFS, TR_UFS_GETSECATTR_START, 5856*7c478bd9Sstevel@tonic-gate "ufs_getsecattr_start:vp %p, vsap %p, flags %x", vp, vsap, flag); 5857*7c478bd9Sstevel@tonic-gate 5858*7c478bd9Sstevel@tonic-gate vsa_mask &= (VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT); 5859*7c478bd9Sstevel@tonic-gate 5860*7c478bd9Sstevel@tonic-gate /* 5861*7c478bd9Sstevel@tonic-gate * Only grab locks if needed - they're not needed to check vsa_mask 5862*7c478bd9Sstevel@tonic-gate * or if the mask contains no acl flags. 5863*7c478bd9Sstevel@tonic-gate */ 5864*7c478bd9Sstevel@tonic-gate if (vsa_mask != 0) { 5865*7c478bd9Sstevel@tonic-gate if (err = ufs_lockfs_begin(ufsvfsp, &ulp, 5866*7c478bd9Sstevel@tonic-gate ULOCKFS_GETATTR_MASK)) 5867*7c478bd9Sstevel@tonic-gate return (err); 5868*7c478bd9Sstevel@tonic-gate 5869*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER); 5870*7c478bd9Sstevel@tonic-gate err = ufs_acl_get(ip, vsap, flag, cr); 5871*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 5872*7c478bd9Sstevel@tonic-gate 5873*7c478bd9Sstevel@tonic-gate if (ulp) 5874*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 5875*7c478bd9Sstevel@tonic-gate } 5876*7c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_UFS, TR_UFS_GETSECATTR_END, 5877*7c478bd9Sstevel@tonic-gate "ufs_getsecattr_end:vp %p", vp); 5878*7c478bd9Sstevel@tonic-gate return (err); 5879*7c478bd9Sstevel@tonic-gate } 5880*7c478bd9Sstevel@tonic-gate 5881*7c478bd9Sstevel@tonic-gate /* ARGSUSED */ 5882*7c478bd9Sstevel@tonic-gate static int 5883*7c478bd9Sstevel@tonic-gate ufs_setsecattr(struct vnode *vp, vsecattr_t *vsap, int flag, struct cred *cr) 5884*7c478bd9Sstevel@tonic-gate { 5885*7c478bd9Sstevel@tonic-gate struct inode *ip = VTOI(vp); 5886*7c478bd9Sstevel@tonic-gate struct ulockfs *ulp = NULL; 5887*7c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp = VTOI(vp)->i_ufsvfs; 5888*7c478bd9Sstevel@tonic-gate ulong_t vsa_mask = vsap->vsa_mask; 5889*7c478bd9Sstevel@tonic-gate int err; 5890*7c478bd9Sstevel@tonic-gate int haverwlock = 1; 5891*7c478bd9Sstevel@tonic-gate int trans_size; 5892*7c478bd9Sstevel@tonic-gate int donetrans = 0; 5893*7c478bd9Sstevel@tonic-gate int retry = 1; 5894*7c478bd9Sstevel@tonic-gate 5895*7c478bd9Sstevel@tonic-gate 5896*7c478bd9Sstevel@tonic-gate TRACE_3(TR_FAC_UFS, TR_UFS_SETSECATTR_START, 5897*7c478bd9Sstevel@tonic-gate "ufs_setsecattr_start:vp %p, vsap %p, flags %x", vp, vsap, flag); 5898*7c478bd9Sstevel@tonic-gate 5899*7c478bd9Sstevel@tonic-gate ASSERT(RW_LOCK_HELD(&ip->i_rwlock)); 5900*7c478bd9Sstevel@tonic-gate 5901*7c478bd9Sstevel@tonic-gate /* Abort now if the request is either empty or invalid. */ 5902*7c478bd9Sstevel@tonic-gate vsa_mask &= (VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT); 5903*7c478bd9Sstevel@tonic-gate if ((vsa_mask == 0) || 5904*7c478bd9Sstevel@tonic-gate ((vsap->vsa_aclentp == NULL) && 5905*7c478bd9Sstevel@tonic-gate (vsap->vsa_dfaclentp == NULL))) { 5906*7c478bd9Sstevel@tonic-gate err = EINVAL; 5907*7c478bd9Sstevel@tonic-gate goto out; 5908*7c478bd9Sstevel@tonic-gate } 5909*7c478bd9Sstevel@tonic-gate 5910*7c478bd9Sstevel@tonic-gate /* 5911*7c478bd9Sstevel@tonic-gate * Following convention, if this is a directory then we acquire the 5912*7c478bd9Sstevel@tonic-gate * inode's i_rwlock after starting a UFS logging transaction; 5913*7c478bd9Sstevel@tonic-gate * otherwise, we acquire it beforehand. Since we were called (and 5914*7c478bd9Sstevel@tonic-gate * must therefore return) with the lock held, we will have to drop it, 5915*7c478bd9Sstevel@tonic-gate * and later reacquire it, if operating on a directory. 5916*7c478bd9Sstevel@tonic-gate */ 5917*7c478bd9Sstevel@tonic-gate if (vp->v_type == VDIR) { 5918*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_rwlock); 5919*7c478bd9Sstevel@tonic-gate haverwlock = 0; 5920*7c478bd9Sstevel@tonic-gate } else { 5921*7c478bd9Sstevel@tonic-gate /* Upgrade the lock if required. */ 5922*7c478bd9Sstevel@tonic-gate if (!rw_write_held(&ip->i_rwlock)) { 5923*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_rwlock); 5924*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_rwlock, RW_WRITER); 5925*7c478bd9Sstevel@tonic-gate } 5926*7c478bd9Sstevel@tonic-gate } 5927*7c478bd9Sstevel@tonic-gate 5928*7c478bd9Sstevel@tonic-gate again: 5929*7c478bd9Sstevel@tonic-gate ASSERT(!(vp->v_type == VDIR && haverwlock)); 5930*7c478bd9Sstevel@tonic-gate if (err = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_SETATTR_MASK)) { 5931*7c478bd9Sstevel@tonic-gate ulp = NULL; 5932*7c478bd9Sstevel@tonic-gate retry = 0; 5933*7c478bd9Sstevel@tonic-gate goto out; 5934*7c478bd9Sstevel@tonic-gate } 5935*7c478bd9Sstevel@tonic-gate 5936*7c478bd9Sstevel@tonic-gate /* 5937*7c478bd9Sstevel@tonic-gate * Check that the file system supports this operation. Note that 5938*7c478bd9Sstevel@tonic-gate * ufs_lockfs_begin() will have checked that the file system had 5939*7c478bd9Sstevel@tonic-gate * not been forcibly unmounted. 5940*7c478bd9Sstevel@tonic-gate */ 5941*7c478bd9Sstevel@tonic-gate if (ufsvfsp->vfs_fs->fs_ronly) { 5942*7c478bd9Sstevel@tonic-gate err = EROFS; 5943*7c478bd9Sstevel@tonic-gate goto out; 5944*7c478bd9Sstevel@tonic-gate } 5945*7c478bd9Sstevel@tonic-gate if (ufsvfsp->vfs_nosetsec) { 5946*7c478bd9Sstevel@tonic-gate err = ENOSYS; 5947*7c478bd9Sstevel@tonic-gate goto out; 5948*7c478bd9Sstevel@tonic-gate } 5949*7c478bd9Sstevel@tonic-gate 5950*7c478bd9Sstevel@tonic-gate if (ulp) { 5951*7c478bd9Sstevel@tonic-gate TRANS_BEGIN_ASYNC(ufsvfsp, TOP_SETSECATTR, 5952*7c478bd9Sstevel@tonic-gate trans_size = TOP_SETSECATTR_SIZE(VTOI(vp))); 5953*7c478bd9Sstevel@tonic-gate donetrans = 1; 5954*7c478bd9Sstevel@tonic-gate } 5955*7c478bd9Sstevel@tonic-gate 5956*7c478bd9Sstevel@tonic-gate if (vp->v_type == VDIR) { 5957*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_rwlock, RW_WRITER); 5958*7c478bd9Sstevel@tonic-gate haverwlock = 1; 5959*7c478bd9Sstevel@tonic-gate } 5960*7c478bd9Sstevel@tonic-gate 5961*7c478bd9Sstevel@tonic-gate ASSERT(haverwlock); 5962*7c478bd9Sstevel@tonic-gate 5963*7c478bd9Sstevel@tonic-gate /* Do the actual work. */ 5964*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 5965*7c478bd9Sstevel@tonic-gate /* 5966*7c478bd9Sstevel@tonic-gate * Suppress out of inodes messages if we will retry. 5967*7c478bd9Sstevel@tonic-gate */ 5968*7c478bd9Sstevel@tonic-gate if (retry) 5969*7c478bd9Sstevel@tonic-gate ip->i_flag |= IQUIET; 5970*7c478bd9Sstevel@tonic-gate err = ufs_acl_set(ip, vsap, flag, cr); 5971*7c478bd9Sstevel@tonic-gate ip->i_flag &= ~IQUIET; 5972*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 5973*7c478bd9Sstevel@tonic-gate 5974*7c478bd9Sstevel@tonic-gate out: 5975*7c478bd9Sstevel@tonic-gate if (ulp) { 5976*7c478bd9Sstevel@tonic-gate if (donetrans) { 5977*7c478bd9Sstevel@tonic-gate /* 5978*7c478bd9Sstevel@tonic-gate * top_end_async() can eventually call 5979*7c478bd9Sstevel@tonic-gate * top_end_sync(), which can block. We must 5980*7c478bd9Sstevel@tonic-gate * therefore observe the lock-ordering protocol 5981*7c478bd9Sstevel@tonic-gate * here as well. 5982*7c478bd9Sstevel@tonic-gate */ 5983*7c478bd9Sstevel@tonic-gate if (vp->v_type == VDIR) { 5984*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_rwlock); 5985*7c478bd9Sstevel@tonic-gate haverwlock = 0; 5986*7c478bd9Sstevel@tonic-gate } 5987*7c478bd9Sstevel@tonic-gate TRANS_END_ASYNC(ufsvfsp, TOP_SETSECATTR, trans_size); 5988*7c478bd9Sstevel@tonic-gate } 5989*7c478bd9Sstevel@tonic-gate ufs_lockfs_end(ulp); 5990*7c478bd9Sstevel@tonic-gate } 5991*7c478bd9Sstevel@tonic-gate /* 5992*7c478bd9Sstevel@tonic-gate * If no inodes available, try scaring a logically- 5993*7c478bd9Sstevel@tonic-gate * free one out of the delete queue to someplace 5994*7c478bd9Sstevel@tonic-gate * that we can find it. 5995*7c478bd9Sstevel@tonic-gate */ 5996*7c478bd9Sstevel@tonic-gate if ((err == ENOSPC) && retry && TRANS_ISTRANS(ufsvfsp)) { 5997*7c478bd9Sstevel@tonic-gate ufs_delete_drain_wait(ufsvfsp, 1); 5998*7c478bd9Sstevel@tonic-gate retry = 0; 5999*7c478bd9Sstevel@tonic-gate if (vp->v_type == VDIR && haverwlock) { 6000*7c478bd9Sstevel@tonic-gate rw_exit(&ip->i_rwlock); 6001*7c478bd9Sstevel@tonic-gate haverwlock = 0; 6002*7c478bd9Sstevel@tonic-gate } 6003*7c478bd9Sstevel@tonic-gate goto again; 6004*7c478bd9Sstevel@tonic-gate } 6005*7c478bd9Sstevel@tonic-gate /* 6006*7c478bd9Sstevel@tonic-gate * If we need to reacquire the lock then it is safe to do so 6007*7c478bd9Sstevel@tonic-gate * as a reader. This is because ufs_rwunlock(), which will be 6008*7c478bd9Sstevel@tonic-gate * called by our caller after we return, does not differentiate 6009*7c478bd9Sstevel@tonic-gate * between shared and exclusive locks. 6010*7c478bd9Sstevel@tonic-gate */ 6011*7c478bd9Sstevel@tonic-gate if (!haverwlock) { 6012*7c478bd9Sstevel@tonic-gate ASSERT(vp->v_type == VDIR); 6013*7c478bd9Sstevel@tonic-gate rw_enter(&ip->i_rwlock, RW_READER); 6014*7c478bd9Sstevel@tonic-gate } 6015*7c478bd9Sstevel@tonic-gate 6016*7c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_UFS, TR_UFS_SETSECATTR_END, 6017*7c478bd9Sstevel@tonic-gate "ufs_setsecattr_end:vp %p", vp); 6018*7c478bd9Sstevel@tonic-gate return (err); 6019*7c478bd9Sstevel@tonic-gate } 6020