1*fa9e4066Sahrens /* 2*fa9e4066Sahrens * CDDL HEADER START 3*fa9e4066Sahrens * 4*fa9e4066Sahrens * The contents of this file are subject to the terms of the 5*fa9e4066Sahrens * Common Development and Distribution License, Version 1.0 only 6*fa9e4066Sahrens * (the "License"). You may not use this file except in compliance 7*fa9e4066Sahrens * with the License. 8*fa9e4066Sahrens * 9*fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 11*fa9e4066Sahrens * See the License for the specific language governing permissions 12*fa9e4066Sahrens * and limitations under the License. 13*fa9e4066Sahrens * 14*fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 15*fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 17*fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 18*fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 19*fa9e4066Sahrens * 20*fa9e4066Sahrens * CDDL HEADER END 21*fa9e4066Sahrens */ 22*fa9e4066Sahrens /* 23*fa9e4066Sahrens * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24*fa9e4066Sahrens * Use is subject to license terms. 25*fa9e4066Sahrens */ 26*fa9e4066Sahrens 27*fa9e4066Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 28*fa9e4066Sahrens 29*fa9e4066Sahrens #include <sys/types.h> 30*fa9e4066Sahrens #include <sys/param.h> 31*fa9e4066Sahrens #include <sys/systm.h> 32*fa9e4066Sahrens #include <sys/sysmacros.h> 33*fa9e4066Sahrens #include <sys/kmem.h> 34*fa9e4066Sahrens #include <sys/pathname.h> 35*fa9e4066Sahrens #include <sys/acl.h> 36*fa9e4066Sahrens #include <sys/vnode.h> 37*fa9e4066Sahrens #include <sys/vfs.h> 38*fa9e4066Sahrens #include <sys/mntent.h> 39*fa9e4066Sahrens #include <sys/mount.h> 40*fa9e4066Sahrens #include <sys/cmn_err.h> 41*fa9e4066Sahrens #include "fs/fs_subr.h" 42*fa9e4066Sahrens #include <sys/zfs_znode.h> 43*fa9e4066Sahrens #include <sys/zil.h> 44*fa9e4066Sahrens #include <sys/fs/zfs.h> 45*fa9e4066Sahrens #include <sys/dmu.h> 46*fa9e4066Sahrens #include <sys/dsl_prop.h> 47*fa9e4066Sahrens #include <sys/spa.h> 48*fa9e4066Sahrens #include <sys/zap.h> 49*fa9e4066Sahrens #include <sys/varargs.h> 50*fa9e4066Sahrens #include <sys/policy.h> 51*fa9e4066Sahrens #include <sys/atomic.h> 52*fa9e4066Sahrens #include <sys/mkdev.h> 53*fa9e4066Sahrens #include <sys/modctl.h> 54*fa9e4066Sahrens #include <sys/zfs_ioctl.h> 55*fa9e4066Sahrens #include <sys/zfs_ctldir.h> 56*fa9e4066Sahrens 57*fa9e4066Sahrens int zfsfstype; 58*fa9e4066Sahrens vfsops_t *zfs_vfsops = NULL; 59*fa9e4066Sahrens static major_t zfs_major; 60*fa9e4066Sahrens static minor_t zfs_minor; 61*fa9e4066Sahrens static kmutex_t zfs_dev_mtx; 62*fa9e4066Sahrens 63*fa9e4066Sahrens static int zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr); 64*fa9e4066Sahrens static int zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr); 65*fa9e4066Sahrens static int zfs_root(vfs_t *vfsp, vnode_t **vpp); 66*fa9e4066Sahrens static int zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp); 67*fa9e4066Sahrens static int zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp); 68*fa9e4066Sahrens static void zfs_freevfs(vfs_t *vfsp); 69*fa9e4066Sahrens static void zfs_objset_close(zfsvfs_t *zfsvfs); 70*fa9e4066Sahrens 71*fa9e4066Sahrens static const fs_operation_def_t zfs_vfsops_template[] = { 72*fa9e4066Sahrens VFSNAME_MOUNT, zfs_mount, 73*fa9e4066Sahrens VFSNAME_UNMOUNT, zfs_umount, 74*fa9e4066Sahrens VFSNAME_ROOT, zfs_root, 75*fa9e4066Sahrens VFSNAME_STATVFS, zfs_statvfs, 76*fa9e4066Sahrens VFSNAME_SYNC, (fs_generic_func_p) zfs_sync, 77*fa9e4066Sahrens VFSNAME_VGET, zfs_vget, 78*fa9e4066Sahrens VFSNAME_FREEVFS, (fs_generic_func_p) zfs_freevfs, 79*fa9e4066Sahrens NULL, NULL 80*fa9e4066Sahrens }; 81*fa9e4066Sahrens 82*fa9e4066Sahrens static const fs_operation_def_t zfs_vfsops_eio_template[] = { 83*fa9e4066Sahrens VFSNAME_FREEVFS, (fs_generic_func_p) zfs_freevfs, 84*fa9e4066Sahrens NULL, NULL 85*fa9e4066Sahrens }; 86*fa9e4066Sahrens 87*fa9e4066Sahrens /* 88*fa9e4066Sahrens * We need to keep a count of active fs's. 89*fa9e4066Sahrens * This is necessary to prevent our module 90*fa9e4066Sahrens * from being unloaded after a umount -f 91*fa9e4066Sahrens */ 92*fa9e4066Sahrens static uint32_t zfs_active_fs_count = 0; 93*fa9e4066Sahrens 94*fa9e4066Sahrens static char *noatime_cancel[] = { MNTOPT_ATIME, NULL }; 95*fa9e4066Sahrens static char *atime_cancel[] = { MNTOPT_NOATIME, NULL }; 96*fa9e4066Sahrens 97*fa9e4066Sahrens static mntopt_t mntopts[] = { 98*fa9e4066Sahrens { MNTOPT_XATTR, NULL, NULL, MO_NODISPLAY|MO_DEFAULT, NULL }, 99*fa9e4066Sahrens { MNTOPT_NOATIME, noatime_cancel, NULL, MO_DEFAULT, NULL }, 100*fa9e4066Sahrens { MNTOPT_ATIME, atime_cancel, NULL, 0, NULL } 101*fa9e4066Sahrens }; 102*fa9e4066Sahrens 103*fa9e4066Sahrens static mntopts_t zfs_mntopts = { 104*fa9e4066Sahrens sizeof (mntopts) / sizeof (mntopt_t), 105*fa9e4066Sahrens mntopts 106*fa9e4066Sahrens }; 107*fa9e4066Sahrens 108*fa9e4066Sahrens /*ARGSUSED*/ 109*fa9e4066Sahrens int 110*fa9e4066Sahrens zfs_sync(vfs_t *vfsp, short flag, cred_t *cr) 111*fa9e4066Sahrens { 112*fa9e4066Sahrens /* 113*fa9e4066Sahrens * Data integrity is job one. We don't want a compromised kernel 114*fa9e4066Sahrens * writing to the storage pool, so we never sync during panic. 115*fa9e4066Sahrens */ 116*fa9e4066Sahrens if (panicstr) 117*fa9e4066Sahrens return (0); 118*fa9e4066Sahrens 119*fa9e4066Sahrens /* 120*fa9e4066Sahrens * SYNC_ATTR is used by fsflush() to force old filesystems like UFS 121*fa9e4066Sahrens * to sync metadata, which they would otherwise cache indefinitely. 122*fa9e4066Sahrens * Semantically, the only requirement is that the sync be initiated. 123*fa9e4066Sahrens * The DMU syncs out txgs frequently, so there's nothing to do. 124*fa9e4066Sahrens */ 125*fa9e4066Sahrens if (flag & SYNC_ATTR) 126*fa9e4066Sahrens return (0); 127*fa9e4066Sahrens 128*fa9e4066Sahrens if (vfsp != NULL) { 129*fa9e4066Sahrens /* 130*fa9e4066Sahrens * Sync a specific filesystem. 131*fa9e4066Sahrens */ 132*fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 133*fa9e4066Sahrens 134*fa9e4066Sahrens ZFS_ENTER(zfsvfs); 135*fa9e4066Sahrens if (zfsvfs->z_log != NULL) 136*fa9e4066Sahrens zil_commit(zfsvfs->z_log, UINT64_MAX, FSYNC); 137*fa9e4066Sahrens else 138*fa9e4066Sahrens txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 139*fa9e4066Sahrens ZFS_EXIT(zfsvfs); 140*fa9e4066Sahrens } else { 141*fa9e4066Sahrens /* 142*fa9e4066Sahrens * Sync all ZFS filesystems. This is what happens when you 143*fa9e4066Sahrens * run sync(1M). Unlike other filesystems, ZFS honors the 144*fa9e4066Sahrens * request by waiting for all pools to commit all dirty data. 145*fa9e4066Sahrens */ 146*fa9e4066Sahrens spa_sync_allpools(); 147*fa9e4066Sahrens } 148*fa9e4066Sahrens 149*fa9e4066Sahrens return (0); 150*fa9e4066Sahrens } 151*fa9e4066Sahrens 152*fa9e4066Sahrens static void 153*fa9e4066Sahrens atime_changed_cb(void *arg, uint64_t newval) 154*fa9e4066Sahrens { 155*fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 156*fa9e4066Sahrens 157*fa9e4066Sahrens if (newval == TRUE) { 158*fa9e4066Sahrens zfsvfs->z_atime = TRUE; 159*fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 160*fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 161*fa9e4066Sahrens } else { 162*fa9e4066Sahrens zfsvfs->z_atime = FALSE; 163*fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 164*fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 165*fa9e4066Sahrens } 166*fa9e4066Sahrens } 167*fa9e4066Sahrens 168*fa9e4066Sahrens static void 169*fa9e4066Sahrens blksz_changed_cb(void *arg, uint64_t newval) 170*fa9e4066Sahrens { 171*fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 172*fa9e4066Sahrens 173*fa9e4066Sahrens if (newval < SPA_MINBLOCKSIZE || 174*fa9e4066Sahrens newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 175*fa9e4066Sahrens newval = SPA_MAXBLOCKSIZE; 176*fa9e4066Sahrens 177*fa9e4066Sahrens zfsvfs->z_max_blksz = newval; 178*fa9e4066Sahrens zfsvfs->z_vfs->vfs_bsize = newval; 179*fa9e4066Sahrens } 180*fa9e4066Sahrens 181*fa9e4066Sahrens static void 182*fa9e4066Sahrens readonly_changed_cb(void *arg, uint64_t newval) 183*fa9e4066Sahrens { 184*fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 185*fa9e4066Sahrens 186*fa9e4066Sahrens if (newval) { 187*fa9e4066Sahrens /* XXX locking on vfs_flag? */ 188*fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 189*fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 190*fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 191*fa9e4066Sahrens (void) zfs_delete_thread_target(zfsvfs, 0); 192*fa9e4066Sahrens } else { 193*fa9e4066Sahrens /* XXX locking on vfs_flag? */ 194*fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 195*fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 196*fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 197*fa9e4066Sahrens (void) zfs_delete_thread_target(zfsvfs, 1); 198*fa9e4066Sahrens } 199*fa9e4066Sahrens } 200*fa9e4066Sahrens 201*fa9e4066Sahrens static void 202*fa9e4066Sahrens devices_changed_cb(void *arg, uint64_t newval) 203*fa9e4066Sahrens { 204*fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 205*fa9e4066Sahrens 206*fa9e4066Sahrens if (newval == FALSE) { 207*fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NODEVICES; 208*fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES); 209*fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES, NULL, 0); 210*fa9e4066Sahrens } else { 211*fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NODEVICES; 212*fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES); 213*fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES, NULL, 0); 214*fa9e4066Sahrens } 215*fa9e4066Sahrens } 216*fa9e4066Sahrens 217*fa9e4066Sahrens static void 218*fa9e4066Sahrens setuid_changed_cb(void *arg, uint64_t newval) 219*fa9e4066Sahrens { 220*fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 221*fa9e4066Sahrens 222*fa9e4066Sahrens if (newval == FALSE) { 223*fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 224*fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 225*fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 226*fa9e4066Sahrens } else { 227*fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 228*fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 229*fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 230*fa9e4066Sahrens } 231*fa9e4066Sahrens } 232*fa9e4066Sahrens 233*fa9e4066Sahrens static void 234*fa9e4066Sahrens exec_changed_cb(void *arg, uint64_t newval) 235*fa9e4066Sahrens { 236*fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 237*fa9e4066Sahrens 238*fa9e4066Sahrens if (newval == FALSE) { 239*fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 240*fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 241*fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 242*fa9e4066Sahrens } else { 243*fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 244*fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 245*fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 246*fa9e4066Sahrens } 247*fa9e4066Sahrens } 248*fa9e4066Sahrens 249*fa9e4066Sahrens static void 250*fa9e4066Sahrens snapdir_changed_cb(void *arg, uint64_t newval) 251*fa9e4066Sahrens { 252*fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 253*fa9e4066Sahrens 254*fa9e4066Sahrens zfsvfs->z_show_ctldir = newval; 255*fa9e4066Sahrens } 256*fa9e4066Sahrens 257*fa9e4066Sahrens static void 258*fa9e4066Sahrens acl_mode_changed_cb(void *arg, uint64_t newval) 259*fa9e4066Sahrens { 260*fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 261*fa9e4066Sahrens 262*fa9e4066Sahrens zfsvfs->z_acl_mode = newval; 263*fa9e4066Sahrens } 264*fa9e4066Sahrens 265*fa9e4066Sahrens static void 266*fa9e4066Sahrens acl_inherit_changed_cb(void *arg, uint64_t newval) 267*fa9e4066Sahrens { 268*fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 269*fa9e4066Sahrens 270*fa9e4066Sahrens zfsvfs->z_acl_inherit = newval; 271*fa9e4066Sahrens } 272*fa9e4066Sahrens 273*fa9e4066Sahrens /*ARGSUSED*/ 274*fa9e4066Sahrens static int 275*fa9e4066Sahrens zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 276*fa9e4066Sahrens { 277*fa9e4066Sahrens zfsvfs_t *zfsvfs = NULL; 278*fa9e4066Sahrens znode_t *zp = NULL; 279*fa9e4066Sahrens vnode_t *vp = NULL; 280*fa9e4066Sahrens objset_t *os = NULL; 281*fa9e4066Sahrens struct dsl_dataset *ds; 282*fa9e4066Sahrens char *osname; 283*fa9e4066Sahrens uint64_t readonly, recordsize; 284*fa9e4066Sahrens pathname_t spn; 285*fa9e4066Sahrens dev_t mount_dev; 286*fa9e4066Sahrens major_t new_major; 287*fa9e4066Sahrens int mode; 288*fa9e4066Sahrens int error = 0; 289*fa9e4066Sahrens uio_seg_t fromspace = (uap->flags & MS_SYSSPACE) ? 290*fa9e4066Sahrens UIO_SYSSPACE : UIO_USERSPACE; 291*fa9e4066Sahrens int canwrite; 292*fa9e4066Sahrens 293*fa9e4066Sahrens if (mvp->v_type != VDIR) 294*fa9e4066Sahrens return (ENOTDIR); 295*fa9e4066Sahrens 296*fa9e4066Sahrens mutex_enter(&mvp->v_lock); 297*fa9e4066Sahrens if ((uap->flags & MS_REMOUNT) == 0 && 298*fa9e4066Sahrens (uap->flags & MS_OVERLAY) == 0 && 299*fa9e4066Sahrens (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 300*fa9e4066Sahrens mutex_exit(&mvp->v_lock); 301*fa9e4066Sahrens return (EBUSY); 302*fa9e4066Sahrens } 303*fa9e4066Sahrens mutex_exit(&mvp->v_lock); 304*fa9e4066Sahrens 305*fa9e4066Sahrens /* 306*fa9e4066Sahrens * ZFS does not support passing unparsed data in via MS_DATA. 307*fa9e4066Sahrens * Users should use the MS_OPTIONSTR interface; this means 308*fa9e4066Sahrens * that all option parsing is already done and the options struct 309*fa9e4066Sahrens * can be interrogated. 310*fa9e4066Sahrens */ 311*fa9e4066Sahrens if ((uap->flags & MS_DATA) && uap->datalen > 0) 312*fa9e4066Sahrens return (EINVAL); 313*fa9e4066Sahrens 314*fa9e4066Sahrens /* 315*fa9e4066Sahrens * When doing a remount, we simply refresh our temporary properties 316*fa9e4066Sahrens * according to those options set in the current VFS options. 317*fa9e4066Sahrens */ 318*fa9e4066Sahrens if (uap->flags & MS_REMOUNT) { 319*fa9e4066Sahrens zfsvfs = vfsp->vfs_data; 320*fa9e4066Sahrens 321*fa9e4066Sahrens if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) 322*fa9e4066Sahrens readonly_changed_cb(zfsvfs, B_TRUE); 323*fa9e4066Sahrens else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 324*fa9e4066Sahrens if (dmu_objset_is_snapshot(zfsvfs->z_os)) 325*fa9e4066Sahrens return (EROFS); 326*fa9e4066Sahrens readonly_changed_cb(zfsvfs, B_FALSE); 327*fa9e4066Sahrens } 328*fa9e4066Sahrens 329*fa9e4066Sahrens if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 330*fa9e4066Sahrens devices_changed_cb(zfsvfs, B_FALSE); 331*fa9e4066Sahrens setuid_changed_cb(zfsvfs, B_FALSE); 332*fa9e4066Sahrens } else { 333*fa9e4066Sahrens if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 334*fa9e4066Sahrens devices_changed_cb(zfsvfs, B_FALSE); 335*fa9e4066Sahrens else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) 336*fa9e4066Sahrens devices_changed_cb(zfsvfs, B_TRUE); 337*fa9e4066Sahrens 338*fa9e4066Sahrens if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 339*fa9e4066Sahrens setuid_changed_cb(zfsvfs, B_FALSE); 340*fa9e4066Sahrens else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) 341*fa9e4066Sahrens setuid_changed_cb(zfsvfs, B_TRUE); 342*fa9e4066Sahrens } 343*fa9e4066Sahrens 344*fa9e4066Sahrens if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) 345*fa9e4066Sahrens exec_changed_cb(zfsvfs, B_FALSE); 346*fa9e4066Sahrens else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) 347*fa9e4066Sahrens exec_changed_cb(zfsvfs, B_TRUE); 348*fa9e4066Sahrens 349*fa9e4066Sahrens return (0); 350*fa9e4066Sahrens } 351*fa9e4066Sahrens 352*fa9e4066Sahrens /* 353*fa9e4066Sahrens * Get the objset name (the "special" mount argument). 354*fa9e4066Sahrens */ 355*fa9e4066Sahrens if (error = pn_get(uap->spec, fromspace, &spn)) 356*fa9e4066Sahrens return (error); 357*fa9e4066Sahrens 358*fa9e4066Sahrens osname = spn.pn_path; 359*fa9e4066Sahrens 360*fa9e4066Sahrens if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0) 361*fa9e4066Sahrens goto out; 362*fa9e4066Sahrens 363*fa9e4066Sahrens /* 364*fa9e4066Sahrens * Refuse to mount a filesystem if we are in a local zone and the 365*fa9e4066Sahrens * dataset is not visible. 366*fa9e4066Sahrens */ 367*fa9e4066Sahrens if (!INGLOBALZONE(curproc) && 368*fa9e4066Sahrens (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 369*fa9e4066Sahrens error = EPERM; 370*fa9e4066Sahrens goto out; 371*fa9e4066Sahrens } 372*fa9e4066Sahrens 373*fa9e4066Sahrens /* 374*fa9e4066Sahrens * Initialize the zfs-specific filesystem structure. 375*fa9e4066Sahrens * Should probably make this a kmem cache, shuffle fields, 376*fa9e4066Sahrens * and just bzero upto z_hold_mtx[]. 377*fa9e4066Sahrens */ 378*fa9e4066Sahrens zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 379*fa9e4066Sahrens zfsvfs->z_vfs = vfsp; 380*fa9e4066Sahrens zfsvfs->z_parent = zfsvfs; 381*fa9e4066Sahrens zfsvfs->z_assign = TXG_NOWAIT; 382*fa9e4066Sahrens zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 383*fa9e4066Sahrens zfsvfs->z_show_ctldir = VISIBLE; 384*fa9e4066Sahrens 385*fa9e4066Sahrens mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 386*fa9e4066Sahrens list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 387*fa9e4066Sahrens offsetof(znode_t, z_link_node)); 388*fa9e4066Sahrens rw_init(&zfsvfs->z_um_lock, NULL, RW_DEFAULT, NULL); 389*fa9e4066Sahrens 390*fa9e4066Sahrens /* 391*fa9e4066Sahrens * Initialize the generic filesystem structure. 392*fa9e4066Sahrens */ 393*fa9e4066Sahrens vfsp->vfs_bcount = 0; 394*fa9e4066Sahrens vfsp->vfs_data = NULL; 395*fa9e4066Sahrens 396*fa9e4066Sahrens /* 397*fa9e4066Sahrens * Create a unique device for the mount. 398*fa9e4066Sahrens */ 399*fa9e4066Sahrens do { 400*fa9e4066Sahrens ASSERT3U(zfs_minor, <=, MAXMIN32); 401*fa9e4066Sahrens int start = zfs_minor; 402*fa9e4066Sahrens do { 403*fa9e4066Sahrens mutex_enter(&zfs_dev_mtx); 404*fa9e4066Sahrens zfs_minor++; 405*fa9e4066Sahrens if (zfs_minor > MAXMIN32) 406*fa9e4066Sahrens zfs_minor = 0; 407*fa9e4066Sahrens mount_dev = makedevice(zfs_major, zfs_minor); 408*fa9e4066Sahrens mutex_exit(&zfs_dev_mtx); 409*fa9e4066Sahrens } while (vfs_devismounted(mount_dev) && zfs_minor != start); 410*fa9e4066Sahrens if (zfs_minor == start) { 411*fa9e4066Sahrens /* 412*fa9e4066Sahrens * We are using all ~262,000 minor numbers 413*fa9e4066Sahrens * for the current major number. Create a 414*fa9e4066Sahrens * new major number. 415*fa9e4066Sahrens */ 416*fa9e4066Sahrens if ((new_major = getudev()) == (major_t)-1) { 417*fa9e4066Sahrens cmn_err(CE_WARN, 418*fa9e4066Sahrens "zfs_mount: Can't get unique" 419*fa9e4066Sahrens " major device number."); 420*fa9e4066Sahrens goto out; 421*fa9e4066Sahrens } 422*fa9e4066Sahrens mutex_enter(&zfs_dev_mtx); 423*fa9e4066Sahrens zfs_major = new_major; 424*fa9e4066Sahrens zfs_minor = 0; 425*fa9e4066Sahrens mutex_exit(&zfs_dev_mtx); 426*fa9e4066Sahrens } else { 427*fa9e4066Sahrens break; 428*fa9e4066Sahrens } 429*fa9e4066Sahrens /* CONSTANTCONDITION */ 430*fa9e4066Sahrens } while (1); 431*fa9e4066Sahrens 432*fa9e4066Sahrens ASSERT(vfs_devismounted(mount_dev) == 0); 433*fa9e4066Sahrens 434*fa9e4066Sahrens if (dsl_prop_get_integer(osname, "recordsize", &recordsize, NULL) != 0) 435*fa9e4066Sahrens recordsize = SPA_MAXBLOCKSIZE; 436*fa9e4066Sahrens 437*fa9e4066Sahrens vfsp->vfs_dev = mount_dev; 438*fa9e4066Sahrens vfsp->vfs_fstype = zfsfstype; 439*fa9e4066Sahrens vfsp->vfs_bsize = recordsize; 440*fa9e4066Sahrens vfsp->vfs_flag |= VFS_NOTRUNC; 441*fa9e4066Sahrens vfsp->vfs_data = zfsvfs; 442*fa9e4066Sahrens 443*fa9e4066Sahrens error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL); 444*fa9e4066Sahrens if (error) 445*fa9e4066Sahrens goto out; 446*fa9e4066Sahrens 447*fa9e4066Sahrens if (readonly) 448*fa9e4066Sahrens mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 449*fa9e4066Sahrens else 450*fa9e4066Sahrens mode = DS_MODE_PRIMARY; 451*fa9e4066Sahrens 452*fa9e4066Sahrens error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 453*fa9e4066Sahrens if (error == EROFS) { 454*fa9e4066Sahrens mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 455*fa9e4066Sahrens error = dmu_objset_open(osname, DMU_OST_ZFS, mode, 456*fa9e4066Sahrens &zfsvfs->z_os); 457*fa9e4066Sahrens } 458*fa9e4066Sahrens os = zfsvfs->z_os; 459*fa9e4066Sahrens 460*fa9e4066Sahrens if (error) 461*fa9e4066Sahrens goto out; 462*fa9e4066Sahrens 463*fa9e4066Sahrens if (error = zfs_init_fs(zfsvfs, &zp, cr)) 464*fa9e4066Sahrens goto out; 465*fa9e4066Sahrens 466*fa9e4066Sahrens if (dmu_objset_is_snapshot(os)) { 467*fa9e4066Sahrens ASSERT(mode & DS_MODE_READONLY); 468*fa9e4066Sahrens atime_changed_cb(zfsvfs, B_FALSE); 469*fa9e4066Sahrens readonly_changed_cb(zfsvfs, B_TRUE); 470*fa9e4066Sahrens zfsvfs->z_issnap = B_TRUE; 471*fa9e4066Sahrens } else { 472*fa9e4066Sahrens int do_readonly = FALSE, readonly; 473*fa9e4066Sahrens int do_setuid = FALSE, setuid; 474*fa9e4066Sahrens int do_exec = FALSE, exec; 475*fa9e4066Sahrens int do_devices = FALSE, devices; 476*fa9e4066Sahrens 477*fa9e4066Sahrens /* 478*fa9e4066Sahrens * Start a delete thread running. 479*fa9e4066Sahrens */ 480*fa9e4066Sahrens (void) zfs_delete_thread_target(zfsvfs, 1); 481*fa9e4066Sahrens 482*fa9e4066Sahrens /* 483*fa9e4066Sahrens * Parse and replay the intent log. 484*fa9e4066Sahrens */ 485*fa9e4066Sahrens zil_replay(os, zfsvfs, &zfsvfs->z_assign, zfs_replay_vector, 486*fa9e4066Sahrens (void (*)(void *))zfs_delete_wait_empty); 487*fa9e4066Sahrens 488*fa9e4066Sahrens if (!zil_disable) 489*fa9e4066Sahrens zfsvfs->z_log = zil_open(os, zfs_get_data); 490*fa9e4066Sahrens 491*fa9e4066Sahrens /* 492*fa9e4066Sahrens * The act of registering our callbacks will destroy any mount 493*fa9e4066Sahrens * options we may have. In order to enable temporary overrides 494*fa9e4066Sahrens * of mount options, we stash away the current values and 495*fa9e4066Sahrens * restore them after we register the callbacks. 496*fa9e4066Sahrens */ 497*fa9e4066Sahrens if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 498*fa9e4066Sahrens readonly = B_TRUE; 499*fa9e4066Sahrens do_readonly = B_TRUE; 500*fa9e4066Sahrens } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 501*fa9e4066Sahrens readonly = B_FALSE; 502*fa9e4066Sahrens do_readonly = B_TRUE; 503*fa9e4066Sahrens } 504*fa9e4066Sahrens if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 505*fa9e4066Sahrens devices = B_FALSE; 506*fa9e4066Sahrens setuid = B_FALSE; 507*fa9e4066Sahrens do_devices = B_TRUE; 508*fa9e4066Sahrens do_setuid = B_TRUE; 509*fa9e4066Sahrens } else { 510*fa9e4066Sahrens if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { 511*fa9e4066Sahrens devices = B_FALSE; 512*fa9e4066Sahrens do_devices = B_TRUE; 513*fa9e4066Sahrens } else if (vfs_optionisset(vfsp, 514*fa9e4066Sahrens MNTOPT_DEVICES, NULL)) { 515*fa9e4066Sahrens devices = B_TRUE; 516*fa9e4066Sahrens do_devices = B_TRUE; 517*fa9e4066Sahrens } 518*fa9e4066Sahrens 519*fa9e4066Sahrens if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 520*fa9e4066Sahrens setuid = B_FALSE; 521*fa9e4066Sahrens do_setuid = B_TRUE; 522*fa9e4066Sahrens } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 523*fa9e4066Sahrens setuid = B_TRUE; 524*fa9e4066Sahrens do_setuid = B_TRUE; 525*fa9e4066Sahrens } 526*fa9e4066Sahrens } 527*fa9e4066Sahrens if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 528*fa9e4066Sahrens exec = B_FALSE; 529*fa9e4066Sahrens do_exec = B_TRUE; 530*fa9e4066Sahrens } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 531*fa9e4066Sahrens exec = B_TRUE; 532*fa9e4066Sahrens do_exec = B_TRUE; 533*fa9e4066Sahrens } 534*fa9e4066Sahrens 535*fa9e4066Sahrens /* 536*fa9e4066Sahrens * Register property callbacks. 537*fa9e4066Sahrens */ 538*fa9e4066Sahrens ds = dmu_objset_ds(os); 539*fa9e4066Sahrens VERIFY(dsl_prop_register(ds, "atime", atime_changed_cb, 540*fa9e4066Sahrens zfsvfs) == 0); 541*fa9e4066Sahrens 542*fa9e4066Sahrens VERIFY(dsl_prop_register(ds, "recordsize", blksz_changed_cb, 543*fa9e4066Sahrens zfsvfs) == 0); 544*fa9e4066Sahrens 545*fa9e4066Sahrens VERIFY(dsl_prop_register(ds, "readonly", readonly_changed_cb, 546*fa9e4066Sahrens zfsvfs) == 0); 547*fa9e4066Sahrens 548*fa9e4066Sahrens VERIFY(dsl_prop_register(ds, "devices", devices_changed_cb, 549*fa9e4066Sahrens zfsvfs) == 0); 550*fa9e4066Sahrens 551*fa9e4066Sahrens VERIFY(dsl_prop_register(ds, "setuid", setuid_changed_cb, 552*fa9e4066Sahrens zfsvfs) == 0); 553*fa9e4066Sahrens 554*fa9e4066Sahrens VERIFY(dsl_prop_register(ds, "exec", exec_changed_cb, 555*fa9e4066Sahrens zfsvfs) == 0); 556*fa9e4066Sahrens 557*fa9e4066Sahrens VERIFY(dsl_prop_register(ds, "snapdir", snapdir_changed_cb, 558*fa9e4066Sahrens zfsvfs) == 0); 559*fa9e4066Sahrens 560*fa9e4066Sahrens VERIFY(dsl_prop_register(ds, "aclmode", acl_mode_changed_cb, 561*fa9e4066Sahrens zfsvfs) == 0); 562*fa9e4066Sahrens 563*fa9e4066Sahrens VERIFY(dsl_prop_register(ds, "aclinherit", 564*fa9e4066Sahrens acl_inherit_changed_cb, zfsvfs) == 0); 565*fa9e4066Sahrens 566*fa9e4066Sahrens 567*fa9e4066Sahrens /* 568*fa9e4066Sahrens * Invoke our callbacks to restore temporary mount options. 569*fa9e4066Sahrens */ 570*fa9e4066Sahrens if (do_readonly) 571*fa9e4066Sahrens readonly_changed_cb(zfsvfs, readonly); 572*fa9e4066Sahrens if (do_setuid) 573*fa9e4066Sahrens setuid_changed_cb(zfsvfs, setuid); 574*fa9e4066Sahrens if (do_exec) 575*fa9e4066Sahrens exec_changed_cb(zfsvfs, exec); 576*fa9e4066Sahrens if (do_devices) 577*fa9e4066Sahrens devices_changed_cb(zfsvfs, devices); 578*fa9e4066Sahrens } 579*fa9e4066Sahrens 580*fa9e4066Sahrens vp = ZTOV(zp); 581*fa9e4066Sahrens if (!zfsvfs->z_issnap) 582*fa9e4066Sahrens zfsctl_create(zfsvfs); 583*fa9e4066Sahrens out: 584*fa9e4066Sahrens if (error) { 585*fa9e4066Sahrens if (zp) 586*fa9e4066Sahrens VN_RELE(vp); 587*fa9e4066Sahrens 588*fa9e4066Sahrens if (zfsvfs) { 589*fa9e4066Sahrens if (os) 590*fa9e4066Sahrens dmu_objset_close(os); 591*fa9e4066Sahrens kmem_free(zfsvfs, sizeof (zfsvfs_t)); 592*fa9e4066Sahrens } 593*fa9e4066Sahrens } else { 594*fa9e4066Sahrens atomic_add_32(&zfs_active_fs_count, 1); 595*fa9e4066Sahrens VN_RELE(vp); 596*fa9e4066Sahrens } 597*fa9e4066Sahrens 598*fa9e4066Sahrens pn_free(&spn); 599*fa9e4066Sahrens return (error); 600*fa9e4066Sahrens } 601*fa9e4066Sahrens 602*fa9e4066Sahrens static int 603*fa9e4066Sahrens zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp) 604*fa9e4066Sahrens { 605*fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 606*fa9e4066Sahrens dmu_objset_stats_t dstats; 607*fa9e4066Sahrens dev32_t d32; 608*fa9e4066Sahrens 609*fa9e4066Sahrens ZFS_ENTER(zfsvfs); 610*fa9e4066Sahrens 611*fa9e4066Sahrens dmu_objset_stats(zfsvfs->z_os, &dstats); 612*fa9e4066Sahrens 613*fa9e4066Sahrens /* 614*fa9e4066Sahrens * The underlying storage pool actually uses multiple block sizes. 615*fa9e4066Sahrens * We report the fragsize as the smallest block size we support, 616*fa9e4066Sahrens * and we report our blocksize as the filesystem's maximum blocksize. 617*fa9e4066Sahrens */ 618*fa9e4066Sahrens statp->f_frsize = 1UL << SPA_MINBLOCKSHIFT; 619*fa9e4066Sahrens statp->f_bsize = zfsvfs->z_max_blksz; 620*fa9e4066Sahrens 621*fa9e4066Sahrens /* 622*fa9e4066Sahrens * The following report "total" blocks of various kinds in the 623*fa9e4066Sahrens * file system, but reported in terms of f_frsize - the 624*fa9e4066Sahrens * "fragment" size. 625*fa9e4066Sahrens */ 626*fa9e4066Sahrens 627*fa9e4066Sahrens statp->f_blocks = 628*fa9e4066Sahrens (dstats.dds_space_refd + dstats.dds_available) >> SPA_MINBLOCKSHIFT; 629*fa9e4066Sahrens statp->f_bfree = dstats.dds_available >> SPA_MINBLOCKSHIFT; 630*fa9e4066Sahrens statp->f_bavail = statp->f_bfree; /* no root reservation */ 631*fa9e4066Sahrens 632*fa9e4066Sahrens /* 633*fa9e4066Sahrens * statvfs() should really be called statufs(), because it assumes 634*fa9e4066Sahrens * static metadata. ZFS doesn't preallocate files, so the best 635*fa9e4066Sahrens * we can do is report the max that could possibly fit in f_files, 636*fa9e4066Sahrens * and that minus the number actually used in f_ffree. 637*fa9e4066Sahrens * For f_ffree, report the smaller of the number of object available 638*fa9e4066Sahrens * and the number of blocks (each object will take at least a block). 639*fa9e4066Sahrens */ 640*fa9e4066Sahrens statp->f_ffree = MIN(dstats.dds_objects_avail, statp->f_bfree); 641*fa9e4066Sahrens statp->f_favail = statp->f_ffree; /* no "root reservation" */ 642*fa9e4066Sahrens statp->f_files = statp->f_ffree + dstats.dds_objects_used; 643*fa9e4066Sahrens 644*fa9e4066Sahrens (void) cmpldev(&d32, vfsp->vfs_dev); 645*fa9e4066Sahrens statp->f_fsid = d32; 646*fa9e4066Sahrens 647*fa9e4066Sahrens /* 648*fa9e4066Sahrens * We're a zfs filesystem. 649*fa9e4066Sahrens */ 650*fa9e4066Sahrens (void) strcpy(statp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name); 651*fa9e4066Sahrens 652*fa9e4066Sahrens statp->f_flag = 0; 653*fa9e4066Sahrens 654*fa9e4066Sahrens statp->f_namemax = ZFS_MAXNAMELEN; 655*fa9e4066Sahrens 656*fa9e4066Sahrens /* 657*fa9e4066Sahrens * We have all of 32 characters to stuff a string here. 658*fa9e4066Sahrens * Is there anything useful we could/should provide? 659*fa9e4066Sahrens */ 660*fa9e4066Sahrens bzero(statp->f_fstr, sizeof (statp->f_fstr)); 661*fa9e4066Sahrens 662*fa9e4066Sahrens ZFS_EXIT(zfsvfs); 663*fa9e4066Sahrens return (0); 664*fa9e4066Sahrens } 665*fa9e4066Sahrens 666*fa9e4066Sahrens static int 667*fa9e4066Sahrens zfs_root(vfs_t *vfsp, vnode_t **vpp) 668*fa9e4066Sahrens { 669*fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 670*fa9e4066Sahrens znode_t *rootzp; 671*fa9e4066Sahrens int error; 672*fa9e4066Sahrens 673*fa9e4066Sahrens ZFS_ENTER(zfsvfs); 674*fa9e4066Sahrens 675*fa9e4066Sahrens error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 676*fa9e4066Sahrens if (error == 0) 677*fa9e4066Sahrens *vpp = ZTOV(rootzp); 678*fa9e4066Sahrens 679*fa9e4066Sahrens ZFS_EXIT(zfsvfs); 680*fa9e4066Sahrens return (error); 681*fa9e4066Sahrens } 682*fa9e4066Sahrens 683*fa9e4066Sahrens /*ARGSUSED*/ 684*fa9e4066Sahrens static int 685*fa9e4066Sahrens zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr) 686*fa9e4066Sahrens { 687*fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 688*fa9e4066Sahrens int ret; 689*fa9e4066Sahrens 690*fa9e4066Sahrens if ((ret = secpolicy_fs_unmount(cr, vfsp)) != 0) 691*fa9e4066Sahrens return (ret); 692*fa9e4066Sahrens 693*fa9e4066Sahrens /* 694*fa9e4066Sahrens * Unmount any snapshots mounted under .zfs before unmounting the 695*fa9e4066Sahrens * dataset itself. 696*fa9e4066Sahrens */ 697*fa9e4066Sahrens if (zfsvfs->z_ctldir != NULL && 698*fa9e4066Sahrens (ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) 699*fa9e4066Sahrens return (ret); 700*fa9e4066Sahrens 701*fa9e4066Sahrens if (fflag & MS_FORCE) { 702*fa9e4066Sahrens vfsp->vfs_flag |= VFS_UNMOUNTED; 703*fa9e4066Sahrens zfsvfs->z_unmounted1 = B_TRUE; 704*fa9e4066Sahrens 705*fa9e4066Sahrens /* 706*fa9e4066Sahrens * Wait for all zfs threads to leave zfs. 707*fa9e4066Sahrens * Grabbing a rwlock as reader in all vops and 708*fa9e4066Sahrens * as writer here doesn't work because it too easy to get 709*fa9e4066Sahrens * multiple reader enters as zfs can re-enter itself. 710*fa9e4066Sahrens * This can lead to deadlock if there is an intervening 711*fa9e4066Sahrens * rw_enter as writer. 712*fa9e4066Sahrens * So a file system threads ref count (z_op_cnt) is used. 713*fa9e4066Sahrens * A polling loop on z_op_cnt may seem inefficient, but 714*fa9e4066Sahrens * - this saves all threads on exit from having to grab a 715*fa9e4066Sahrens * mutex in order to cv_signal 716*fa9e4066Sahrens * - only occurs on forced unmount in the rare case when 717*fa9e4066Sahrens * there are outstanding threads within the file system. 718*fa9e4066Sahrens */ 719*fa9e4066Sahrens while (zfsvfs->z_op_cnt) { 720*fa9e4066Sahrens delay(1); 721*fa9e4066Sahrens } 722*fa9e4066Sahrens 723*fa9e4066Sahrens zfs_objset_close(zfsvfs); 724*fa9e4066Sahrens 725*fa9e4066Sahrens return (0); 726*fa9e4066Sahrens } 727*fa9e4066Sahrens 728*fa9e4066Sahrens zfs_zcache_flush(zfsvfs); 729*fa9e4066Sahrens 730*fa9e4066Sahrens /* 731*fa9e4066Sahrens * Stop all delete threads. 732*fa9e4066Sahrens */ 733*fa9e4066Sahrens (void) zfs_delete_thread_target(zfsvfs, 0); 734*fa9e4066Sahrens 735*fa9e4066Sahrens /* 736*fa9e4066Sahrens * Check the number of active vnodes in the file system. 737*fa9e4066Sahrens * Our count is maintained in the vfs structure, but the number 738*fa9e4066Sahrens * is off by 1 to indicate a hold on the vfs structure itself. 739*fa9e4066Sahrens * 740*fa9e4066Sahrens * The '.zfs' directory maintains a reference of its own, and any active 741*fa9e4066Sahrens * references underneath are reflected in the vnode count. 742*fa9e4066Sahrens */ 743*fa9e4066Sahrens if (zfsvfs->z_ctldir == NULL) { 744*fa9e4066Sahrens if (vfsp->vfs_count > 1) { 745*fa9e4066Sahrens if ((zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) == 0) 746*fa9e4066Sahrens (void) zfs_delete_thread_target(zfsvfs, 1); 747*fa9e4066Sahrens return (EBUSY); 748*fa9e4066Sahrens } 749*fa9e4066Sahrens } else { 750*fa9e4066Sahrens if (vfsp->vfs_count > 2 || 751*fa9e4066Sahrens (zfsvfs->z_ctldir->v_count > 1 && !(fflag & MS_FORCE))) { 752*fa9e4066Sahrens if ((zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) == 0) 753*fa9e4066Sahrens (void) zfs_delete_thread_target(zfsvfs, 1); 754*fa9e4066Sahrens return (EBUSY); 755*fa9e4066Sahrens } 756*fa9e4066Sahrens } 757*fa9e4066Sahrens 758*fa9e4066Sahrens vfsp->vfs_flag |= VFS_UNMOUNTED; 759*fa9e4066Sahrens zfs_objset_close(zfsvfs); 760*fa9e4066Sahrens 761*fa9e4066Sahrens /* 762*fa9e4066Sahrens * We can now safely destroy the '.zfs' directory node, which will 763*fa9e4066Sahrens * release its hold on the vfs_t. 764*fa9e4066Sahrens */ 765*fa9e4066Sahrens if (zfsvfs->z_ctldir != NULL) 766*fa9e4066Sahrens zfsctl_destroy(zfsvfs); 767*fa9e4066Sahrens 768*fa9e4066Sahrens return (0); 769*fa9e4066Sahrens } 770*fa9e4066Sahrens 771*fa9e4066Sahrens static int 772*fa9e4066Sahrens zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 773*fa9e4066Sahrens { 774*fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 775*fa9e4066Sahrens znode_t *zp; 776*fa9e4066Sahrens uint64_t object = 0; 777*fa9e4066Sahrens uint64_t fid_gen = 0; 778*fa9e4066Sahrens uint64_t gen_mask; 779*fa9e4066Sahrens uint64_t zp_gen; 780*fa9e4066Sahrens int i, err; 781*fa9e4066Sahrens 782*fa9e4066Sahrens *vpp = NULL; 783*fa9e4066Sahrens 784*fa9e4066Sahrens ZFS_ENTER(zfsvfs); 785*fa9e4066Sahrens 786*fa9e4066Sahrens if (fidp->fid_len == LONG_FID_LEN) { 787*fa9e4066Sahrens zfid_long_t *zlfid = (zfid_long_t *)fidp; 788*fa9e4066Sahrens uint64_t objsetid = 0; 789*fa9e4066Sahrens uint64_t setgen = 0; 790*fa9e4066Sahrens 791*fa9e4066Sahrens for (i = 0; i < sizeof (zlfid->zf_setid); i++) 792*fa9e4066Sahrens objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 793*fa9e4066Sahrens 794*fa9e4066Sahrens for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 795*fa9e4066Sahrens setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 796*fa9e4066Sahrens 797*fa9e4066Sahrens ZFS_EXIT(zfsvfs); 798*fa9e4066Sahrens 799*fa9e4066Sahrens err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 800*fa9e4066Sahrens if (err) 801*fa9e4066Sahrens return (EINVAL); 802*fa9e4066Sahrens ZFS_ENTER(zfsvfs); 803*fa9e4066Sahrens } 804*fa9e4066Sahrens 805*fa9e4066Sahrens if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 806*fa9e4066Sahrens zfid_short_t *zfid = (zfid_short_t *)fidp; 807*fa9e4066Sahrens 808*fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_object); i++) 809*fa9e4066Sahrens object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 810*fa9e4066Sahrens 811*fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_gen); i++) 812*fa9e4066Sahrens fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 813*fa9e4066Sahrens } else { 814*fa9e4066Sahrens ZFS_EXIT(zfsvfs); 815*fa9e4066Sahrens return (EINVAL); 816*fa9e4066Sahrens } 817*fa9e4066Sahrens 818*fa9e4066Sahrens /* A zero fid_gen means we are in the .zfs control directories */ 819*fa9e4066Sahrens if (fid_gen == 0 && 820*fa9e4066Sahrens (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { 821*fa9e4066Sahrens *vpp = zfsvfs->z_ctldir; 822*fa9e4066Sahrens ASSERT(*vpp != NULL); 823*fa9e4066Sahrens if (object == ZFSCTL_INO_SNAPDIR) { 824*fa9e4066Sahrens VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, 825*fa9e4066Sahrens 0, NULL, NULL) == 0); 826*fa9e4066Sahrens } else { 827*fa9e4066Sahrens VN_HOLD(*vpp); 828*fa9e4066Sahrens } 829*fa9e4066Sahrens ZFS_EXIT(zfsvfs); 830*fa9e4066Sahrens return (0); 831*fa9e4066Sahrens } 832*fa9e4066Sahrens 833*fa9e4066Sahrens gen_mask = -1ULL >> (64 - 8 * i); 834*fa9e4066Sahrens 835*fa9e4066Sahrens dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 836*fa9e4066Sahrens if (err = zfs_zget(zfsvfs, object, &zp)) { 837*fa9e4066Sahrens ZFS_EXIT(zfsvfs); 838*fa9e4066Sahrens return (err); 839*fa9e4066Sahrens } 840*fa9e4066Sahrens zp_gen = zp->z_phys->zp_gen & gen_mask; 841*fa9e4066Sahrens if (zp_gen == 0) 842*fa9e4066Sahrens zp_gen = 1; 843*fa9e4066Sahrens if (zp->z_reap || zp_gen != fid_gen) { 844*fa9e4066Sahrens dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 845*fa9e4066Sahrens VN_RELE(ZTOV(zp)); 846*fa9e4066Sahrens ZFS_EXIT(zfsvfs); 847*fa9e4066Sahrens return (EINVAL); 848*fa9e4066Sahrens } 849*fa9e4066Sahrens 850*fa9e4066Sahrens *vpp = ZTOV(zp); 851*fa9e4066Sahrens ZFS_EXIT(zfsvfs); 852*fa9e4066Sahrens return (0); 853*fa9e4066Sahrens } 854*fa9e4066Sahrens 855*fa9e4066Sahrens static void 856*fa9e4066Sahrens zfs_objset_close(zfsvfs_t *zfsvfs) 857*fa9e4066Sahrens { 858*fa9e4066Sahrens zfs_delete_t *zd = &zfsvfs->z_delete_head; 859*fa9e4066Sahrens znode_t *zp, *nextzp; 860*fa9e4066Sahrens objset_t *os = zfsvfs->z_os; 861*fa9e4066Sahrens struct dsl_dataset *ds; 862*fa9e4066Sahrens 863*fa9e4066Sahrens /* 864*fa9e4066Sahrens * Stop all delete threads. 865*fa9e4066Sahrens */ 866*fa9e4066Sahrens (void) zfs_delete_thread_target(zfsvfs, 0); 867*fa9e4066Sahrens 868*fa9e4066Sahrens /* 869*fa9e4066Sahrens * For forced unmount, at this point all vops except zfs_inactive 870*fa9e4066Sahrens * are erroring EIO. We need to now suspend zfs_inactive threads 871*fa9e4066Sahrens * while we are freeing dbufs before switching zfs_inactive 872*fa9e4066Sahrens * to use behaviour without a objset. 873*fa9e4066Sahrens */ 874*fa9e4066Sahrens rw_enter(&zfsvfs->z_um_lock, RW_WRITER); 875*fa9e4066Sahrens 876*fa9e4066Sahrens zfs_zcache_flush(zfsvfs); 877*fa9e4066Sahrens 878*fa9e4066Sahrens /* 879*fa9e4066Sahrens * Release all delete in progress znodes 880*fa9e4066Sahrens * They will be processed when the file system remounts. 881*fa9e4066Sahrens */ 882*fa9e4066Sahrens mutex_enter(&zd->z_mutex); 883*fa9e4066Sahrens while (zp = list_head(&zd->z_znodes)) { 884*fa9e4066Sahrens list_remove(&zd->z_znodes, zp); 885*fa9e4066Sahrens zp->z_dbuf_held = 0; 886*fa9e4066Sahrens dmu_buf_rele(zp->z_dbuf); 887*fa9e4066Sahrens } 888*fa9e4066Sahrens mutex_exit(&zd->z_mutex); 889*fa9e4066Sahrens 890*fa9e4066Sahrens /* 891*fa9e4066Sahrens * Release all holds on dbufs 892*fa9e4066Sahrens * Note, although we have stopped all other vop threads and 893*fa9e4066Sahrens * zfs_inactive(), the dmu can callback via znode_pageout_func() 894*fa9e4066Sahrens * which can zfs_znode_free() the znode. 895*fa9e4066Sahrens * So we lock z_all_znodes; search the list for a held 896*fa9e4066Sahrens * dbuf; drop the lock (we know zp can't disappear if we hold 897*fa9e4066Sahrens * a dbuf lock; then regrab the lock and restart. 898*fa9e4066Sahrens */ 899*fa9e4066Sahrens mutex_enter(&zfsvfs->z_znodes_lock); 900*fa9e4066Sahrens for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) { 901*fa9e4066Sahrens nextzp = list_next(&zfsvfs->z_all_znodes, zp); 902*fa9e4066Sahrens if (zp->z_dbuf_held) { 903*fa9e4066Sahrens /* dbufs should only be held when force unmounting */ 904*fa9e4066Sahrens zp->z_dbuf_held = 0; 905*fa9e4066Sahrens mutex_exit(&zfsvfs->z_znodes_lock); 906*fa9e4066Sahrens dmu_buf_rele(zp->z_dbuf); 907*fa9e4066Sahrens /* Start again */ 908*fa9e4066Sahrens mutex_enter(&zfsvfs->z_znodes_lock); 909*fa9e4066Sahrens nextzp = list_head(&zfsvfs->z_all_znodes); 910*fa9e4066Sahrens } 911*fa9e4066Sahrens } 912*fa9e4066Sahrens mutex_exit(&zfsvfs->z_znodes_lock); 913*fa9e4066Sahrens 914*fa9e4066Sahrens /* 915*fa9e4066Sahrens * Unregister properties. 916*fa9e4066Sahrens */ 917*fa9e4066Sahrens if (!dmu_objset_is_snapshot(os)) { 918*fa9e4066Sahrens ds = dmu_objset_ds(os); 919*fa9e4066Sahrens 920*fa9e4066Sahrens VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 921*fa9e4066Sahrens zfsvfs) == 0); 922*fa9e4066Sahrens 923*fa9e4066Sahrens VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 924*fa9e4066Sahrens zfsvfs) == 0); 925*fa9e4066Sahrens 926*fa9e4066Sahrens VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 927*fa9e4066Sahrens zfsvfs) == 0); 928*fa9e4066Sahrens 929*fa9e4066Sahrens VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb, 930*fa9e4066Sahrens zfsvfs) == 0); 931*fa9e4066Sahrens 932*fa9e4066Sahrens VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 933*fa9e4066Sahrens zfsvfs) == 0); 934*fa9e4066Sahrens 935*fa9e4066Sahrens VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 936*fa9e4066Sahrens zfsvfs) == 0); 937*fa9e4066Sahrens 938*fa9e4066Sahrens VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 939*fa9e4066Sahrens zfsvfs) == 0); 940*fa9e4066Sahrens 941*fa9e4066Sahrens VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 942*fa9e4066Sahrens zfsvfs) == 0); 943*fa9e4066Sahrens 944*fa9e4066Sahrens VERIFY(dsl_prop_unregister(ds, "aclinherit", 945*fa9e4066Sahrens acl_inherit_changed_cb, zfsvfs) == 0); 946*fa9e4066Sahrens } 947*fa9e4066Sahrens 948*fa9e4066Sahrens /* 949*fa9e4066Sahrens * Make the dmu drop all it dbuf holds so that zfs_inactive 950*fa9e4066Sahrens * can then safely free znode/vnodes. 951*fa9e4066Sahrens */ 952*fa9e4066Sahrens txg_wait_synced(dmu_objset_pool(os), 0); 953*fa9e4066Sahrens 954*fa9e4066Sahrens /* 955*fa9e4066Sahrens * Switch zfs_inactive to behaviour without an objset. 956*fa9e4066Sahrens * It just tosses cached pages and frees the znode & vnode. 957*fa9e4066Sahrens * Then re-enable zfs_inactive threads in that new behaviour. 958*fa9e4066Sahrens */ 959*fa9e4066Sahrens zfsvfs->z_unmounted2 = B_TRUE; 960*fa9e4066Sahrens rw_exit(&zfsvfs->z_um_lock); /* re-enable any zfs_inactive threads */ 961*fa9e4066Sahrens 962*fa9e4066Sahrens /* 963*fa9e4066Sahrens * Close the zil. Can't close the zil while zfs_inactive 964*fa9e4066Sahrens * threads are blocked as zil_close can call zfs_inactive. 965*fa9e4066Sahrens */ 966*fa9e4066Sahrens if (zfsvfs->z_log) { 967*fa9e4066Sahrens zil_close(zfsvfs->z_log); 968*fa9e4066Sahrens zfsvfs->z_log = NULL; 969*fa9e4066Sahrens } 970*fa9e4066Sahrens 971*fa9e4066Sahrens /* 972*fa9e4066Sahrens * Finally close the objset 973*fa9e4066Sahrens */ 974*fa9e4066Sahrens dmu_objset_close(os); 975*fa9e4066Sahrens 976*fa9e4066Sahrens } 977*fa9e4066Sahrens 978*fa9e4066Sahrens static void 979*fa9e4066Sahrens zfs_freevfs(vfs_t *vfsp) 980*fa9e4066Sahrens { 981*fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 982*fa9e4066Sahrens 983*fa9e4066Sahrens kmem_free(zfsvfs, sizeof (zfsvfs_t)); 984*fa9e4066Sahrens 985*fa9e4066Sahrens atomic_add_32(&zfs_active_fs_count, -1); 986*fa9e4066Sahrens } 987*fa9e4066Sahrens 988*fa9e4066Sahrens /* 989*fa9e4066Sahrens * VFS_INIT() initialization. Note that there is no VFS_FINI(), 990*fa9e4066Sahrens * so we can't safely do any non-idempotent initialization here. 991*fa9e4066Sahrens * Leave that to zfs_init() and zfs_fini(), which are called 992*fa9e4066Sahrens * from the module's _init() and _fini() entry points. 993*fa9e4066Sahrens */ 994*fa9e4066Sahrens /*ARGSUSED*/ 995*fa9e4066Sahrens static int 996*fa9e4066Sahrens zfs_vfsinit(int fstype, char *name) 997*fa9e4066Sahrens { 998*fa9e4066Sahrens int error; 999*fa9e4066Sahrens 1000*fa9e4066Sahrens zfsfstype = fstype; 1001*fa9e4066Sahrens 1002*fa9e4066Sahrens /* 1003*fa9e4066Sahrens * Setup vfsops and vnodeops tables. 1004*fa9e4066Sahrens */ 1005*fa9e4066Sahrens error = vfs_setfsops(fstype, zfs_vfsops_template, &zfs_vfsops); 1006*fa9e4066Sahrens if (error != 0) { 1007*fa9e4066Sahrens cmn_err(CE_WARN, "zfs: bad vfs ops template"); 1008*fa9e4066Sahrens } 1009*fa9e4066Sahrens 1010*fa9e4066Sahrens error = zfs_create_op_tables(); 1011*fa9e4066Sahrens if (error) { 1012*fa9e4066Sahrens zfs_remove_op_tables(); 1013*fa9e4066Sahrens cmn_err(CE_WARN, "zfs: bad vnode ops template"); 1014*fa9e4066Sahrens (void) vfs_freevfsops_by_type(zfsfstype); 1015*fa9e4066Sahrens return (error); 1016*fa9e4066Sahrens } 1017*fa9e4066Sahrens 1018*fa9e4066Sahrens mutex_init(&zfs_dev_mtx, NULL, MUTEX_DEFAULT, NULL); 1019*fa9e4066Sahrens 1020*fa9e4066Sahrens /* 1021*fa9e4066Sahrens * unique major number for all zfs mounts 1022*fa9e4066Sahrens */ 1023*fa9e4066Sahrens if ((zfs_major = getudev()) == (major_t)-1) { 1024*fa9e4066Sahrens cmn_err(CE_WARN, 1025*fa9e4066Sahrens "zfs_vfsinit: Can't get unique device number."); 1026*fa9e4066Sahrens zfs_remove_op_tables(); 1027*fa9e4066Sahrens (void) vfs_freevfsops_by_type(zfsfstype); 1028*fa9e4066Sahrens return (error); 1029*fa9e4066Sahrens } 1030*fa9e4066Sahrens zfs_minor = 0; 1031*fa9e4066Sahrens 1032*fa9e4066Sahrens return (0); 1033*fa9e4066Sahrens } 1034*fa9e4066Sahrens 1035*fa9e4066Sahrens void 1036*fa9e4066Sahrens zfs_init(void) 1037*fa9e4066Sahrens { 1038*fa9e4066Sahrens /* 1039*fa9e4066Sahrens * Initialize .zfs directory structures 1040*fa9e4066Sahrens */ 1041*fa9e4066Sahrens zfsctl_init(); 1042*fa9e4066Sahrens 1043*fa9e4066Sahrens /* 1044*fa9e4066Sahrens * Initialize znode cache, vnode ops, etc... 1045*fa9e4066Sahrens */ 1046*fa9e4066Sahrens zfs_znode_init(); 1047*fa9e4066Sahrens } 1048*fa9e4066Sahrens 1049*fa9e4066Sahrens void 1050*fa9e4066Sahrens zfs_fini(void) 1051*fa9e4066Sahrens { 1052*fa9e4066Sahrens zfsctl_fini(); 1053*fa9e4066Sahrens zfs_znode_fini(); 1054*fa9e4066Sahrens } 1055*fa9e4066Sahrens 1056*fa9e4066Sahrens int 1057*fa9e4066Sahrens zfs_busy(void) 1058*fa9e4066Sahrens { 1059*fa9e4066Sahrens return (zfs_active_fs_count != 0); 1060*fa9e4066Sahrens } 1061*fa9e4066Sahrens 1062*fa9e4066Sahrens static vfsdef_t vfw = { 1063*fa9e4066Sahrens VFSDEF_VERSION, 1064*fa9e4066Sahrens MNTTYPE_ZFS, 1065*fa9e4066Sahrens zfs_vfsinit, 1066*fa9e4066Sahrens VSW_HASPROTO | VSW_CANRWRO | VSW_CANREMOUNT | VSW_VOLATILEDEV, 1067*fa9e4066Sahrens &zfs_mntopts 1068*fa9e4066Sahrens }; 1069*fa9e4066Sahrens 1070*fa9e4066Sahrens struct modlfs zfs_modlfs = { 1071*fa9e4066Sahrens &mod_fsops, "ZFS filesystem version 1", &vfw 1072*fa9e4066Sahrens }; 1073