1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5*033f9833Sek * Common Development and Distribution License (the "License"). 6*033f9833Sek * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 228afd4dd6Sperrin * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23fa9e4066Sahrens * Use is subject to license terms. 24fa9e4066Sahrens */ 25fa9e4066Sahrens 26fa9e4066Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27fa9e4066Sahrens 28fa9e4066Sahrens #include <sys/types.h> 29fa9e4066Sahrens #include <sys/param.h> 30fa9e4066Sahrens #include <sys/systm.h> 31fa9e4066Sahrens #include <sys/sysmacros.h> 32fa9e4066Sahrens #include <sys/kmem.h> 33fa9e4066Sahrens #include <sys/pathname.h> 34fa9e4066Sahrens #include <sys/acl.h> 35fa9e4066Sahrens #include <sys/vnode.h> 36fa9e4066Sahrens #include <sys/vfs.h> 37fa9e4066Sahrens #include <sys/mntent.h> 38fa9e4066Sahrens #include <sys/mount.h> 39fa9e4066Sahrens #include <sys/cmn_err.h> 40fa9e4066Sahrens #include "fs/fs_subr.h" 41fa9e4066Sahrens #include <sys/zfs_znode.h> 42fa9e4066Sahrens #include <sys/zil.h> 43fa9e4066Sahrens #include <sys/fs/zfs.h> 44fa9e4066Sahrens #include <sys/dmu.h> 45fa9e4066Sahrens #include <sys/dsl_prop.h> 46fa9e4066Sahrens #include <sys/spa.h> 47fa9e4066Sahrens #include <sys/zap.h> 48fa9e4066Sahrens #include <sys/varargs.h> 49fa9e4066Sahrens #include <sys/policy.h> 50fa9e4066Sahrens #include <sys/atomic.h> 51fa9e4066Sahrens #include <sys/mkdev.h> 52fa9e4066Sahrens #include <sys/modctl.h> 53fa9e4066Sahrens #include <sys/zfs_ioctl.h> 54fa9e4066Sahrens #include <sys/zfs_ctldir.h> 55a0965f35Sbonwick #include <sys/sunddi.h> 56*033f9833Sek #include <sys/dnlc.h> 57fa9e4066Sahrens 58fa9e4066Sahrens int zfsfstype; 59fa9e4066Sahrens vfsops_t *zfs_vfsops = NULL; 60a0965f35Sbonwick static major_t zfs_major; 61fa9e4066Sahrens static minor_t zfs_minor; 62fa9e4066Sahrens static kmutex_t zfs_dev_mtx; 63fa9e4066Sahrens 64fa9e4066Sahrens static int zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr); 65fa9e4066Sahrens static int zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr); 66fa9e4066Sahrens static int zfs_root(vfs_t *vfsp, vnode_t **vpp); 67fa9e4066Sahrens static int zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp); 68fa9e4066Sahrens static int zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp); 69fa9e4066Sahrens static void zfs_freevfs(vfs_t *vfsp); 70fa9e4066Sahrens static void zfs_objset_close(zfsvfs_t *zfsvfs); 71fa9e4066Sahrens 72fa9e4066Sahrens static const fs_operation_def_t zfs_vfsops_template[] = { 73fa9e4066Sahrens VFSNAME_MOUNT, zfs_mount, 74fa9e4066Sahrens VFSNAME_UNMOUNT, zfs_umount, 75fa9e4066Sahrens VFSNAME_ROOT, zfs_root, 76fa9e4066Sahrens VFSNAME_STATVFS, zfs_statvfs, 77fa9e4066Sahrens VFSNAME_SYNC, (fs_generic_func_p) zfs_sync, 78fa9e4066Sahrens VFSNAME_VGET, zfs_vget, 79fa9e4066Sahrens VFSNAME_FREEVFS, (fs_generic_func_p) zfs_freevfs, 80fa9e4066Sahrens NULL, NULL 81fa9e4066Sahrens }; 82fa9e4066Sahrens 83fa9e4066Sahrens static const fs_operation_def_t zfs_vfsops_eio_template[] = { 84fa9e4066Sahrens VFSNAME_FREEVFS, (fs_generic_func_p) zfs_freevfs, 85fa9e4066Sahrens NULL, NULL 86fa9e4066Sahrens }; 87fa9e4066Sahrens 88fa9e4066Sahrens /* 89fa9e4066Sahrens * We need to keep a count of active fs's. 90fa9e4066Sahrens * This is necessary to prevent our module 91fa9e4066Sahrens * from being unloaded after a umount -f 92fa9e4066Sahrens */ 93fa9e4066Sahrens static uint32_t zfs_active_fs_count = 0; 94fa9e4066Sahrens 95fa9e4066Sahrens static char *noatime_cancel[] = { MNTOPT_ATIME, NULL }; 96fa9e4066Sahrens static char *atime_cancel[] = { MNTOPT_NOATIME, NULL }; 97fa9e4066Sahrens 98fa9e4066Sahrens static mntopt_t mntopts[] = { 99fa9e4066Sahrens { MNTOPT_XATTR, NULL, NULL, MO_NODISPLAY|MO_DEFAULT, NULL }, 100fa9e4066Sahrens { MNTOPT_NOATIME, noatime_cancel, NULL, MO_DEFAULT, NULL }, 101fa9e4066Sahrens { MNTOPT_ATIME, atime_cancel, NULL, 0, NULL } 102fa9e4066Sahrens }; 103fa9e4066Sahrens 104fa9e4066Sahrens static mntopts_t zfs_mntopts = { 105fa9e4066Sahrens sizeof (mntopts) / sizeof (mntopt_t), 106fa9e4066Sahrens mntopts 107fa9e4066Sahrens }; 108fa9e4066Sahrens 109fa9e4066Sahrens /*ARGSUSED*/ 110fa9e4066Sahrens int 111fa9e4066Sahrens zfs_sync(vfs_t *vfsp, short flag, cred_t *cr) 112fa9e4066Sahrens { 113fa9e4066Sahrens /* 114fa9e4066Sahrens * Data integrity is job one. We don't want a compromised kernel 115fa9e4066Sahrens * writing to the storage pool, so we never sync during panic. 116fa9e4066Sahrens */ 117fa9e4066Sahrens if (panicstr) 118fa9e4066Sahrens return (0); 119fa9e4066Sahrens 120fa9e4066Sahrens /* 121fa9e4066Sahrens * SYNC_ATTR is used by fsflush() to force old filesystems like UFS 122fa9e4066Sahrens * to sync metadata, which they would otherwise cache indefinitely. 123fa9e4066Sahrens * Semantically, the only requirement is that the sync be initiated. 124fa9e4066Sahrens * The DMU syncs out txgs frequently, so there's nothing to do. 125fa9e4066Sahrens */ 126fa9e4066Sahrens if (flag & SYNC_ATTR) 127fa9e4066Sahrens return (0); 128fa9e4066Sahrens 129fa9e4066Sahrens if (vfsp != NULL) { 130fa9e4066Sahrens /* 131fa9e4066Sahrens * Sync a specific filesystem. 132fa9e4066Sahrens */ 133fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 134fa9e4066Sahrens 135fa9e4066Sahrens ZFS_ENTER(zfsvfs); 136fa9e4066Sahrens if (zfsvfs->z_log != NULL) 137fa9e4066Sahrens zil_commit(zfsvfs->z_log, UINT64_MAX, FSYNC); 138fa9e4066Sahrens else 139fa9e4066Sahrens txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 140fa9e4066Sahrens ZFS_EXIT(zfsvfs); 141fa9e4066Sahrens } else { 142fa9e4066Sahrens /* 143fa9e4066Sahrens * Sync all ZFS filesystems. This is what happens when you 144fa9e4066Sahrens * run sync(1M). Unlike other filesystems, ZFS honors the 145fa9e4066Sahrens * request by waiting for all pools to commit all dirty data. 146fa9e4066Sahrens */ 147fa9e4066Sahrens spa_sync_allpools(); 148fa9e4066Sahrens } 149fa9e4066Sahrens 150fa9e4066Sahrens return (0); 151fa9e4066Sahrens } 152fa9e4066Sahrens 153fa9e4066Sahrens static void 154fa9e4066Sahrens atime_changed_cb(void *arg, uint64_t newval) 155fa9e4066Sahrens { 156fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 157fa9e4066Sahrens 158fa9e4066Sahrens if (newval == TRUE) { 159fa9e4066Sahrens zfsvfs->z_atime = TRUE; 160fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 161fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 162fa9e4066Sahrens } else { 163fa9e4066Sahrens zfsvfs->z_atime = FALSE; 164fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 165fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 166fa9e4066Sahrens } 167fa9e4066Sahrens } 168fa9e4066Sahrens 169fa9e4066Sahrens static void 170fa9e4066Sahrens blksz_changed_cb(void *arg, uint64_t newval) 171fa9e4066Sahrens { 172fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 173fa9e4066Sahrens 174fa9e4066Sahrens if (newval < SPA_MINBLOCKSIZE || 175fa9e4066Sahrens newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 176fa9e4066Sahrens newval = SPA_MAXBLOCKSIZE; 177fa9e4066Sahrens 178fa9e4066Sahrens zfsvfs->z_max_blksz = newval; 179fa9e4066Sahrens zfsvfs->z_vfs->vfs_bsize = newval; 180fa9e4066Sahrens } 181fa9e4066Sahrens 182fa9e4066Sahrens static void 183fa9e4066Sahrens readonly_changed_cb(void *arg, uint64_t newval) 184fa9e4066Sahrens { 185fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 186fa9e4066Sahrens 187fa9e4066Sahrens if (newval) { 188fa9e4066Sahrens /* XXX locking on vfs_flag? */ 189fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 190fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 191fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 192fa9e4066Sahrens (void) zfs_delete_thread_target(zfsvfs, 0); 193fa9e4066Sahrens } else { 194fa9e4066Sahrens /* XXX locking on vfs_flag? */ 195fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 196fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 197fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 198fa9e4066Sahrens (void) zfs_delete_thread_target(zfsvfs, 1); 199fa9e4066Sahrens } 200fa9e4066Sahrens } 201fa9e4066Sahrens 202fa9e4066Sahrens static void 203fa9e4066Sahrens devices_changed_cb(void *arg, uint64_t newval) 204fa9e4066Sahrens { 205fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 206fa9e4066Sahrens 207fa9e4066Sahrens if (newval == FALSE) { 208fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NODEVICES; 209fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES); 210fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES, NULL, 0); 211fa9e4066Sahrens } else { 212fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NODEVICES; 213fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES); 214fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES, NULL, 0); 215fa9e4066Sahrens } 216fa9e4066Sahrens } 217fa9e4066Sahrens 218fa9e4066Sahrens static void 219fa9e4066Sahrens setuid_changed_cb(void *arg, uint64_t newval) 220fa9e4066Sahrens { 221fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 222fa9e4066Sahrens 223fa9e4066Sahrens if (newval == FALSE) { 224fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 225fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 226fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 227fa9e4066Sahrens } else { 228fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 229fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 230fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 231fa9e4066Sahrens } 232fa9e4066Sahrens } 233fa9e4066Sahrens 234fa9e4066Sahrens static void 235fa9e4066Sahrens exec_changed_cb(void *arg, uint64_t newval) 236fa9e4066Sahrens { 237fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 238fa9e4066Sahrens 239fa9e4066Sahrens if (newval == FALSE) { 240fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 241fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 242fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 243fa9e4066Sahrens } else { 244fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 245fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 246fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 247fa9e4066Sahrens } 248fa9e4066Sahrens } 249fa9e4066Sahrens 250fa9e4066Sahrens static void 251fa9e4066Sahrens snapdir_changed_cb(void *arg, uint64_t newval) 252fa9e4066Sahrens { 253fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 254fa9e4066Sahrens 255fa9e4066Sahrens zfsvfs->z_show_ctldir = newval; 256fa9e4066Sahrens } 257fa9e4066Sahrens 258fa9e4066Sahrens static void 259fa9e4066Sahrens acl_mode_changed_cb(void *arg, uint64_t newval) 260fa9e4066Sahrens { 261fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 262fa9e4066Sahrens 263fa9e4066Sahrens zfsvfs->z_acl_mode = newval; 264fa9e4066Sahrens } 265fa9e4066Sahrens 266fa9e4066Sahrens static void 267fa9e4066Sahrens acl_inherit_changed_cb(void *arg, uint64_t newval) 268fa9e4066Sahrens { 269fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 270fa9e4066Sahrens 271fa9e4066Sahrens zfsvfs->z_acl_inherit = newval; 272fa9e4066Sahrens } 273fa9e4066Sahrens 274fa9e4066Sahrens /*ARGSUSED*/ 275fa9e4066Sahrens static int 276fa9e4066Sahrens zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 277fa9e4066Sahrens { 278fa9e4066Sahrens zfsvfs_t *zfsvfs = NULL; 279fa9e4066Sahrens znode_t *zp = NULL; 280fa9e4066Sahrens vnode_t *vp = NULL; 281fa9e4066Sahrens objset_t *os = NULL; 282fa9e4066Sahrens struct dsl_dataset *ds; 283fa9e4066Sahrens char *osname; 284fa9e4066Sahrens uint64_t readonly, recordsize; 285fa9e4066Sahrens pathname_t spn; 286fa9e4066Sahrens dev_t mount_dev; 287fa9e4066Sahrens major_t new_major; 288fa9e4066Sahrens int mode; 289fa9e4066Sahrens int error = 0; 290fa9e4066Sahrens uio_seg_t fromspace = (uap->flags & MS_SYSSPACE) ? 291fa9e4066Sahrens UIO_SYSSPACE : UIO_USERSPACE; 292fa9e4066Sahrens int canwrite; 293fa9e4066Sahrens 294fa9e4066Sahrens if (mvp->v_type != VDIR) 295fa9e4066Sahrens return (ENOTDIR); 296fa9e4066Sahrens 297fa9e4066Sahrens mutex_enter(&mvp->v_lock); 298fa9e4066Sahrens if ((uap->flags & MS_REMOUNT) == 0 && 299fa9e4066Sahrens (uap->flags & MS_OVERLAY) == 0 && 300fa9e4066Sahrens (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 301fa9e4066Sahrens mutex_exit(&mvp->v_lock); 302fa9e4066Sahrens return (EBUSY); 303fa9e4066Sahrens } 304fa9e4066Sahrens mutex_exit(&mvp->v_lock); 305fa9e4066Sahrens 306fa9e4066Sahrens /* 307fa9e4066Sahrens * ZFS does not support passing unparsed data in via MS_DATA. 308fa9e4066Sahrens * Users should use the MS_OPTIONSTR interface; this means 309fa9e4066Sahrens * that all option parsing is already done and the options struct 310fa9e4066Sahrens * can be interrogated. 311fa9e4066Sahrens */ 312fa9e4066Sahrens if ((uap->flags & MS_DATA) && uap->datalen > 0) 313fa9e4066Sahrens return (EINVAL); 314fa9e4066Sahrens 315fa9e4066Sahrens /* 316fa9e4066Sahrens * When doing a remount, we simply refresh our temporary properties 317fa9e4066Sahrens * according to those options set in the current VFS options. 318fa9e4066Sahrens */ 319fa9e4066Sahrens if (uap->flags & MS_REMOUNT) { 320fa9e4066Sahrens zfsvfs = vfsp->vfs_data; 321fa9e4066Sahrens 322fa9e4066Sahrens if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) 323fa9e4066Sahrens readonly_changed_cb(zfsvfs, B_TRUE); 324fa9e4066Sahrens else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 325fa9e4066Sahrens if (dmu_objset_is_snapshot(zfsvfs->z_os)) 326fa9e4066Sahrens return (EROFS); 327fa9e4066Sahrens readonly_changed_cb(zfsvfs, B_FALSE); 328fa9e4066Sahrens } 329fa9e4066Sahrens 330fa9e4066Sahrens if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 331fa9e4066Sahrens devices_changed_cb(zfsvfs, B_FALSE); 332fa9e4066Sahrens setuid_changed_cb(zfsvfs, B_FALSE); 333fa9e4066Sahrens } else { 334fa9e4066Sahrens if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 335fa9e4066Sahrens devices_changed_cb(zfsvfs, B_FALSE); 336fa9e4066Sahrens else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) 337fa9e4066Sahrens devices_changed_cb(zfsvfs, B_TRUE); 338fa9e4066Sahrens 339fa9e4066Sahrens if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 340fa9e4066Sahrens setuid_changed_cb(zfsvfs, B_FALSE); 341fa9e4066Sahrens else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) 342fa9e4066Sahrens setuid_changed_cb(zfsvfs, B_TRUE); 343fa9e4066Sahrens } 344fa9e4066Sahrens 345fa9e4066Sahrens if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) 346fa9e4066Sahrens exec_changed_cb(zfsvfs, B_FALSE); 347fa9e4066Sahrens else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) 348fa9e4066Sahrens exec_changed_cb(zfsvfs, B_TRUE); 349fa9e4066Sahrens 350fa9e4066Sahrens return (0); 351fa9e4066Sahrens } 352fa9e4066Sahrens 353fa9e4066Sahrens /* 354fa9e4066Sahrens * Get the objset name (the "special" mount argument). 355fa9e4066Sahrens */ 356fa9e4066Sahrens if (error = pn_get(uap->spec, fromspace, &spn)) 357fa9e4066Sahrens return (error); 358fa9e4066Sahrens 359fa9e4066Sahrens osname = spn.pn_path; 360fa9e4066Sahrens 361fa9e4066Sahrens if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0) 362fa9e4066Sahrens goto out; 363fa9e4066Sahrens 364fa9e4066Sahrens /* 365fa9e4066Sahrens * Refuse to mount a filesystem if we are in a local zone and the 366fa9e4066Sahrens * dataset is not visible. 367fa9e4066Sahrens */ 368fa9e4066Sahrens if (!INGLOBALZONE(curproc) && 369fa9e4066Sahrens (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 370fa9e4066Sahrens error = EPERM; 371fa9e4066Sahrens goto out; 372fa9e4066Sahrens } 373fa9e4066Sahrens 374fa9e4066Sahrens /* 375fa9e4066Sahrens * Initialize the zfs-specific filesystem structure. 376fa9e4066Sahrens * Should probably make this a kmem cache, shuffle fields, 377fa9e4066Sahrens * and just bzero upto z_hold_mtx[]. 378fa9e4066Sahrens */ 379fa9e4066Sahrens zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 380fa9e4066Sahrens zfsvfs->z_vfs = vfsp; 381fa9e4066Sahrens zfsvfs->z_parent = zfsvfs; 382fa9e4066Sahrens zfsvfs->z_assign = TXG_NOWAIT; 383fa9e4066Sahrens zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 384a0965f35Sbonwick zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 385fa9e4066Sahrens 386fa9e4066Sahrens mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 387fa9e4066Sahrens list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 388fa9e4066Sahrens offsetof(znode_t, z_link_node)); 389fa9e4066Sahrens rw_init(&zfsvfs->z_um_lock, NULL, RW_DEFAULT, NULL); 390fa9e4066Sahrens 391fa9e4066Sahrens /* 392fa9e4066Sahrens * Initialize the generic filesystem structure. 393fa9e4066Sahrens */ 394fa9e4066Sahrens vfsp->vfs_bcount = 0; 395fa9e4066Sahrens vfsp->vfs_data = NULL; 396fa9e4066Sahrens 397fa9e4066Sahrens /* 398fa9e4066Sahrens * Create a unique device for the mount. 399fa9e4066Sahrens */ 400fa9e4066Sahrens do { 401fa9e4066Sahrens ASSERT3U(zfs_minor, <=, MAXMIN32); 402a0965f35Sbonwick minor_t start = zfs_minor; 403fa9e4066Sahrens do { 404fa9e4066Sahrens mutex_enter(&zfs_dev_mtx); 405a0965f35Sbonwick if (zfs_minor >= MAXMIN32) { 406a0965f35Sbonwick /* 407a0965f35Sbonwick * If we're still using the real major number, 408a0965f35Sbonwick * keep out of /dev/zfs and /dev/zvol minor 409a0965f35Sbonwick * number space. If we're using a getudev()'ed 410a0965f35Sbonwick * major number, we can use all of its minors. 411a0965f35Sbonwick */ 412a0965f35Sbonwick if (zfs_major == ddi_name_to_major(ZFS_DRIVER)) 413a0965f35Sbonwick zfs_minor = ZFS_MIN_MINOR; 414a0965f35Sbonwick else 415a0965f35Sbonwick zfs_minor = 0; 416a0965f35Sbonwick } else { 417a0965f35Sbonwick zfs_minor++; 418a0965f35Sbonwick } 419fa9e4066Sahrens mount_dev = makedevice(zfs_major, zfs_minor); 420fa9e4066Sahrens mutex_exit(&zfs_dev_mtx); 421fa9e4066Sahrens } while (vfs_devismounted(mount_dev) && zfs_minor != start); 422fa9e4066Sahrens if (zfs_minor == start) { 423fa9e4066Sahrens /* 424fa9e4066Sahrens * We are using all ~262,000 minor numbers 425fa9e4066Sahrens * for the current major number. Create a 426fa9e4066Sahrens * new major number. 427fa9e4066Sahrens */ 428fa9e4066Sahrens if ((new_major = getudev()) == (major_t)-1) { 429fa9e4066Sahrens cmn_err(CE_WARN, 430fa9e4066Sahrens "zfs_mount: Can't get unique" 431fa9e4066Sahrens " major device number."); 432fa9e4066Sahrens goto out; 433fa9e4066Sahrens } 434fa9e4066Sahrens mutex_enter(&zfs_dev_mtx); 435fa9e4066Sahrens zfs_major = new_major; 436fa9e4066Sahrens zfs_minor = 0; 437fa9e4066Sahrens mutex_exit(&zfs_dev_mtx); 438fa9e4066Sahrens } else { 439fa9e4066Sahrens break; 440fa9e4066Sahrens } 441fa9e4066Sahrens /* CONSTANTCONDITION */ 442fa9e4066Sahrens } while (1); 443fa9e4066Sahrens 444fa9e4066Sahrens ASSERT(vfs_devismounted(mount_dev) == 0); 445fa9e4066Sahrens 446fa9e4066Sahrens if (dsl_prop_get_integer(osname, "recordsize", &recordsize, NULL) != 0) 447fa9e4066Sahrens recordsize = SPA_MAXBLOCKSIZE; 448fa9e4066Sahrens 449fa9e4066Sahrens vfsp->vfs_dev = mount_dev; 450fa9e4066Sahrens vfsp->vfs_fstype = zfsfstype; 451fa9e4066Sahrens vfsp->vfs_bsize = recordsize; 452fa9e4066Sahrens vfsp->vfs_flag |= VFS_NOTRUNC; 453fa9e4066Sahrens vfsp->vfs_data = zfsvfs; 454fa9e4066Sahrens 455fa9e4066Sahrens error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL); 456fa9e4066Sahrens if (error) 457fa9e4066Sahrens goto out; 458fa9e4066Sahrens 459fa9e4066Sahrens if (readonly) 460fa9e4066Sahrens mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 461fa9e4066Sahrens else 462fa9e4066Sahrens mode = DS_MODE_PRIMARY; 463fa9e4066Sahrens 464fa9e4066Sahrens error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 465fa9e4066Sahrens if (error == EROFS) { 466fa9e4066Sahrens mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 467fa9e4066Sahrens error = dmu_objset_open(osname, DMU_OST_ZFS, mode, 468fa9e4066Sahrens &zfsvfs->z_os); 469fa9e4066Sahrens } 470fa9e4066Sahrens os = zfsvfs->z_os; 471fa9e4066Sahrens 472fa9e4066Sahrens if (error) 473fa9e4066Sahrens goto out; 474fa9e4066Sahrens 475fa9e4066Sahrens if (error = zfs_init_fs(zfsvfs, &zp, cr)) 476fa9e4066Sahrens goto out; 477fa9e4066Sahrens 478fa9e4066Sahrens if (dmu_objset_is_snapshot(os)) { 479fa9e4066Sahrens ASSERT(mode & DS_MODE_READONLY); 480fa9e4066Sahrens atime_changed_cb(zfsvfs, B_FALSE); 481fa9e4066Sahrens readonly_changed_cb(zfsvfs, B_TRUE); 482fa9e4066Sahrens zfsvfs->z_issnap = B_TRUE; 483fa9e4066Sahrens } else { 484fa9e4066Sahrens int do_readonly = FALSE, readonly; 485fa9e4066Sahrens int do_setuid = FALSE, setuid; 486fa9e4066Sahrens int do_exec = FALSE, exec; 487fa9e4066Sahrens int do_devices = FALSE, devices; 488fa9e4066Sahrens 489fa9e4066Sahrens /* 490fa9e4066Sahrens * Start a delete thread running. 491fa9e4066Sahrens */ 492fa9e4066Sahrens (void) zfs_delete_thread_target(zfsvfs, 1); 493fa9e4066Sahrens 494fa9e4066Sahrens /* 495fa9e4066Sahrens * Parse and replay the intent log. 496fa9e4066Sahrens */ 497fa9e4066Sahrens zil_replay(os, zfsvfs, &zfsvfs->z_assign, zfs_replay_vector, 498fa9e4066Sahrens (void (*)(void *))zfs_delete_wait_empty); 499fa9e4066Sahrens 500fa9e4066Sahrens if (!zil_disable) 501fa9e4066Sahrens zfsvfs->z_log = zil_open(os, zfs_get_data); 502fa9e4066Sahrens 503fa9e4066Sahrens /* 504fa9e4066Sahrens * The act of registering our callbacks will destroy any mount 505fa9e4066Sahrens * options we may have. In order to enable temporary overrides 506fa9e4066Sahrens * of mount options, we stash away the current values and 507fa9e4066Sahrens * restore them after we register the callbacks. 508fa9e4066Sahrens */ 509fa9e4066Sahrens if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 510fa9e4066Sahrens readonly = B_TRUE; 511fa9e4066Sahrens do_readonly = B_TRUE; 512fa9e4066Sahrens } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 513fa9e4066Sahrens readonly = B_FALSE; 514fa9e4066Sahrens do_readonly = B_TRUE; 515fa9e4066Sahrens } 516fa9e4066Sahrens if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 517fa9e4066Sahrens devices = B_FALSE; 518fa9e4066Sahrens setuid = B_FALSE; 519fa9e4066Sahrens do_devices = B_TRUE; 520fa9e4066Sahrens do_setuid = B_TRUE; 521fa9e4066Sahrens } else { 522fa9e4066Sahrens if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { 523fa9e4066Sahrens devices = B_FALSE; 524fa9e4066Sahrens do_devices = B_TRUE; 525fa9e4066Sahrens } else if (vfs_optionisset(vfsp, 526fa9e4066Sahrens MNTOPT_DEVICES, NULL)) { 527fa9e4066Sahrens devices = B_TRUE; 528fa9e4066Sahrens do_devices = B_TRUE; 529fa9e4066Sahrens } 530fa9e4066Sahrens 531fa9e4066Sahrens if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 532fa9e4066Sahrens setuid = B_FALSE; 533fa9e4066Sahrens do_setuid = B_TRUE; 534fa9e4066Sahrens } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 535fa9e4066Sahrens setuid = B_TRUE; 536fa9e4066Sahrens do_setuid = B_TRUE; 537fa9e4066Sahrens } 538fa9e4066Sahrens } 539fa9e4066Sahrens if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 540fa9e4066Sahrens exec = B_FALSE; 541fa9e4066Sahrens do_exec = B_TRUE; 542fa9e4066Sahrens } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 543fa9e4066Sahrens exec = B_TRUE; 544fa9e4066Sahrens do_exec = B_TRUE; 545fa9e4066Sahrens } 546fa9e4066Sahrens 547fa9e4066Sahrens /* 548fa9e4066Sahrens * Register property callbacks. 549fa9e4066Sahrens */ 550fa9e4066Sahrens ds = dmu_objset_ds(os); 551fa9e4066Sahrens VERIFY(dsl_prop_register(ds, "atime", atime_changed_cb, 552fa9e4066Sahrens zfsvfs) == 0); 553fa9e4066Sahrens 554fa9e4066Sahrens VERIFY(dsl_prop_register(ds, "recordsize", blksz_changed_cb, 555fa9e4066Sahrens zfsvfs) == 0); 556fa9e4066Sahrens 557fa9e4066Sahrens VERIFY(dsl_prop_register(ds, "readonly", readonly_changed_cb, 558fa9e4066Sahrens zfsvfs) == 0); 559fa9e4066Sahrens 560fa9e4066Sahrens VERIFY(dsl_prop_register(ds, "devices", devices_changed_cb, 561fa9e4066Sahrens zfsvfs) == 0); 562fa9e4066Sahrens 563fa9e4066Sahrens VERIFY(dsl_prop_register(ds, "setuid", setuid_changed_cb, 564fa9e4066Sahrens zfsvfs) == 0); 565fa9e4066Sahrens 566fa9e4066Sahrens VERIFY(dsl_prop_register(ds, "exec", exec_changed_cb, 567fa9e4066Sahrens zfsvfs) == 0); 568fa9e4066Sahrens 569fa9e4066Sahrens VERIFY(dsl_prop_register(ds, "snapdir", snapdir_changed_cb, 570fa9e4066Sahrens zfsvfs) == 0); 571fa9e4066Sahrens 572fa9e4066Sahrens VERIFY(dsl_prop_register(ds, "aclmode", acl_mode_changed_cb, 573fa9e4066Sahrens zfsvfs) == 0); 574fa9e4066Sahrens 575fa9e4066Sahrens VERIFY(dsl_prop_register(ds, "aclinherit", 576fa9e4066Sahrens acl_inherit_changed_cb, zfsvfs) == 0); 577fa9e4066Sahrens 578fa9e4066Sahrens 579fa9e4066Sahrens /* 580fa9e4066Sahrens * Invoke our callbacks to restore temporary mount options. 581fa9e4066Sahrens */ 582fa9e4066Sahrens if (do_readonly) 583fa9e4066Sahrens readonly_changed_cb(zfsvfs, readonly); 584fa9e4066Sahrens if (do_setuid) 585fa9e4066Sahrens setuid_changed_cb(zfsvfs, setuid); 586fa9e4066Sahrens if (do_exec) 587fa9e4066Sahrens exec_changed_cb(zfsvfs, exec); 588fa9e4066Sahrens if (do_devices) 589fa9e4066Sahrens devices_changed_cb(zfsvfs, devices); 590fa9e4066Sahrens } 591fa9e4066Sahrens 592fa9e4066Sahrens vp = ZTOV(zp); 593fa9e4066Sahrens if (!zfsvfs->z_issnap) 594fa9e4066Sahrens zfsctl_create(zfsvfs); 595fa9e4066Sahrens out: 596fa9e4066Sahrens if (error) { 597fa9e4066Sahrens if (zp) 598fa9e4066Sahrens VN_RELE(vp); 599fa9e4066Sahrens 600fa9e4066Sahrens if (zfsvfs) { 601fa9e4066Sahrens if (os) 602fa9e4066Sahrens dmu_objset_close(os); 603fa9e4066Sahrens kmem_free(zfsvfs, sizeof (zfsvfs_t)); 604fa9e4066Sahrens } 605fa9e4066Sahrens } else { 606fa9e4066Sahrens atomic_add_32(&zfs_active_fs_count, 1); 607fa9e4066Sahrens VN_RELE(vp); 608fa9e4066Sahrens } 609fa9e4066Sahrens 610fa9e4066Sahrens pn_free(&spn); 611fa9e4066Sahrens return (error); 612fa9e4066Sahrens } 613fa9e4066Sahrens 614fa9e4066Sahrens static int 615fa9e4066Sahrens zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp) 616fa9e4066Sahrens { 617fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 618fa9e4066Sahrens dmu_objset_stats_t dstats; 619fa9e4066Sahrens dev32_t d32; 620fa9e4066Sahrens 621fa9e4066Sahrens ZFS_ENTER(zfsvfs); 622fa9e4066Sahrens 623fa9e4066Sahrens dmu_objset_stats(zfsvfs->z_os, &dstats); 624fa9e4066Sahrens 625fa9e4066Sahrens /* 626fa9e4066Sahrens * The underlying storage pool actually uses multiple block sizes. 627fa9e4066Sahrens * We report the fragsize as the smallest block size we support, 628fa9e4066Sahrens * and we report our blocksize as the filesystem's maximum blocksize. 629fa9e4066Sahrens */ 630fa9e4066Sahrens statp->f_frsize = 1UL << SPA_MINBLOCKSHIFT; 631fa9e4066Sahrens statp->f_bsize = zfsvfs->z_max_blksz; 632fa9e4066Sahrens 633fa9e4066Sahrens /* 634fa9e4066Sahrens * The following report "total" blocks of various kinds in the 635fa9e4066Sahrens * file system, but reported in terms of f_frsize - the 636fa9e4066Sahrens * "fragment" size. 637fa9e4066Sahrens */ 638fa9e4066Sahrens 639fa9e4066Sahrens statp->f_blocks = 640fa9e4066Sahrens (dstats.dds_space_refd + dstats.dds_available) >> SPA_MINBLOCKSHIFT; 641fa9e4066Sahrens statp->f_bfree = dstats.dds_available >> SPA_MINBLOCKSHIFT; 642fa9e4066Sahrens statp->f_bavail = statp->f_bfree; /* no root reservation */ 643fa9e4066Sahrens 644fa9e4066Sahrens /* 645fa9e4066Sahrens * statvfs() should really be called statufs(), because it assumes 646fa9e4066Sahrens * static metadata. ZFS doesn't preallocate files, so the best 647fa9e4066Sahrens * we can do is report the max that could possibly fit in f_files, 648fa9e4066Sahrens * and that minus the number actually used in f_ffree. 649fa9e4066Sahrens * For f_ffree, report the smaller of the number of object available 650fa9e4066Sahrens * and the number of blocks (each object will take at least a block). 651fa9e4066Sahrens */ 652fa9e4066Sahrens statp->f_ffree = MIN(dstats.dds_objects_avail, statp->f_bfree); 653fa9e4066Sahrens statp->f_favail = statp->f_ffree; /* no "root reservation" */ 654fa9e4066Sahrens statp->f_files = statp->f_ffree + dstats.dds_objects_used; 655fa9e4066Sahrens 656fa9e4066Sahrens (void) cmpldev(&d32, vfsp->vfs_dev); 657fa9e4066Sahrens statp->f_fsid = d32; 658fa9e4066Sahrens 659fa9e4066Sahrens /* 660fa9e4066Sahrens * We're a zfs filesystem. 661fa9e4066Sahrens */ 662fa9e4066Sahrens (void) strcpy(statp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name); 663fa9e4066Sahrens 664a5be7ebbSmarks statp->f_flag = vf_to_stf(vfsp->vfs_flag); 665fa9e4066Sahrens 666fa9e4066Sahrens statp->f_namemax = ZFS_MAXNAMELEN; 667fa9e4066Sahrens 668fa9e4066Sahrens /* 669fa9e4066Sahrens * We have all of 32 characters to stuff a string here. 670fa9e4066Sahrens * Is there anything useful we could/should provide? 671fa9e4066Sahrens */ 672fa9e4066Sahrens bzero(statp->f_fstr, sizeof (statp->f_fstr)); 673fa9e4066Sahrens 674fa9e4066Sahrens ZFS_EXIT(zfsvfs); 675fa9e4066Sahrens return (0); 676fa9e4066Sahrens } 677fa9e4066Sahrens 678fa9e4066Sahrens static int 679fa9e4066Sahrens zfs_root(vfs_t *vfsp, vnode_t **vpp) 680fa9e4066Sahrens { 681fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 682fa9e4066Sahrens znode_t *rootzp; 683fa9e4066Sahrens int error; 684fa9e4066Sahrens 685fa9e4066Sahrens ZFS_ENTER(zfsvfs); 686fa9e4066Sahrens 687fa9e4066Sahrens error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 688fa9e4066Sahrens if (error == 0) 689fa9e4066Sahrens *vpp = ZTOV(rootzp); 690fa9e4066Sahrens 691fa9e4066Sahrens ZFS_EXIT(zfsvfs); 692fa9e4066Sahrens return (error); 693fa9e4066Sahrens } 694fa9e4066Sahrens 695fa9e4066Sahrens /*ARGSUSED*/ 696fa9e4066Sahrens static int 697fa9e4066Sahrens zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr) 698fa9e4066Sahrens { 699fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 700fa9e4066Sahrens int ret; 701fa9e4066Sahrens 702fa9e4066Sahrens if ((ret = secpolicy_fs_unmount(cr, vfsp)) != 0) 703fa9e4066Sahrens return (ret); 704fa9e4066Sahrens 705*033f9833Sek 706*033f9833Sek (void) dnlc_purge_vfsp(vfsp, 0); 707*033f9833Sek 708fa9e4066Sahrens /* 709fa9e4066Sahrens * Unmount any snapshots mounted under .zfs before unmounting the 710fa9e4066Sahrens * dataset itself. 711fa9e4066Sahrens */ 712fa9e4066Sahrens if (zfsvfs->z_ctldir != NULL && 713fa9e4066Sahrens (ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) 714fa9e4066Sahrens return (ret); 715fa9e4066Sahrens 716fa9e4066Sahrens if (fflag & MS_FORCE) { 717fa9e4066Sahrens vfsp->vfs_flag |= VFS_UNMOUNTED; 718fa9e4066Sahrens zfsvfs->z_unmounted1 = B_TRUE; 719fa9e4066Sahrens 720fa9e4066Sahrens /* 721fa9e4066Sahrens * Wait for all zfs threads to leave zfs. 722fa9e4066Sahrens * Grabbing a rwlock as reader in all vops and 723fa9e4066Sahrens * as writer here doesn't work because it too easy to get 724fa9e4066Sahrens * multiple reader enters as zfs can re-enter itself. 725fa9e4066Sahrens * This can lead to deadlock if there is an intervening 726fa9e4066Sahrens * rw_enter as writer. 727fa9e4066Sahrens * So a file system threads ref count (z_op_cnt) is used. 728fa9e4066Sahrens * A polling loop on z_op_cnt may seem inefficient, but 729fa9e4066Sahrens * - this saves all threads on exit from having to grab a 730fa9e4066Sahrens * mutex in order to cv_signal 731fa9e4066Sahrens * - only occurs on forced unmount in the rare case when 732fa9e4066Sahrens * there are outstanding threads within the file system. 733fa9e4066Sahrens */ 734fa9e4066Sahrens while (zfsvfs->z_op_cnt) { 735fa9e4066Sahrens delay(1); 736fa9e4066Sahrens } 737fa9e4066Sahrens 738fa9e4066Sahrens zfs_objset_close(zfsvfs); 739fa9e4066Sahrens 740fa9e4066Sahrens return (0); 741fa9e4066Sahrens } 742fa9e4066Sahrens 743fa9e4066Sahrens zfs_zcache_flush(zfsvfs); 744fa9e4066Sahrens 745fa9e4066Sahrens /* 746fa9e4066Sahrens * Stop all delete threads. 747fa9e4066Sahrens */ 748fa9e4066Sahrens (void) zfs_delete_thread_target(zfsvfs, 0); 749fa9e4066Sahrens 750fa9e4066Sahrens /* 751fa9e4066Sahrens * Check the number of active vnodes in the file system. 752fa9e4066Sahrens * Our count is maintained in the vfs structure, but the number 753fa9e4066Sahrens * is off by 1 to indicate a hold on the vfs structure itself. 754fa9e4066Sahrens * 755fa9e4066Sahrens * The '.zfs' directory maintains a reference of its own, and any active 756fa9e4066Sahrens * references underneath are reflected in the vnode count. 757fa9e4066Sahrens */ 758fa9e4066Sahrens if (zfsvfs->z_ctldir == NULL) { 759fa9e4066Sahrens if (vfsp->vfs_count > 1) { 760fa9e4066Sahrens if ((zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) == 0) 761fa9e4066Sahrens (void) zfs_delete_thread_target(zfsvfs, 1); 762fa9e4066Sahrens return (EBUSY); 763fa9e4066Sahrens } 764fa9e4066Sahrens } else { 765fa9e4066Sahrens if (vfsp->vfs_count > 2 || 766fa9e4066Sahrens (zfsvfs->z_ctldir->v_count > 1 && !(fflag & MS_FORCE))) { 767fa9e4066Sahrens if ((zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) == 0) 768fa9e4066Sahrens (void) zfs_delete_thread_target(zfsvfs, 1); 769fa9e4066Sahrens return (EBUSY); 770fa9e4066Sahrens } 771fa9e4066Sahrens } 772fa9e4066Sahrens 773fa9e4066Sahrens vfsp->vfs_flag |= VFS_UNMOUNTED; 774fa9e4066Sahrens zfs_objset_close(zfsvfs); 775fa9e4066Sahrens 776fa9e4066Sahrens return (0); 777fa9e4066Sahrens } 778fa9e4066Sahrens 779fa9e4066Sahrens static int 780fa9e4066Sahrens zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 781fa9e4066Sahrens { 782fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 783fa9e4066Sahrens znode_t *zp; 784fa9e4066Sahrens uint64_t object = 0; 785fa9e4066Sahrens uint64_t fid_gen = 0; 786fa9e4066Sahrens uint64_t gen_mask; 787fa9e4066Sahrens uint64_t zp_gen; 788fa9e4066Sahrens int i, err; 789fa9e4066Sahrens 790fa9e4066Sahrens *vpp = NULL; 791fa9e4066Sahrens 792fa9e4066Sahrens ZFS_ENTER(zfsvfs); 793fa9e4066Sahrens 794fa9e4066Sahrens if (fidp->fid_len == LONG_FID_LEN) { 795fa9e4066Sahrens zfid_long_t *zlfid = (zfid_long_t *)fidp; 796fa9e4066Sahrens uint64_t objsetid = 0; 797fa9e4066Sahrens uint64_t setgen = 0; 798fa9e4066Sahrens 799fa9e4066Sahrens for (i = 0; i < sizeof (zlfid->zf_setid); i++) 800fa9e4066Sahrens objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 801fa9e4066Sahrens 802fa9e4066Sahrens for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 803fa9e4066Sahrens setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 804fa9e4066Sahrens 805fa9e4066Sahrens ZFS_EXIT(zfsvfs); 806fa9e4066Sahrens 807fa9e4066Sahrens err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 808fa9e4066Sahrens if (err) 809fa9e4066Sahrens return (EINVAL); 810fa9e4066Sahrens ZFS_ENTER(zfsvfs); 811fa9e4066Sahrens } 812fa9e4066Sahrens 813fa9e4066Sahrens if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 814fa9e4066Sahrens zfid_short_t *zfid = (zfid_short_t *)fidp; 815fa9e4066Sahrens 816fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_object); i++) 817fa9e4066Sahrens object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 818fa9e4066Sahrens 819fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_gen); i++) 820fa9e4066Sahrens fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 821fa9e4066Sahrens } else { 822fa9e4066Sahrens ZFS_EXIT(zfsvfs); 823fa9e4066Sahrens return (EINVAL); 824fa9e4066Sahrens } 825fa9e4066Sahrens 826fa9e4066Sahrens /* A zero fid_gen means we are in the .zfs control directories */ 827fa9e4066Sahrens if (fid_gen == 0 && 828fa9e4066Sahrens (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { 829fa9e4066Sahrens *vpp = zfsvfs->z_ctldir; 830fa9e4066Sahrens ASSERT(*vpp != NULL); 831fa9e4066Sahrens if (object == ZFSCTL_INO_SNAPDIR) { 832fa9e4066Sahrens VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, 833fa9e4066Sahrens 0, NULL, NULL) == 0); 834fa9e4066Sahrens } else { 835fa9e4066Sahrens VN_HOLD(*vpp); 836fa9e4066Sahrens } 837fa9e4066Sahrens ZFS_EXIT(zfsvfs); 838fa9e4066Sahrens return (0); 839fa9e4066Sahrens } 840fa9e4066Sahrens 841fa9e4066Sahrens gen_mask = -1ULL >> (64 - 8 * i); 842fa9e4066Sahrens 843fa9e4066Sahrens dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 844fa9e4066Sahrens if (err = zfs_zget(zfsvfs, object, &zp)) { 845fa9e4066Sahrens ZFS_EXIT(zfsvfs); 846fa9e4066Sahrens return (err); 847fa9e4066Sahrens } 848fa9e4066Sahrens zp_gen = zp->z_phys->zp_gen & gen_mask; 849fa9e4066Sahrens if (zp_gen == 0) 850fa9e4066Sahrens zp_gen = 1; 851fa9e4066Sahrens if (zp->z_reap || zp_gen != fid_gen) { 852fa9e4066Sahrens dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 853fa9e4066Sahrens VN_RELE(ZTOV(zp)); 854fa9e4066Sahrens ZFS_EXIT(zfsvfs); 855fa9e4066Sahrens return (EINVAL); 856fa9e4066Sahrens } 857fa9e4066Sahrens 858fa9e4066Sahrens *vpp = ZTOV(zp); 859fa9e4066Sahrens ZFS_EXIT(zfsvfs); 860fa9e4066Sahrens return (0); 861fa9e4066Sahrens } 862fa9e4066Sahrens 863fa9e4066Sahrens static void 864fa9e4066Sahrens zfs_objset_close(zfsvfs_t *zfsvfs) 865fa9e4066Sahrens { 866fa9e4066Sahrens zfs_delete_t *zd = &zfsvfs->z_delete_head; 867fa9e4066Sahrens znode_t *zp, *nextzp; 868fa9e4066Sahrens objset_t *os = zfsvfs->z_os; 869fa9e4066Sahrens struct dsl_dataset *ds; 870fa9e4066Sahrens 871fa9e4066Sahrens /* 872fa9e4066Sahrens * Stop all delete threads. 873fa9e4066Sahrens */ 874fa9e4066Sahrens (void) zfs_delete_thread_target(zfsvfs, 0); 875fa9e4066Sahrens 876fa9e4066Sahrens /* 877fa9e4066Sahrens * For forced unmount, at this point all vops except zfs_inactive 878fa9e4066Sahrens * are erroring EIO. We need to now suspend zfs_inactive threads 879fa9e4066Sahrens * while we are freeing dbufs before switching zfs_inactive 880fa9e4066Sahrens * to use behaviour without a objset. 881fa9e4066Sahrens */ 882fa9e4066Sahrens rw_enter(&zfsvfs->z_um_lock, RW_WRITER); 883fa9e4066Sahrens 884fa9e4066Sahrens zfs_zcache_flush(zfsvfs); 885fa9e4066Sahrens 886fa9e4066Sahrens /* 887fa9e4066Sahrens * Release all delete in progress znodes 888fa9e4066Sahrens * They will be processed when the file system remounts. 889fa9e4066Sahrens */ 890fa9e4066Sahrens mutex_enter(&zd->z_mutex); 891fa9e4066Sahrens while (zp = list_head(&zd->z_znodes)) { 892fa9e4066Sahrens list_remove(&zd->z_znodes, zp); 893fa9e4066Sahrens zp->z_dbuf_held = 0; 894fa9e4066Sahrens dmu_buf_rele(zp->z_dbuf); 895fa9e4066Sahrens } 896fa9e4066Sahrens mutex_exit(&zd->z_mutex); 897fa9e4066Sahrens 898fa9e4066Sahrens /* 899fa9e4066Sahrens * Release all holds on dbufs 900fa9e4066Sahrens * Note, although we have stopped all other vop threads and 901fa9e4066Sahrens * zfs_inactive(), the dmu can callback via znode_pageout_func() 902fa9e4066Sahrens * which can zfs_znode_free() the znode. 903fa9e4066Sahrens * So we lock z_all_znodes; search the list for a held 904fa9e4066Sahrens * dbuf; drop the lock (we know zp can't disappear if we hold 905fa9e4066Sahrens * a dbuf lock; then regrab the lock and restart. 906fa9e4066Sahrens */ 907fa9e4066Sahrens mutex_enter(&zfsvfs->z_znodes_lock); 908fa9e4066Sahrens for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) { 909fa9e4066Sahrens nextzp = list_next(&zfsvfs->z_all_znodes, zp); 910fa9e4066Sahrens if (zp->z_dbuf_held) { 911fa9e4066Sahrens /* dbufs should only be held when force unmounting */ 912fa9e4066Sahrens zp->z_dbuf_held = 0; 913fa9e4066Sahrens mutex_exit(&zfsvfs->z_znodes_lock); 914fa9e4066Sahrens dmu_buf_rele(zp->z_dbuf); 915fa9e4066Sahrens /* Start again */ 916fa9e4066Sahrens mutex_enter(&zfsvfs->z_znodes_lock); 917fa9e4066Sahrens nextzp = list_head(&zfsvfs->z_all_znodes); 918fa9e4066Sahrens } 919fa9e4066Sahrens } 920fa9e4066Sahrens mutex_exit(&zfsvfs->z_znodes_lock); 921fa9e4066Sahrens 922fa9e4066Sahrens /* 923fa9e4066Sahrens * Unregister properties. 924fa9e4066Sahrens */ 925fa9e4066Sahrens if (!dmu_objset_is_snapshot(os)) { 926fa9e4066Sahrens ds = dmu_objset_ds(os); 927fa9e4066Sahrens 928fa9e4066Sahrens VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 929fa9e4066Sahrens zfsvfs) == 0); 930fa9e4066Sahrens 931fa9e4066Sahrens VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 932fa9e4066Sahrens zfsvfs) == 0); 933fa9e4066Sahrens 934fa9e4066Sahrens VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 935fa9e4066Sahrens zfsvfs) == 0); 936fa9e4066Sahrens 937fa9e4066Sahrens VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb, 938fa9e4066Sahrens zfsvfs) == 0); 939fa9e4066Sahrens 940fa9e4066Sahrens VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 941fa9e4066Sahrens zfsvfs) == 0); 942fa9e4066Sahrens 943fa9e4066Sahrens VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 944fa9e4066Sahrens zfsvfs) == 0); 945fa9e4066Sahrens 946fa9e4066Sahrens VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 947fa9e4066Sahrens zfsvfs) == 0); 948fa9e4066Sahrens 949fa9e4066Sahrens VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 950fa9e4066Sahrens zfsvfs) == 0); 951fa9e4066Sahrens 952fa9e4066Sahrens VERIFY(dsl_prop_unregister(ds, "aclinherit", 953fa9e4066Sahrens acl_inherit_changed_cb, zfsvfs) == 0); 954fa9e4066Sahrens } 955fa9e4066Sahrens 956fa9e4066Sahrens /* 957fa9e4066Sahrens * Make the dmu drop all it dbuf holds so that zfs_inactive 958fa9e4066Sahrens * can then safely free znode/vnodes. 959fa9e4066Sahrens */ 960fa9e4066Sahrens txg_wait_synced(dmu_objset_pool(os), 0); 961fa9e4066Sahrens 962fa9e4066Sahrens /* 963fa9e4066Sahrens * Switch zfs_inactive to behaviour without an objset. 964fa9e4066Sahrens * It just tosses cached pages and frees the znode & vnode. 965fa9e4066Sahrens * Then re-enable zfs_inactive threads in that new behaviour. 966fa9e4066Sahrens */ 967fa9e4066Sahrens zfsvfs->z_unmounted2 = B_TRUE; 968fa9e4066Sahrens rw_exit(&zfsvfs->z_um_lock); /* re-enable any zfs_inactive threads */ 969fa9e4066Sahrens 970fa9e4066Sahrens /* 971fa9e4066Sahrens * Close the zil. Can't close the zil while zfs_inactive 972fa9e4066Sahrens * threads are blocked as zil_close can call zfs_inactive. 973fa9e4066Sahrens */ 974fa9e4066Sahrens if (zfsvfs->z_log) { 975fa9e4066Sahrens zil_close(zfsvfs->z_log); 976fa9e4066Sahrens zfsvfs->z_log = NULL; 977fa9e4066Sahrens } 978fa9e4066Sahrens 979fa9e4066Sahrens /* 980fa9e4066Sahrens * Finally close the objset 981fa9e4066Sahrens */ 982fa9e4066Sahrens dmu_objset_close(os); 983fa9e4066Sahrens 9848afd4dd6Sperrin /* 9858afd4dd6Sperrin * We can now safely destroy the '.zfs' directory node. 9868afd4dd6Sperrin */ 9878afd4dd6Sperrin if (zfsvfs->z_ctldir != NULL) 9888afd4dd6Sperrin zfsctl_destroy(zfsvfs); 9898afd4dd6Sperrin 990fa9e4066Sahrens } 991fa9e4066Sahrens 992fa9e4066Sahrens static void 993fa9e4066Sahrens zfs_freevfs(vfs_t *vfsp) 994fa9e4066Sahrens { 995fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 996fa9e4066Sahrens 997fa9e4066Sahrens kmem_free(zfsvfs, sizeof (zfsvfs_t)); 998fa9e4066Sahrens 999fa9e4066Sahrens atomic_add_32(&zfs_active_fs_count, -1); 1000fa9e4066Sahrens } 1001fa9e4066Sahrens 1002fa9e4066Sahrens /* 1003fa9e4066Sahrens * VFS_INIT() initialization. Note that there is no VFS_FINI(), 1004fa9e4066Sahrens * so we can't safely do any non-idempotent initialization here. 1005fa9e4066Sahrens * Leave that to zfs_init() and zfs_fini(), which are called 1006fa9e4066Sahrens * from the module's _init() and _fini() entry points. 1007fa9e4066Sahrens */ 1008fa9e4066Sahrens /*ARGSUSED*/ 1009fa9e4066Sahrens static int 1010fa9e4066Sahrens zfs_vfsinit(int fstype, char *name) 1011fa9e4066Sahrens { 1012fa9e4066Sahrens int error; 1013fa9e4066Sahrens 1014fa9e4066Sahrens zfsfstype = fstype; 1015fa9e4066Sahrens 1016fa9e4066Sahrens /* 1017fa9e4066Sahrens * Setup vfsops and vnodeops tables. 1018fa9e4066Sahrens */ 1019fa9e4066Sahrens error = vfs_setfsops(fstype, zfs_vfsops_template, &zfs_vfsops); 1020fa9e4066Sahrens if (error != 0) { 1021fa9e4066Sahrens cmn_err(CE_WARN, "zfs: bad vfs ops template"); 1022fa9e4066Sahrens } 1023fa9e4066Sahrens 1024fa9e4066Sahrens error = zfs_create_op_tables(); 1025fa9e4066Sahrens if (error) { 1026fa9e4066Sahrens zfs_remove_op_tables(); 1027fa9e4066Sahrens cmn_err(CE_WARN, "zfs: bad vnode ops template"); 1028fa9e4066Sahrens (void) vfs_freevfsops_by_type(zfsfstype); 1029fa9e4066Sahrens return (error); 1030fa9e4066Sahrens } 1031fa9e4066Sahrens 1032fa9e4066Sahrens mutex_init(&zfs_dev_mtx, NULL, MUTEX_DEFAULT, NULL); 1033fa9e4066Sahrens 1034fa9e4066Sahrens /* 1035a0965f35Sbonwick * Unique major number for all zfs mounts. 1036a0965f35Sbonwick * If we run out of 32-bit minors, we'll getudev() another major. 1037fa9e4066Sahrens */ 1038a0965f35Sbonwick zfs_major = ddi_name_to_major(ZFS_DRIVER); 1039a0965f35Sbonwick zfs_minor = ZFS_MIN_MINOR; 1040fa9e4066Sahrens 1041fa9e4066Sahrens return (0); 1042fa9e4066Sahrens } 1043fa9e4066Sahrens 1044fa9e4066Sahrens void 1045fa9e4066Sahrens zfs_init(void) 1046fa9e4066Sahrens { 1047fa9e4066Sahrens /* 1048fa9e4066Sahrens * Initialize .zfs directory structures 1049fa9e4066Sahrens */ 1050fa9e4066Sahrens zfsctl_init(); 1051fa9e4066Sahrens 1052fa9e4066Sahrens /* 1053fa9e4066Sahrens * Initialize znode cache, vnode ops, etc... 1054fa9e4066Sahrens */ 1055fa9e4066Sahrens zfs_znode_init(); 1056fa9e4066Sahrens } 1057fa9e4066Sahrens 1058fa9e4066Sahrens void 1059fa9e4066Sahrens zfs_fini(void) 1060fa9e4066Sahrens { 1061fa9e4066Sahrens zfsctl_fini(); 1062fa9e4066Sahrens zfs_znode_fini(); 1063fa9e4066Sahrens } 1064fa9e4066Sahrens 1065fa9e4066Sahrens int 1066fa9e4066Sahrens zfs_busy(void) 1067fa9e4066Sahrens { 1068fa9e4066Sahrens return (zfs_active_fs_count != 0); 1069fa9e4066Sahrens } 1070fa9e4066Sahrens 1071fa9e4066Sahrens static vfsdef_t vfw = { 1072fa9e4066Sahrens VFSDEF_VERSION, 1073fa9e4066Sahrens MNTTYPE_ZFS, 1074fa9e4066Sahrens zfs_vfsinit, 1075fa9e4066Sahrens VSW_HASPROTO | VSW_CANRWRO | VSW_CANREMOUNT | VSW_VOLATILEDEV, 1076fa9e4066Sahrens &zfs_mntopts 1077fa9e4066Sahrens }; 1078fa9e4066Sahrens 1079fa9e4066Sahrens struct modlfs zfs_modlfs = { 1080fa9e4066Sahrens &mod_fsops, "ZFS filesystem version 1", &vfw 1081fa9e4066Sahrens }; 1082