1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5033f9833Sek * Common Development and Distribution License (the "License"). 6033f9833Sek * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 22893a6d32Sahrens * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23fa9e4066Sahrens * Use is subject to license terms. 24fa9e4066Sahrens */ 25fa9e4066Sahrens 2678077464Sck #pragma ident "%Z%%M% %I% %E% SMI" 27fa9e4066Sahrens 28fa9e4066Sahrens #include <sys/types.h> 29fa9e4066Sahrens #include <sys/param.h> 30fa9e4066Sahrens #include <sys/systm.h> 31fa9e4066Sahrens #include <sys/sysmacros.h> 32fa9e4066Sahrens #include <sys/kmem.h> 33fa9e4066Sahrens #include <sys/pathname.h> 34fa9e4066Sahrens #include <sys/acl.h> 35fa9e4066Sahrens #include <sys/vnode.h> 36fa9e4066Sahrens #include <sys/vfs.h> 37aa59c4cbSrsb #include <sys/vfs_opreg.h> 38fa9e4066Sahrens #include <sys/mntent.h> 39fa9e4066Sahrens #include <sys/mount.h> 40fa9e4066Sahrens #include <sys/cmn_err.h> 41fa9e4066Sahrens #include "fs/fs_subr.h" 42fa9e4066Sahrens #include <sys/zfs_znode.h> 43893a6d32Sahrens #include <sys/zfs_dir.h> 44fa9e4066Sahrens #include <sys/zil.h> 45fa9e4066Sahrens #include <sys/fs/zfs.h> 46fa9e4066Sahrens #include <sys/dmu.h> 47fa9e4066Sahrens #include <sys/dsl_prop.h> 48*b1b8ab34Slling #include <sys/dsl_dataset.h> 49fa9e4066Sahrens #include <sys/spa.h> 50fa9e4066Sahrens #include <sys/zap.h> 51fa9e4066Sahrens #include <sys/varargs.h> 52fa9e4066Sahrens #include <sys/policy.h> 53fa9e4066Sahrens #include <sys/atomic.h> 54fa9e4066Sahrens #include <sys/mkdev.h> 55fa9e4066Sahrens #include <sys/modctl.h> 56fa9e4066Sahrens #include <sys/zfs_ioctl.h> 57fa9e4066Sahrens #include <sys/zfs_ctldir.h> 58ea8dc4b6Seschrock #include <sys/bootconf.h> 59a0965f35Sbonwick #include <sys/sunddi.h> 60033f9833Sek #include <sys/dnlc.h> 61fa9e4066Sahrens 62fa9e4066Sahrens int zfsfstype; 63fa9e4066Sahrens vfsops_t *zfs_vfsops = NULL; 64a0965f35Sbonwick static major_t zfs_major; 65fa9e4066Sahrens static minor_t zfs_minor; 66fa9e4066Sahrens static kmutex_t zfs_dev_mtx; 67fa9e4066Sahrens 68fa9e4066Sahrens static int zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr); 69fa9e4066Sahrens static int zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr); 70ea8dc4b6Seschrock static int zfs_mountroot(vfs_t *vfsp, enum whymountroot); 71fa9e4066Sahrens static int zfs_root(vfs_t *vfsp, vnode_t **vpp); 72fa9e4066Sahrens static int zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp); 73fa9e4066Sahrens static int zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp); 74fa9e4066Sahrens static void zfs_freevfs(vfs_t *vfsp); 75fa9e4066Sahrens static void zfs_objset_close(zfsvfs_t *zfsvfs); 76fa9e4066Sahrens 77fa9e4066Sahrens static const fs_operation_def_t zfs_vfsops_template[] = { 78aa59c4cbSrsb VFSNAME_MOUNT, { .vfs_mount = zfs_mount }, 79aa59c4cbSrsb VFSNAME_MOUNTROOT, { .vfs_mountroot = zfs_mountroot }, 80aa59c4cbSrsb VFSNAME_UNMOUNT, { .vfs_unmount = zfs_umount }, 81aa59c4cbSrsb VFSNAME_ROOT, { .vfs_root = zfs_root }, 82aa59c4cbSrsb VFSNAME_STATVFS, { .vfs_statvfs = zfs_statvfs }, 83aa59c4cbSrsb VFSNAME_SYNC, { .vfs_sync = zfs_sync }, 84aa59c4cbSrsb VFSNAME_VGET, { .vfs_vget = zfs_vget }, 85aa59c4cbSrsb VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, 86aa59c4cbSrsb NULL, NULL 87fa9e4066Sahrens }; 88fa9e4066Sahrens 89fa9e4066Sahrens static const fs_operation_def_t zfs_vfsops_eio_template[] = { 90aa59c4cbSrsb VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, 91aa59c4cbSrsb NULL, NULL 92fa9e4066Sahrens }; 93fa9e4066Sahrens 94fa9e4066Sahrens /* 95fa9e4066Sahrens * We need to keep a count of active fs's. 96fa9e4066Sahrens * This is necessary to prevent our module 97fa9e4066Sahrens * from being unloaded after a umount -f 98fa9e4066Sahrens */ 99fa9e4066Sahrens static uint32_t zfs_active_fs_count = 0; 100fa9e4066Sahrens 101fa9e4066Sahrens static char *noatime_cancel[] = { MNTOPT_ATIME, NULL }; 102fa9e4066Sahrens static char *atime_cancel[] = { MNTOPT_NOATIME, NULL }; 1037b55fa8eSck static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL }; 1047b55fa8eSck static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL }; 105fa9e4066Sahrens 1067b55fa8eSck /* 1077b55fa8eSck * MNTOPT_DEFAULT was removed from MNTOPT_XATTR, since the 1087b55fa8eSck * default value is now determined by the xattr property. 1097b55fa8eSck */ 110fa9e4066Sahrens static mntopt_t mntopts[] = { 1117b55fa8eSck { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, NULL }, 1127b55fa8eSck { MNTOPT_XATTR, xattr_cancel, NULL, 0, NULL }, 113fa9e4066Sahrens { MNTOPT_NOATIME, noatime_cancel, NULL, MO_DEFAULT, NULL }, 114fa9e4066Sahrens { MNTOPT_ATIME, atime_cancel, NULL, 0, NULL } 115fa9e4066Sahrens }; 116fa9e4066Sahrens 117fa9e4066Sahrens static mntopts_t zfs_mntopts = { 118fa9e4066Sahrens sizeof (mntopts) / sizeof (mntopt_t), 119fa9e4066Sahrens mntopts 120fa9e4066Sahrens }; 121fa9e4066Sahrens 122fa9e4066Sahrens /*ARGSUSED*/ 123fa9e4066Sahrens int 124fa9e4066Sahrens zfs_sync(vfs_t *vfsp, short flag, cred_t *cr) 125fa9e4066Sahrens { 126fa9e4066Sahrens /* 127fa9e4066Sahrens * Data integrity is job one. We don't want a compromised kernel 128fa9e4066Sahrens * writing to the storage pool, so we never sync during panic. 129fa9e4066Sahrens */ 130fa9e4066Sahrens if (panicstr) 131fa9e4066Sahrens return (0); 132fa9e4066Sahrens 133fa9e4066Sahrens /* 134fa9e4066Sahrens * SYNC_ATTR is used by fsflush() to force old filesystems like UFS 135fa9e4066Sahrens * to sync metadata, which they would otherwise cache indefinitely. 136fa9e4066Sahrens * Semantically, the only requirement is that the sync be initiated. 137fa9e4066Sahrens * The DMU syncs out txgs frequently, so there's nothing to do. 138fa9e4066Sahrens */ 139fa9e4066Sahrens if (flag & SYNC_ATTR) 140fa9e4066Sahrens return (0); 141fa9e4066Sahrens 142fa9e4066Sahrens if (vfsp != NULL) { 143fa9e4066Sahrens /* 144fa9e4066Sahrens * Sync a specific filesystem. 145fa9e4066Sahrens */ 146fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 147fa9e4066Sahrens 148fa9e4066Sahrens ZFS_ENTER(zfsvfs); 149fa9e4066Sahrens if (zfsvfs->z_log != NULL) 150b19a79ecSperrin zil_commit(zfsvfs->z_log, UINT64_MAX, 0); 151fa9e4066Sahrens else 152fa9e4066Sahrens txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 153fa9e4066Sahrens ZFS_EXIT(zfsvfs); 154fa9e4066Sahrens } else { 155fa9e4066Sahrens /* 156fa9e4066Sahrens * Sync all ZFS filesystems. This is what happens when you 157fa9e4066Sahrens * run sync(1M). Unlike other filesystems, ZFS honors the 158fa9e4066Sahrens * request by waiting for all pools to commit all dirty data. 159fa9e4066Sahrens */ 160fa9e4066Sahrens spa_sync_allpools(); 161fa9e4066Sahrens } 162fa9e4066Sahrens 163fa9e4066Sahrens return (0); 164fa9e4066Sahrens } 165fa9e4066Sahrens 166ea8dc4b6Seschrock static int 167ea8dc4b6Seschrock zfs_create_unique_device(dev_t *dev) 168ea8dc4b6Seschrock { 169ea8dc4b6Seschrock major_t new_major; 170ea8dc4b6Seschrock 171ea8dc4b6Seschrock do { 172ea8dc4b6Seschrock ASSERT3U(zfs_minor, <=, MAXMIN32); 173ea8dc4b6Seschrock minor_t start = zfs_minor; 174ea8dc4b6Seschrock do { 175ea8dc4b6Seschrock mutex_enter(&zfs_dev_mtx); 176ea8dc4b6Seschrock if (zfs_minor >= MAXMIN32) { 177ea8dc4b6Seschrock /* 178ea8dc4b6Seschrock * If we're still using the real major 179ea8dc4b6Seschrock * keep out of /dev/zfs and /dev/zvol minor 180ea8dc4b6Seschrock * number space. If we're using a getudev()'ed 181ea8dc4b6Seschrock * major number, we can use all of its minors. 182ea8dc4b6Seschrock */ 183ea8dc4b6Seschrock if (zfs_major == ddi_name_to_major(ZFS_DRIVER)) 184ea8dc4b6Seschrock zfs_minor = ZFS_MIN_MINOR; 185ea8dc4b6Seschrock else 186ea8dc4b6Seschrock zfs_minor = 0; 187ea8dc4b6Seschrock } else { 188ea8dc4b6Seschrock zfs_minor++; 189ea8dc4b6Seschrock } 190ea8dc4b6Seschrock *dev = makedevice(zfs_major, zfs_minor); 191ea8dc4b6Seschrock mutex_exit(&zfs_dev_mtx); 192ea8dc4b6Seschrock } while (vfs_devismounted(*dev) && zfs_minor != start); 193ea8dc4b6Seschrock if (zfs_minor == start) { 194ea8dc4b6Seschrock /* 195ea8dc4b6Seschrock * We are using all ~262,000 minor numbers for the 196ea8dc4b6Seschrock * current major number. Create a new major number. 197ea8dc4b6Seschrock */ 198ea8dc4b6Seschrock if ((new_major = getudev()) == (major_t)-1) { 199ea8dc4b6Seschrock cmn_err(CE_WARN, 200ea8dc4b6Seschrock "zfs_mount: Can't get unique major " 201ea8dc4b6Seschrock "device number."); 202ea8dc4b6Seschrock return (-1); 203ea8dc4b6Seschrock } 204ea8dc4b6Seschrock mutex_enter(&zfs_dev_mtx); 205ea8dc4b6Seschrock zfs_major = new_major; 206ea8dc4b6Seschrock zfs_minor = 0; 207ea8dc4b6Seschrock 208ea8dc4b6Seschrock mutex_exit(&zfs_dev_mtx); 209ea8dc4b6Seschrock } else { 210ea8dc4b6Seschrock break; 211ea8dc4b6Seschrock } 212ea8dc4b6Seschrock /* CONSTANTCONDITION */ 213ea8dc4b6Seschrock } while (1); 214ea8dc4b6Seschrock 215ea8dc4b6Seschrock return (0); 216ea8dc4b6Seschrock } 217ea8dc4b6Seschrock 218fa9e4066Sahrens static void 219fa9e4066Sahrens atime_changed_cb(void *arg, uint64_t newval) 220fa9e4066Sahrens { 221fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 222fa9e4066Sahrens 223fa9e4066Sahrens if (newval == TRUE) { 224fa9e4066Sahrens zfsvfs->z_atime = TRUE; 225fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 226fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 227fa9e4066Sahrens } else { 228fa9e4066Sahrens zfsvfs->z_atime = FALSE; 229fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 230fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 231fa9e4066Sahrens } 232fa9e4066Sahrens } 233fa9e4066Sahrens 2347b55fa8eSck static void 2357b55fa8eSck xattr_changed_cb(void *arg, uint64_t newval) 2367b55fa8eSck { 2377b55fa8eSck zfsvfs_t *zfsvfs = arg; 2387b55fa8eSck 2397b55fa8eSck if (newval == TRUE) { 2407b55fa8eSck /* XXX locking on vfs_flag? */ 2417b55fa8eSck zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 2427b55fa8eSck vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 2437b55fa8eSck vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 2447b55fa8eSck } else { 2457b55fa8eSck /* XXX locking on vfs_flag? */ 2467b55fa8eSck zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 2477b55fa8eSck vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 2487b55fa8eSck vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 2497b55fa8eSck } 2507b55fa8eSck } 2517b55fa8eSck 252fa9e4066Sahrens static void 253fa9e4066Sahrens blksz_changed_cb(void *arg, uint64_t newval) 254fa9e4066Sahrens { 255fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 256fa9e4066Sahrens 257fa9e4066Sahrens if (newval < SPA_MINBLOCKSIZE || 258fa9e4066Sahrens newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 259fa9e4066Sahrens newval = SPA_MAXBLOCKSIZE; 260fa9e4066Sahrens 261fa9e4066Sahrens zfsvfs->z_max_blksz = newval; 262fa9e4066Sahrens zfsvfs->z_vfs->vfs_bsize = newval; 263fa9e4066Sahrens } 264fa9e4066Sahrens 265fa9e4066Sahrens static void 266fa9e4066Sahrens readonly_changed_cb(void *arg, uint64_t newval) 267fa9e4066Sahrens { 268fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 269fa9e4066Sahrens 270fa9e4066Sahrens if (newval) { 271fa9e4066Sahrens /* XXX locking on vfs_flag? */ 272fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 273fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 274fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 275fa9e4066Sahrens } else { 276fa9e4066Sahrens /* XXX locking on vfs_flag? */ 277fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 278fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 279fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 280fa9e4066Sahrens } 281fa9e4066Sahrens } 282fa9e4066Sahrens 283fa9e4066Sahrens static void 284fa9e4066Sahrens devices_changed_cb(void *arg, uint64_t newval) 285fa9e4066Sahrens { 286fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 287fa9e4066Sahrens 288fa9e4066Sahrens if (newval == FALSE) { 289fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NODEVICES; 290fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES); 291fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES, NULL, 0); 292fa9e4066Sahrens } else { 293fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NODEVICES; 294fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES); 295fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES, NULL, 0); 296fa9e4066Sahrens } 297fa9e4066Sahrens } 298fa9e4066Sahrens 299fa9e4066Sahrens static void 300fa9e4066Sahrens setuid_changed_cb(void *arg, uint64_t newval) 301fa9e4066Sahrens { 302fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 303fa9e4066Sahrens 304fa9e4066Sahrens if (newval == FALSE) { 305fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 306fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 307fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 308fa9e4066Sahrens } else { 309fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 310fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 311fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 312fa9e4066Sahrens } 313fa9e4066Sahrens } 314fa9e4066Sahrens 315fa9e4066Sahrens static void 316fa9e4066Sahrens exec_changed_cb(void *arg, uint64_t newval) 317fa9e4066Sahrens { 318fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 319fa9e4066Sahrens 320fa9e4066Sahrens if (newval == FALSE) { 321fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 322fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 323fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 324fa9e4066Sahrens } else { 325fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 326fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 327fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 328fa9e4066Sahrens } 329fa9e4066Sahrens } 330fa9e4066Sahrens 331fa9e4066Sahrens static void 332fa9e4066Sahrens snapdir_changed_cb(void *arg, uint64_t newval) 333fa9e4066Sahrens { 334fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 335fa9e4066Sahrens 336fa9e4066Sahrens zfsvfs->z_show_ctldir = newval; 337fa9e4066Sahrens } 338fa9e4066Sahrens 339fa9e4066Sahrens static void 340fa9e4066Sahrens acl_mode_changed_cb(void *arg, uint64_t newval) 341fa9e4066Sahrens { 342fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 343fa9e4066Sahrens 344fa9e4066Sahrens zfsvfs->z_acl_mode = newval; 345fa9e4066Sahrens } 346fa9e4066Sahrens 347fa9e4066Sahrens static void 348fa9e4066Sahrens acl_inherit_changed_cb(void *arg, uint64_t newval) 349fa9e4066Sahrens { 350fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 351fa9e4066Sahrens 352fa9e4066Sahrens zfsvfs->z_acl_inherit = newval; 353fa9e4066Sahrens } 354fa9e4066Sahrens 355fa9e4066Sahrens static int 356ea8dc4b6Seschrock zfs_refresh_properties(vfs_t *vfsp) 357fa9e4066Sahrens { 358ea8dc4b6Seschrock zfsvfs_t *zfsvfs = vfsp->vfs_data; 359fa9e4066Sahrens 3604981797aStabriz /* 3614981797aStabriz * Remount operations default to "rw" unless "ro" is explicitly 3624981797aStabriz * specified. 3634981797aStabriz */ 364ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 365ea8dc4b6Seschrock readonly_changed_cb(zfsvfs, B_TRUE); 3664981797aStabriz } else { 3674981797aStabriz if (!dmu_objset_is_snapshot(zfsvfs->z_os)) 3684981797aStabriz readonly_changed_cb(zfsvfs, B_FALSE); 3694981797aStabriz else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) 370*b1b8ab34Slling return (EROFS); 371ea8dc4b6Seschrock } 372fa9e4066Sahrens 373ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 374ea8dc4b6Seschrock devices_changed_cb(zfsvfs, B_FALSE); 375ea8dc4b6Seschrock setuid_changed_cb(zfsvfs, B_FALSE); 376ea8dc4b6Seschrock } else { 377ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 378ea8dc4b6Seschrock devices_changed_cb(zfsvfs, B_FALSE); 379ea8dc4b6Seschrock else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) 380ea8dc4b6Seschrock devices_changed_cb(zfsvfs, B_TRUE); 381ea8dc4b6Seschrock 382ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 383ea8dc4b6Seschrock setuid_changed_cb(zfsvfs, B_FALSE); 384ea8dc4b6Seschrock else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) 385ea8dc4b6Seschrock setuid_changed_cb(zfsvfs, B_TRUE); 386fa9e4066Sahrens } 387fa9e4066Sahrens 388ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) 389ea8dc4b6Seschrock exec_changed_cb(zfsvfs, B_FALSE); 390ea8dc4b6Seschrock else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) 391ea8dc4b6Seschrock exec_changed_cb(zfsvfs, B_TRUE); 392ea8dc4b6Seschrock 3933bb79becSeschrock if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) 3943bb79becSeschrock atime_changed_cb(zfsvfs, B_TRUE); 3953bb79becSeschrock else if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) 3963bb79becSeschrock atime_changed_cb(zfsvfs, B_FALSE); 3973bb79becSeschrock 3987b55fa8eSck if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) 3997b55fa8eSck xattr_changed_cb(zfsvfs, B_TRUE); 4007b55fa8eSck else if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) 4017b55fa8eSck xattr_changed_cb(zfsvfs, B_FALSE); 4027b55fa8eSck 403ea8dc4b6Seschrock return (0); 404ea8dc4b6Seschrock } 405ea8dc4b6Seschrock 406ea8dc4b6Seschrock static int 407ea8dc4b6Seschrock zfs_register_callbacks(vfs_t *vfsp) 408ea8dc4b6Seschrock { 409ea8dc4b6Seschrock struct dsl_dataset *ds = NULL; 410ea8dc4b6Seschrock objset_t *os = NULL; 411ea8dc4b6Seschrock zfsvfs_t *zfsvfs = NULL; 4123ccfa83cSahrens int readonly, do_readonly = FALSE; 4133ccfa83cSahrens int setuid, do_setuid = FALSE; 4143ccfa83cSahrens int exec, do_exec = FALSE; 4153ccfa83cSahrens int devices, do_devices = FALSE; 4163ccfa83cSahrens int xattr, do_xattr = FALSE; 417ea8dc4b6Seschrock int error = 0; 418ea8dc4b6Seschrock 419ea8dc4b6Seschrock ASSERT(vfsp); 420ea8dc4b6Seschrock zfsvfs = vfsp->vfs_data; 421ea8dc4b6Seschrock ASSERT(zfsvfs); 422ea8dc4b6Seschrock os = zfsvfs->z_os; 423fa9e4066Sahrens 424fa9e4066Sahrens /* 425ea8dc4b6Seschrock * The act of registering our callbacks will destroy any mount 426ea8dc4b6Seschrock * options we may have. In order to enable temporary overrides 4277b55fa8eSck * of mount options, we stash away the current values and 428ea8dc4b6Seschrock * restore them after we register the callbacks. 429fa9e4066Sahrens */ 430ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 431ea8dc4b6Seschrock readonly = B_TRUE; 432ea8dc4b6Seschrock do_readonly = B_TRUE; 433ea8dc4b6Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 434ea8dc4b6Seschrock readonly = B_FALSE; 435ea8dc4b6Seschrock do_readonly = B_TRUE; 436ea8dc4b6Seschrock } 437ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 438ea8dc4b6Seschrock devices = B_FALSE; 439ea8dc4b6Seschrock setuid = B_FALSE; 440ea8dc4b6Seschrock do_devices = B_TRUE; 441ea8dc4b6Seschrock do_setuid = B_TRUE; 442ea8dc4b6Seschrock } else { 443ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { 444ea8dc4b6Seschrock devices = B_FALSE; 445ea8dc4b6Seschrock do_devices = B_TRUE; 446*b1b8ab34Slling } else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) { 447ea8dc4b6Seschrock devices = B_TRUE; 448ea8dc4b6Seschrock do_devices = B_TRUE; 449fa9e4066Sahrens } 450fa9e4066Sahrens 451ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 452ea8dc4b6Seschrock setuid = B_FALSE; 453ea8dc4b6Seschrock do_setuid = B_TRUE; 454ea8dc4b6Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 455ea8dc4b6Seschrock setuid = B_TRUE; 456ea8dc4b6Seschrock do_setuid = B_TRUE; 457fa9e4066Sahrens } 458ea8dc4b6Seschrock } 459ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 460ea8dc4b6Seschrock exec = B_FALSE; 461ea8dc4b6Seschrock do_exec = B_TRUE; 462ea8dc4b6Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 463ea8dc4b6Seschrock exec = B_TRUE; 464ea8dc4b6Seschrock do_exec = B_TRUE; 465fa9e4066Sahrens } 4667b55fa8eSck if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 4677b55fa8eSck xattr = B_FALSE; 4687b55fa8eSck do_xattr = B_TRUE; 4697b55fa8eSck } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 4707b55fa8eSck xattr = B_TRUE; 4717b55fa8eSck do_xattr = B_TRUE; 4727b55fa8eSck } 473fa9e4066Sahrens 474fa9e4066Sahrens /* 475ea8dc4b6Seschrock * Register property callbacks. 476ea8dc4b6Seschrock * 477ea8dc4b6Seschrock * It would probably be fine to just check for i/o error from 478ea8dc4b6Seschrock * the first prop_register(), but I guess I like to go 479ea8dc4b6Seschrock * overboard... 480fa9e4066Sahrens */ 481ea8dc4b6Seschrock ds = dmu_objset_ds(os); 482ea8dc4b6Seschrock error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 4837b55fa8eSck error = error ? error : dsl_prop_register(ds, 4847b55fa8eSck "xattr", xattr_changed_cb, zfsvfs); 485ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 486ea8dc4b6Seschrock "recordsize", blksz_changed_cb, zfsvfs); 487ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 488ea8dc4b6Seschrock "readonly", readonly_changed_cb, zfsvfs); 489ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 490ea8dc4b6Seschrock "devices", devices_changed_cb, zfsvfs); 491ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 492ea8dc4b6Seschrock "setuid", setuid_changed_cb, zfsvfs); 493ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 494ea8dc4b6Seschrock "exec", exec_changed_cb, zfsvfs); 495ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 496ea8dc4b6Seschrock "snapdir", snapdir_changed_cb, zfsvfs); 497ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 498ea8dc4b6Seschrock "aclmode", acl_mode_changed_cb, zfsvfs); 499ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 500ea8dc4b6Seschrock "aclinherit", acl_inherit_changed_cb, zfsvfs); 501ea8dc4b6Seschrock if (error) 502ea8dc4b6Seschrock goto unregister; 503fa9e4066Sahrens 504ea8dc4b6Seschrock /* 505ea8dc4b6Seschrock * Invoke our callbacks to restore temporary mount options. 506ea8dc4b6Seschrock */ 507ea8dc4b6Seschrock if (do_readonly) 508ea8dc4b6Seschrock readonly_changed_cb(zfsvfs, readonly); 509ea8dc4b6Seschrock if (do_setuid) 510ea8dc4b6Seschrock setuid_changed_cb(zfsvfs, setuid); 511ea8dc4b6Seschrock if (do_exec) 512ea8dc4b6Seschrock exec_changed_cb(zfsvfs, exec); 513ea8dc4b6Seschrock if (do_devices) 514ea8dc4b6Seschrock devices_changed_cb(zfsvfs, devices); 5157b55fa8eSck if (do_xattr) 5167b55fa8eSck xattr_changed_cb(zfsvfs, xattr); 517fa9e4066Sahrens 518ea8dc4b6Seschrock return (0); 519fa9e4066Sahrens 520ea8dc4b6Seschrock unregister: 521fa9e4066Sahrens /* 522ea8dc4b6Seschrock * We may attempt to unregister some callbacks that are not 523ea8dc4b6Seschrock * registered, but this is OK; it will simply return ENOMSG, 524ea8dc4b6Seschrock * which we will ignore. 525fa9e4066Sahrens */ 526ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 5277b55fa8eSck (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 528ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 529ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 530ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "devices", devices_changed_cb, zfsvfs); 531ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 532ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 533ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 534ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); 535ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 536ea8dc4b6Seschrock zfsvfs); 537ea8dc4b6Seschrock return (error); 538ea8dc4b6Seschrock 539ea8dc4b6Seschrock } 540ea8dc4b6Seschrock 541ea8dc4b6Seschrock static int 542ea8dc4b6Seschrock zfs_domount(vfs_t *vfsp, char *osname, cred_t *cr) 543ea8dc4b6Seschrock { 544ea8dc4b6Seschrock dev_t mount_dev; 545ea8dc4b6Seschrock uint64_t recordsize, readonly; 546ea8dc4b6Seschrock int error = 0; 547ea8dc4b6Seschrock int mode; 548ea8dc4b6Seschrock zfsvfs_t *zfsvfs; 549ea8dc4b6Seschrock znode_t *zp = NULL; 550ea8dc4b6Seschrock 551ea8dc4b6Seschrock ASSERT(vfsp); 552ea8dc4b6Seschrock ASSERT(osname); 553fa9e4066Sahrens 554fa9e4066Sahrens /* 555fa9e4066Sahrens * Initialize the zfs-specific filesystem structure. 556fa9e4066Sahrens * Should probably make this a kmem cache, shuffle fields, 557ea8dc4b6Seschrock * and just bzero up to z_hold_mtx[]. 558fa9e4066Sahrens */ 559fa9e4066Sahrens zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 560fa9e4066Sahrens zfsvfs->z_vfs = vfsp; 561fa9e4066Sahrens zfsvfs->z_parent = zfsvfs; 562fa9e4066Sahrens zfsvfs->z_assign = TXG_NOWAIT; 563fa9e4066Sahrens zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 564a0965f35Sbonwick zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 565fa9e4066Sahrens 566fa9e4066Sahrens mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 567fa9e4066Sahrens list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 568fa9e4066Sahrens offsetof(znode_t, z_link_node)); 569fa9e4066Sahrens rw_init(&zfsvfs->z_um_lock, NULL, RW_DEFAULT, NULL); 570fa9e4066Sahrens 571ea8dc4b6Seschrock /* Initialize the generic filesystem structure. */ 572fa9e4066Sahrens vfsp->vfs_bcount = 0; 573fa9e4066Sahrens vfsp->vfs_data = NULL; 574fa9e4066Sahrens 575ea8dc4b6Seschrock if (zfs_create_unique_device(&mount_dev) == -1) { 576ea8dc4b6Seschrock error = ENODEV; 577ea8dc4b6Seschrock goto out; 578ea8dc4b6Seschrock } 579fa9e4066Sahrens ASSERT(vfs_devismounted(mount_dev) == 0); 580fa9e4066Sahrens 581ea8dc4b6Seschrock if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 582ea8dc4b6Seschrock NULL)) 583ea8dc4b6Seschrock goto out; 584fa9e4066Sahrens 585fa9e4066Sahrens vfsp->vfs_dev = mount_dev; 586fa9e4066Sahrens vfsp->vfs_fstype = zfsfstype; 587fa9e4066Sahrens vfsp->vfs_bsize = recordsize; 588fa9e4066Sahrens vfsp->vfs_flag |= VFS_NOTRUNC; 589fa9e4066Sahrens vfsp->vfs_data = zfsvfs; 590fa9e4066Sahrens 591ea8dc4b6Seschrock if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) 592fa9e4066Sahrens goto out; 593fa9e4066Sahrens 594fa9e4066Sahrens if (readonly) 595fa9e4066Sahrens mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 596fa9e4066Sahrens else 597fa9e4066Sahrens mode = DS_MODE_PRIMARY; 598fa9e4066Sahrens 599fa9e4066Sahrens error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 600fa9e4066Sahrens if (error == EROFS) { 601fa9e4066Sahrens mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 602fa9e4066Sahrens error = dmu_objset_open(osname, DMU_OST_ZFS, mode, 603fa9e4066Sahrens &zfsvfs->z_os); 604fa9e4066Sahrens } 605fa9e4066Sahrens 606fa9e4066Sahrens if (error) 607fa9e4066Sahrens goto out; 608fa9e4066Sahrens 609fa9e4066Sahrens if (error = zfs_init_fs(zfsvfs, &zp, cr)) 610fa9e4066Sahrens goto out; 611fa9e4066Sahrens 612ea8dc4b6Seschrock /* The call to zfs_init_fs leaves the vnode held, release it here. */ 613ea8dc4b6Seschrock VN_RELE(ZTOV(zp)); 614ea8dc4b6Seschrock 615ea8dc4b6Seschrock if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 6167b55fa8eSck uint64_t xattr; 6177b55fa8eSck 618fa9e4066Sahrens ASSERT(mode & DS_MODE_READONLY); 619fa9e4066Sahrens atime_changed_cb(zfsvfs, B_FALSE); 620fa9e4066Sahrens readonly_changed_cb(zfsvfs, B_TRUE); 6217b55fa8eSck if (error = dsl_prop_get_integer(osname, "xattr", &xattr, NULL)) 6227b55fa8eSck goto out; 6237b55fa8eSck xattr_changed_cb(zfsvfs, xattr); 624fa9e4066Sahrens zfsvfs->z_issnap = B_TRUE; 625fa9e4066Sahrens } else { 626ea8dc4b6Seschrock error = zfs_register_callbacks(vfsp); 627ea8dc4b6Seschrock if (error) 628ea8dc4b6Seschrock goto out; 629fa9e4066Sahrens 630893a6d32Sahrens zfs_unlinked_drain(zfsvfs); 631fa9e4066Sahrens 632fa9e4066Sahrens /* 633fa9e4066Sahrens * Parse and replay the intent log. 634fa9e4066Sahrens */ 635ea8dc4b6Seschrock zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, 636893a6d32Sahrens zfs_replay_vector); 637fa9e4066Sahrens 638fa9e4066Sahrens if (!zil_disable) 639ea8dc4b6Seschrock zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 640ea8dc4b6Seschrock } 641fa9e4066Sahrens 642ea8dc4b6Seschrock if (!zfsvfs->z_issnap) 643ea8dc4b6Seschrock zfsctl_create(zfsvfs); 644ea8dc4b6Seschrock out: 645ea8dc4b6Seschrock if (error) { 646ea8dc4b6Seschrock if (zfsvfs->z_os) 647ea8dc4b6Seschrock dmu_objset_close(zfsvfs->z_os); 648ea8dc4b6Seschrock kmem_free(zfsvfs, sizeof (zfsvfs_t)); 649ea8dc4b6Seschrock } else { 650ea8dc4b6Seschrock atomic_add_32(&zfs_active_fs_count, 1); 651ea8dc4b6Seschrock } 652fa9e4066Sahrens 653ea8dc4b6Seschrock return (error); 654fa9e4066Sahrens 655ea8dc4b6Seschrock } 656ea8dc4b6Seschrock 657ea8dc4b6Seschrock void 658ea8dc4b6Seschrock zfs_unregister_callbacks(zfsvfs_t *zfsvfs) 659ea8dc4b6Seschrock { 660ea8dc4b6Seschrock objset_t *os = zfsvfs->z_os; 661ea8dc4b6Seschrock struct dsl_dataset *ds; 662ea8dc4b6Seschrock 663ea8dc4b6Seschrock /* 664ea8dc4b6Seschrock * Unregister properties. 665ea8dc4b6Seschrock */ 666ea8dc4b6Seschrock if (!dmu_objset_is_snapshot(os)) { 667fa9e4066Sahrens ds = dmu_objset_ds(os); 668ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 669fa9e4066Sahrens zfsvfs) == 0); 670fa9e4066Sahrens 6717b55fa8eSck VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 6727b55fa8eSck zfsvfs) == 0); 6737b55fa8eSck 674ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 675fa9e4066Sahrens zfsvfs) == 0); 676fa9e4066Sahrens 677ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 678fa9e4066Sahrens zfsvfs) == 0); 679fa9e4066Sahrens 680ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb, 681fa9e4066Sahrens zfsvfs) == 0); 682fa9e4066Sahrens 683ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 684fa9e4066Sahrens zfsvfs) == 0); 685fa9e4066Sahrens 686ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 687fa9e4066Sahrens zfsvfs) == 0); 688fa9e4066Sahrens 689ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 690fa9e4066Sahrens zfsvfs) == 0); 691fa9e4066Sahrens 692ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 693fa9e4066Sahrens zfsvfs) == 0); 694fa9e4066Sahrens 695ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "aclinherit", 696fa9e4066Sahrens acl_inherit_changed_cb, zfsvfs) == 0); 697ea8dc4b6Seschrock } 698ea8dc4b6Seschrock } 699fa9e4066Sahrens 700*b1b8ab34Slling /* 701*b1b8ab34Slling * Convert a decimal digit string to a uint64_t integer. 702*b1b8ab34Slling */ 703*b1b8ab34Slling static int 704*b1b8ab34Slling str_to_uint64(char *str, uint64_t *objnum) 705*b1b8ab34Slling { 706*b1b8ab34Slling uint64_t num = 0; 707*b1b8ab34Slling 708*b1b8ab34Slling while (*str) { 709*b1b8ab34Slling if (*str < '0' || *str > '9') 710*b1b8ab34Slling return (EINVAL); 711*b1b8ab34Slling 712*b1b8ab34Slling num = num*10 + *str++ - '0'; 713*b1b8ab34Slling } 714*b1b8ab34Slling 715*b1b8ab34Slling *objnum = num; 716*b1b8ab34Slling return (0); 717*b1b8ab34Slling } 718*b1b8ab34Slling 719*b1b8ab34Slling 720*b1b8ab34Slling /* 721*b1b8ab34Slling * The boot path passed from the boot loader is in the form of 722*b1b8ab34Slling * "rootpool-name/root-filesystem-object-number'. Convert this 723*b1b8ab34Slling * string to a dataset name: "rootpool-name/root-filesystem-name". 724*b1b8ab34Slling */ 725*b1b8ab34Slling static int 726*b1b8ab34Slling parse_bootpath(char *bpath, char *outpath) 727*b1b8ab34Slling { 728*b1b8ab34Slling char *slashp; 729*b1b8ab34Slling uint64_t objnum; 730*b1b8ab34Slling int error; 731*b1b8ab34Slling 732*b1b8ab34Slling if (*bpath == 0 || *bpath == '/') 733*b1b8ab34Slling return (EINVAL); 734*b1b8ab34Slling 735*b1b8ab34Slling slashp = strchr(bpath, '/'); 736*b1b8ab34Slling 737*b1b8ab34Slling /* if no '/', just return the pool name */ 738*b1b8ab34Slling if (slashp == NULL) { 739*b1b8ab34Slling (void) strcpy(outpath, bpath); 740*b1b8ab34Slling return (0); 741*b1b8ab34Slling } 742*b1b8ab34Slling 743*b1b8ab34Slling if (error = str_to_uint64(slashp+1, &objnum)) 744*b1b8ab34Slling return (error); 745*b1b8ab34Slling 746*b1b8ab34Slling *slashp = '\0'; 747*b1b8ab34Slling error = dsl_dsobj_to_dsname(bpath, objnum, outpath); 748*b1b8ab34Slling *slashp = '/'; 749*b1b8ab34Slling 750*b1b8ab34Slling return (error); 751*b1b8ab34Slling } 752*b1b8ab34Slling 753ea8dc4b6Seschrock static int 754ea8dc4b6Seschrock zfs_mountroot(vfs_t *vfsp, enum whymountroot why) 755ea8dc4b6Seschrock { 756ea8dc4b6Seschrock int error = 0; 757ea8dc4b6Seschrock int ret = 0; 758ea8dc4b6Seschrock static int zfsrootdone = 0; 759ea8dc4b6Seschrock zfsvfs_t *zfsvfs = NULL; 760ea8dc4b6Seschrock znode_t *zp = NULL; 761ea8dc4b6Seschrock vnode_t *vp = NULL; 762*b1b8ab34Slling char *zfs_bootpath; 763ea8dc4b6Seschrock 764ea8dc4b6Seschrock ASSERT(vfsp); 765ea8dc4b6Seschrock 766ea8dc4b6Seschrock /* 767*b1b8ab34Slling * The filesystem that we mount as root is defined in the 768*b1b8ab34Slling * "zfs-bootfs" property. 769ea8dc4b6Seschrock */ 770ea8dc4b6Seschrock if (why == ROOT_INIT) { 771ea8dc4b6Seschrock if (zfsrootdone++) 772ea8dc4b6Seschrock return (EBUSY); 773fa9e4066Sahrens 774*b1b8ab34Slling if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(), 775*b1b8ab34Slling DDI_PROP_DONTPASS, "zfs-bootfs", &zfs_bootpath) != 776*b1b8ab34Slling DDI_SUCCESS) 777*b1b8ab34Slling return (EIO); 778*b1b8ab34Slling 779*b1b8ab34Slling error = parse_bootpath(zfs_bootpath, rootfs.bo_name); 780*b1b8ab34Slling ddi_prop_free(zfs_bootpath); 781*b1b8ab34Slling 782*b1b8ab34Slling if (error) 783*b1b8ab34Slling return (error); 784fa9e4066Sahrens 785ea8dc4b6Seschrock if (error = vfs_lock(vfsp)) 786ea8dc4b6Seschrock return (error); 787fa9e4066Sahrens 788*b1b8ab34Slling if (error = zfs_domount(vfsp, rootfs.bo_name, CRED())) 789ea8dc4b6Seschrock goto out; 790ea8dc4b6Seschrock 791ea8dc4b6Seschrock zfsvfs = (zfsvfs_t *)vfsp->vfs_data; 792ea8dc4b6Seschrock ASSERT(zfsvfs); 793ea8dc4b6Seschrock if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) 794ea8dc4b6Seschrock goto out; 795ea8dc4b6Seschrock 796ea8dc4b6Seschrock vp = ZTOV(zp); 797ea8dc4b6Seschrock mutex_enter(&vp->v_lock); 798ea8dc4b6Seschrock vp->v_flag |= VROOT; 799ea8dc4b6Seschrock mutex_exit(&vp->v_lock); 800ea8dc4b6Seschrock rootvp = vp; 801ea8dc4b6Seschrock 802ea8dc4b6Seschrock /* 803ea8dc4b6Seschrock * The zfs_zget call above returns with a hold on vp, we release 804ea8dc4b6Seschrock * it here. 805ea8dc4b6Seschrock */ 806fa9e4066Sahrens VN_RELE(vp); 807ea8dc4b6Seschrock 808ea8dc4b6Seschrock /* 809ea8dc4b6Seschrock * Mount root as readonly initially, it will be remouted 810ea8dc4b6Seschrock * read/write by /lib/svc/method/fs-usr. 811ea8dc4b6Seschrock */ 812ea8dc4b6Seschrock readonly_changed_cb(vfsp->vfs_data, B_TRUE); 813ea8dc4b6Seschrock vfs_add((struct vnode *)0, vfsp, 814ea8dc4b6Seschrock (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0); 815ea8dc4b6Seschrock out: 816ea8dc4b6Seschrock vfs_unlock(vfsp); 817ea8dc4b6Seschrock ret = (error) ? error : 0; 818ea8dc4b6Seschrock return (ret); 819ea8dc4b6Seschrock 820ea8dc4b6Seschrock } else if (why == ROOT_REMOUNT) { 821ea8dc4b6Seschrock 822ea8dc4b6Seschrock readonly_changed_cb(vfsp->vfs_data, B_FALSE); 823ea8dc4b6Seschrock vfsp->vfs_flag |= VFS_REMOUNT; 824ea8dc4b6Seschrock return (zfs_refresh_properties(vfsp)); 825ea8dc4b6Seschrock 826ea8dc4b6Seschrock } else if (why == ROOT_UNMOUNT) { 827ea8dc4b6Seschrock zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data); 828ea8dc4b6Seschrock (void) zfs_sync(vfsp, 0, 0); 829ea8dc4b6Seschrock return (0); 830ea8dc4b6Seschrock } 831ea8dc4b6Seschrock 832ea8dc4b6Seschrock /* 833ea8dc4b6Seschrock * if "why" is equal to anything else other than ROOT_INIT, 834ea8dc4b6Seschrock * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it. 835ea8dc4b6Seschrock */ 836ea8dc4b6Seschrock return (ENOTSUP); 837ea8dc4b6Seschrock } 838ea8dc4b6Seschrock 839ea8dc4b6Seschrock /*ARGSUSED*/ 840ea8dc4b6Seschrock static int 841ea8dc4b6Seschrock zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 842ea8dc4b6Seschrock { 843ea8dc4b6Seschrock char *osname; 844ea8dc4b6Seschrock pathname_t spn; 845ea8dc4b6Seschrock int error = 0; 846ea8dc4b6Seschrock uio_seg_t fromspace = (uap->flags & MS_SYSSPACE) ? 847*b1b8ab34Slling UIO_SYSSPACE : UIO_USERSPACE; 848ea8dc4b6Seschrock int canwrite; 849ea8dc4b6Seschrock 850ea8dc4b6Seschrock if (mvp->v_type != VDIR) 851ea8dc4b6Seschrock return (ENOTDIR); 852ea8dc4b6Seschrock 853ea8dc4b6Seschrock mutex_enter(&mvp->v_lock); 854ea8dc4b6Seschrock if ((uap->flags & MS_REMOUNT) == 0 && 855ea8dc4b6Seschrock (uap->flags & MS_OVERLAY) == 0 && 856ea8dc4b6Seschrock (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 857ea8dc4b6Seschrock mutex_exit(&mvp->v_lock); 858ea8dc4b6Seschrock return (EBUSY); 859ea8dc4b6Seschrock } 860ea8dc4b6Seschrock mutex_exit(&mvp->v_lock); 861ea8dc4b6Seschrock 862ea8dc4b6Seschrock /* 863ea8dc4b6Seschrock * ZFS does not support passing unparsed data in via MS_DATA. 864ea8dc4b6Seschrock * Users should use the MS_OPTIONSTR interface; this means 865ea8dc4b6Seschrock * that all option parsing is already done and the options struct 866ea8dc4b6Seschrock * can be interrogated. 867ea8dc4b6Seschrock */ 868ea8dc4b6Seschrock if ((uap->flags & MS_DATA) && uap->datalen > 0) 869ea8dc4b6Seschrock return (EINVAL); 870ea8dc4b6Seschrock 871ea8dc4b6Seschrock /* 872ea8dc4b6Seschrock * When doing a remount, we simply refresh our temporary properties 873ea8dc4b6Seschrock * according to those options set in the current VFS options. 874ea8dc4b6Seschrock */ 875ea8dc4b6Seschrock if (uap->flags & MS_REMOUNT) { 876ea8dc4b6Seschrock return (zfs_refresh_properties(vfsp)); 877fa9e4066Sahrens } 878fa9e4066Sahrens 879ea8dc4b6Seschrock /* 880ea8dc4b6Seschrock * Get the objset name (the "special" mount argument). 881ea8dc4b6Seschrock */ 882ea8dc4b6Seschrock if (error = pn_get(uap->spec, fromspace, &spn)) 883ea8dc4b6Seschrock return (error); 884ea8dc4b6Seschrock 885ea8dc4b6Seschrock osname = spn.pn_path; 886ea8dc4b6Seschrock 887ea8dc4b6Seschrock if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0) 888ea8dc4b6Seschrock goto out; 889ea8dc4b6Seschrock 890ea8dc4b6Seschrock /* 891ea8dc4b6Seschrock * Refuse to mount a filesystem if we are in a local zone and the 892ea8dc4b6Seschrock * dataset is not visible. 893ea8dc4b6Seschrock */ 894ea8dc4b6Seschrock if (!INGLOBALZONE(curproc) && 895ea8dc4b6Seschrock (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 896ea8dc4b6Seschrock error = EPERM; 897ea8dc4b6Seschrock goto out; 898ea8dc4b6Seschrock } 899ea8dc4b6Seschrock 900ea8dc4b6Seschrock error = zfs_domount(vfsp, osname, cr); 901ea8dc4b6Seschrock 902ea8dc4b6Seschrock out: 903fa9e4066Sahrens pn_free(&spn); 904fa9e4066Sahrens return (error); 905fa9e4066Sahrens } 906fa9e4066Sahrens 907fa9e4066Sahrens static int 908fa9e4066Sahrens zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp) 909fa9e4066Sahrens { 910fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 911fa9e4066Sahrens dev32_t d32; 912a2eea2e1Sahrens uint64_t refdbytes, availbytes, usedobjs, availobjs; 913fa9e4066Sahrens 914fa9e4066Sahrens ZFS_ENTER(zfsvfs); 915fa9e4066Sahrens 916a2eea2e1Sahrens dmu_objset_space(zfsvfs->z_os, 917a2eea2e1Sahrens &refdbytes, &availbytes, &usedobjs, &availobjs); 918fa9e4066Sahrens 919fa9e4066Sahrens /* 920fa9e4066Sahrens * The underlying storage pool actually uses multiple block sizes. 921fa9e4066Sahrens * We report the fragsize as the smallest block size we support, 922fa9e4066Sahrens * and we report our blocksize as the filesystem's maximum blocksize. 923fa9e4066Sahrens */ 924fa9e4066Sahrens statp->f_frsize = 1UL << SPA_MINBLOCKSHIFT; 925fa9e4066Sahrens statp->f_bsize = zfsvfs->z_max_blksz; 926fa9e4066Sahrens 927fa9e4066Sahrens /* 928fa9e4066Sahrens * The following report "total" blocks of various kinds in the 929fa9e4066Sahrens * file system, but reported in terms of f_frsize - the 930fa9e4066Sahrens * "fragment" size. 931fa9e4066Sahrens */ 932fa9e4066Sahrens 933a2eea2e1Sahrens statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; 934a2eea2e1Sahrens statp->f_bfree = availbytes >> SPA_MINBLOCKSHIFT; 935fa9e4066Sahrens statp->f_bavail = statp->f_bfree; /* no root reservation */ 936fa9e4066Sahrens 937fa9e4066Sahrens /* 938fa9e4066Sahrens * statvfs() should really be called statufs(), because it assumes 939fa9e4066Sahrens * static metadata. ZFS doesn't preallocate files, so the best 940fa9e4066Sahrens * we can do is report the max that could possibly fit in f_files, 941fa9e4066Sahrens * and that minus the number actually used in f_ffree. 942fa9e4066Sahrens * For f_ffree, report the smaller of the number of object available 943fa9e4066Sahrens * and the number of blocks (each object will take at least a block). 944fa9e4066Sahrens */ 945a2eea2e1Sahrens statp->f_ffree = MIN(availobjs, statp->f_bfree); 946fa9e4066Sahrens statp->f_favail = statp->f_ffree; /* no "root reservation" */ 947a2eea2e1Sahrens statp->f_files = statp->f_ffree + usedobjs; 948fa9e4066Sahrens 949fa9e4066Sahrens (void) cmpldev(&d32, vfsp->vfs_dev); 950fa9e4066Sahrens statp->f_fsid = d32; 951fa9e4066Sahrens 952fa9e4066Sahrens /* 953fa9e4066Sahrens * We're a zfs filesystem. 954fa9e4066Sahrens */ 955fa9e4066Sahrens (void) strcpy(statp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name); 956fa9e4066Sahrens 957a5be7ebbSmarks statp->f_flag = vf_to_stf(vfsp->vfs_flag); 958fa9e4066Sahrens 959fa9e4066Sahrens statp->f_namemax = ZFS_MAXNAMELEN; 960fa9e4066Sahrens 961fa9e4066Sahrens /* 962fa9e4066Sahrens * We have all of 32 characters to stuff a string here. 963fa9e4066Sahrens * Is there anything useful we could/should provide? 964fa9e4066Sahrens */ 965fa9e4066Sahrens bzero(statp->f_fstr, sizeof (statp->f_fstr)); 966fa9e4066Sahrens 967fa9e4066Sahrens ZFS_EXIT(zfsvfs); 968fa9e4066Sahrens return (0); 969fa9e4066Sahrens } 970fa9e4066Sahrens 971fa9e4066Sahrens static int 972fa9e4066Sahrens zfs_root(vfs_t *vfsp, vnode_t **vpp) 973fa9e4066Sahrens { 974fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 975fa9e4066Sahrens znode_t *rootzp; 976fa9e4066Sahrens int error; 977fa9e4066Sahrens 978fa9e4066Sahrens ZFS_ENTER(zfsvfs); 979fa9e4066Sahrens 980fa9e4066Sahrens error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 981fa9e4066Sahrens if (error == 0) 982fa9e4066Sahrens *vpp = ZTOV(rootzp); 983fa9e4066Sahrens 984fa9e4066Sahrens ZFS_EXIT(zfsvfs); 985fa9e4066Sahrens return (error); 986fa9e4066Sahrens } 987fa9e4066Sahrens 988fa9e4066Sahrens /*ARGSUSED*/ 989fa9e4066Sahrens static int 990fa9e4066Sahrens zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr) 991fa9e4066Sahrens { 992fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 993fa9e4066Sahrens int ret; 994fa9e4066Sahrens 995fa9e4066Sahrens if ((ret = secpolicy_fs_unmount(cr, vfsp)) != 0) 996fa9e4066Sahrens return (ret); 997fa9e4066Sahrens 998033f9833Sek 999033f9833Sek (void) dnlc_purge_vfsp(vfsp, 0); 1000033f9833Sek 1001fa9e4066Sahrens /* 1002fa9e4066Sahrens * Unmount any snapshots mounted under .zfs before unmounting the 1003fa9e4066Sahrens * dataset itself. 1004fa9e4066Sahrens */ 1005fa9e4066Sahrens if (zfsvfs->z_ctldir != NULL && 1006fa9e4066Sahrens (ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) 1007fa9e4066Sahrens return (ret); 1008fa9e4066Sahrens 1009fa9e4066Sahrens if (fflag & MS_FORCE) { 1010fa9e4066Sahrens vfsp->vfs_flag |= VFS_UNMOUNTED; 1011fa9e4066Sahrens zfsvfs->z_unmounted1 = B_TRUE; 1012fa9e4066Sahrens 1013fa9e4066Sahrens /* 1014fa9e4066Sahrens * Wait for all zfs threads to leave zfs. 1015fa9e4066Sahrens * Grabbing a rwlock as reader in all vops and 1016fa9e4066Sahrens * as writer here doesn't work because it too easy to get 1017fa9e4066Sahrens * multiple reader enters as zfs can re-enter itself. 1018fa9e4066Sahrens * This can lead to deadlock if there is an intervening 1019fa9e4066Sahrens * rw_enter as writer. 1020fa9e4066Sahrens * So a file system threads ref count (z_op_cnt) is used. 1021fa9e4066Sahrens * A polling loop on z_op_cnt may seem inefficient, but 1022fa9e4066Sahrens * - this saves all threads on exit from having to grab a 1023fa9e4066Sahrens * mutex in order to cv_signal 1024fa9e4066Sahrens * - only occurs on forced unmount in the rare case when 1025fa9e4066Sahrens * there are outstanding threads within the file system. 1026fa9e4066Sahrens */ 1027fa9e4066Sahrens while (zfsvfs->z_op_cnt) { 1028fa9e4066Sahrens delay(1); 1029fa9e4066Sahrens } 1030fa9e4066Sahrens 1031fa9e4066Sahrens zfs_objset_close(zfsvfs); 1032fa9e4066Sahrens 1033fa9e4066Sahrens return (0); 1034fa9e4066Sahrens } 1035fa9e4066Sahrens /* 1036fa9e4066Sahrens * Check the number of active vnodes in the file system. 1037fa9e4066Sahrens * Our count is maintained in the vfs structure, but the number 1038fa9e4066Sahrens * is off by 1 to indicate a hold on the vfs structure itself. 1039fa9e4066Sahrens * 1040fa9e4066Sahrens * The '.zfs' directory maintains a reference of its own, and any active 1041fa9e4066Sahrens * references underneath are reflected in the vnode count. 1042fa9e4066Sahrens */ 1043fa9e4066Sahrens if (zfsvfs->z_ctldir == NULL) { 1044893a6d32Sahrens if (vfsp->vfs_count > 1) 1045fa9e4066Sahrens return (EBUSY); 1046fa9e4066Sahrens } else { 1047fa9e4066Sahrens if (vfsp->vfs_count > 2 || 1048fa9e4066Sahrens (zfsvfs->z_ctldir->v_count > 1 && !(fflag & MS_FORCE))) { 1049fa9e4066Sahrens return (EBUSY); 1050fa9e4066Sahrens } 1051fa9e4066Sahrens } 1052fa9e4066Sahrens 1053fa9e4066Sahrens vfsp->vfs_flag |= VFS_UNMOUNTED; 1054fa9e4066Sahrens zfs_objset_close(zfsvfs); 1055fa9e4066Sahrens 1056fa9e4066Sahrens return (0); 1057fa9e4066Sahrens } 1058fa9e4066Sahrens 1059fa9e4066Sahrens static int 1060fa9e4066Sahrens zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 1061fa9e4066Sahrens { 1062fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1063fa9e4066Sahrens znode_t *zp; 1064fa9e4066Sahrens uint64_t object = 0; 1065fa9e4066Sahrens uint64_t fid_gen = 0; 1066fa9e4066Sahrens uint64_t gen_mask; 1067fa9e4066Sahrens uint64_t zp_gen; 1068fa9e4066Sahrens int i, err; 1069fa9e4066Sahrens 1070fa9e4066Sahrens *vpp = NULL; 1071fa9e4066Sahrens 1072fa9e4066Sahrens ZFS_ENTER(zfsvfs); 1073fa9e4066Sahrens 1074fa9e4066Sahrens if (fidp->fid_len == LONG_FID_LEN) { 1075fa9e4066Sahrens zfid_long_t *zlfid = (zfid_long_t *)fidp; 1076fa9e4066Sahrens uint64_t objsetid = 0; 1077fa9e4066Sahrens uint64_t setgen = 0; 1078fa9e4066Sahrens 1079fa9e4066Sahrens for (i = 0; i < sizeof (zlfid->zf_setid); i++) 1080fa9e4066Sahrens objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 1081fa9e4066Sahrens 1082fa9e4066Sahrens for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 1083fa9e4066Sahrens setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 1084fa9e4066Sahrens 1085fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1086fa9e4066Sahrens 1087fa9e4066Sahrens err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 1088fa9e4066Sahrens if (err) 1089fa9e4066Sahrens return (EINVAL); 1090fa9e4066Sahrens ZFS_ENTER(zfsvfs); 1091fa9e4066Sahrens } 1092fa9e4066Sahrens 1093fa9e4066Sahrens if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 1094fa9e4066Sahrens zfid_short_t *zfid = (zfid_short_t *)fidp; 1095fa9e4066Sahrens 1096fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_object); i++) 1097fa9e4066Sahrens object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 1098fa9e4066Sahrens 1099fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_gen); i++) 1100fa9e4066Sahrens fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 1101fa9e4066Sahrens } else { 1102fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1103fa9e4066Sahrens return (EINVAL); 1104fa9e4066Sahrens } 1105fa9e4066Sahrens 1106fa9e4066Sahrens /* A zero fid_gen means we are in the .zfs control directories */ 1107fa9e4066Sahrens if (fid_gen == 0 && 1108fa9e4066Sahrens (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { 1109fa9e4066Sahrens *vpp = zfsvfs->z_ctldir; 1110fa9e4066Sahrens ASSERT(*vpp != NULL); 1111fa9e4066Sahrens if (object == ZFSCTL_INO_SNAPDIR) { 1112fa9e4066Sahrens VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, 1113fa9e4066Sahrens 0, NULL, NULL) == 0); 1114fa9e4066Sahrens } else { 1115fa9e4066Sahrens VN_HOLD(*vpp); 1116fa9e4066Sahrens } 1117fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1118fa9e4066Sahrens return (0); 1119fa9e4066Sahrens } 1120fa9e4066Sahrens 1121fa9e4066Sahrens gen_mask = -1ULL >> (64 - 8 * i); 1122fa9e4066Sahrens 1123fa9e4066Sahrens dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 1124fa9e4066Sahrens if (err = zfs_zget(zfsvfs, object, &zp)) { 1125fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1126fa9e4066Sahrens return (err); 1127fa9e4066Sahrens } 1128fa9e4066Sahrens zp_gen = zp->z_phys->zp_gen & gen_mask; 1129fa9e4066Sahrens if (zp_gen == 0) 1130fa9e4066Sahrens zp_gen = 1; 1131893a6d32Sahrens if (zp->z_unlinked || zp_gen != fid_gen) { 1132fa9e4066Sahrens dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 1133fa9e4066Sahrens VN_RELE(ZTOV(zp)); 1134fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1135fa9e4066Sahrens return (EINVAL); 1136fa9e4066Sahrens } 1137fa9e4066Sahrens 1138fa9e4066Sahrens *vpp = ZTOV(zp); 1139fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1140fa9e4066Sahrens return (0); 1141fa9e4066Sahrens } 1142fa9e4066Sahrens 1143fa9e4066Sahrens static void 1144fa9e4066Sahrens zfs_objset_close(zfsvfs_t *zfsvfs) 1145fa9e4066Sahrens { 1146fa9e4066Sahrens znode_t *zp, *nextzp; 1147fa9e4066Sahrens objset_t *os = zfsvfs->z_os; 1148fa9e4066Sahrens 1149fa9e4066Sahrens /* 1150fa9e4066Sahrens * For forced unmount, at this point all vops except zfs_inactive 1151fa9e4066Sahrens * are erroring EIO. We need to now suspend zfs_inactive threads 1152fa9e4066Sahrens * while we are freeing dbufs before switching zfs_inactive 1153fa9e4066Sahrens * to use behaviour without a objset. 1154fa9e4066Sahrens */ 1155fa9e4066Sahrens rw_enter(&zfsvfs->z_um_lock, RW_WRITER); 1156fa9e4066Sahrens 1157fa9e4066Sahrens /* 1158fa9e4066Sahrens * Release all holds on dbufs 1159fa9e4066Sahrens * Note, although we have stopped all other vop threads and 1160fa9e4066Sahrens * zfs_inactive(), the dmu can callback via znode_pageout_func() 1161fa9e4066Sahrens * which can zfs_znode_free() the znode. 1162fa9e4066Sahrens * So we lock z_all_znodes; search the list for a held 1163fa9e4066Sahrens * dbuf; drop the lock (we know zp can't disappear if we hold 1164fa9e4066Sahrens * a dbuf lock; then regrab the lock and restart. 1165fa9e4066Sahrens */ 1166fa9e4066Sahrens mutex_enter(&zfsvfs->z_znodes_lock); 1167fa9e4066Sahrens for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) { 1168fa9e4066Sahrens nextzp = list_next(&zfsvfs->z_all_znodes, zp); 1169fa9e4066Sahrens if (zp->z_dbuf_held) { 1170fa9e4066Sahrens /* dbufs should only be held when force unmounting */ 1171fa9e4066Sahrens zp->z_dbuf_held = 0; 1172fa9e4066Sahrens mutex_exit(&zfsvfs->z_znodes_lock); 1173ea8dc4b6Seschrock dmu_buf_rele(zp->z_dbuf, NULL); 1174fa9e4066Sahrens /* Start again */ 1175fa9e4066Sahrens mutex_enter(&zfsvfs->z_znodes_lock); 1176fa9e4066Sahrens nextzp = list_head(&zfsvfs->z_all_znodes); 1177fa9e4066Sahrens } 1178fa9e4066Sahrens } 1179fa9e4066Sahrens mutex_exit(&zfsvfs->z_znodes_lock); 1180fa9e4066Sahrens 1181fa9e4066Sahrens /* 1182fa9e4066Sahrens * Unregister properties. 1183fa9e4066Sahrens */ 1184ea8dc4b6Seschrock if (!dmu_objset_is_snapshot(os)) 1185ea8dc4b6Seschrock zfs_unregister_callbacks(zfsvfs); 1186fa9e4066Sahrens 1187fa9e4066Sahrens /* 1188fa9e4066Sahrens * Switch zfs_inactive to behaviour without an objset. 1189fa9e4066Sahrens * It just tosses cached pages and frees the znode & vnode. 1190fa9e4066Sahrens * Then re-enable zfs_inactive threads in that new behaviour. 1191fa9e4066Sahrens */ 1192fa9e4066Sahrens zfsvfs->z_unmounted2 = B_TRUE; 1193fa9e4066Sahrens rw_exit(&zfsvfs->z_um_lock); /* re-enable any zfs_inactive threads */ 1194fa9e4066Sahrens 1195fa9e4066Sahrens /* 1196fa9e4066Sahrens * Close the zil. Can't close the zil while zfs_inactive 1197fa9e4066Sahrens * threads are blocked as zil_close can call zfs_inactive. 1198fa9e4066Sahrens */ 1199fa9e4066Sahrens if (zfsvfs->z_log) { 1200fa9e4066Sahrens zil_close(zfsvfs->z_log); 1201fa9e4066Sahrens zfsvfs->z_log = NULL; 1202fa9e4066Sahrens } 1203fa9e4066Sahrens 1204ea8dc4b6Seschrock /* 1205ea8dc4b6Seschrock * Evict all dbufs so that cached znodes will be freed 1206ea8dc4b6Seschrock */ 1207436b2950Sperrin if (dmu_objset_evict_dbufs(os, 1)) { 1208436b2950Sperrin txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 1209436b2950Sperrin (void) dmu_objset_evict_dbufs(os, 0); 1210436b2950Sperrin } 1211ea8dc4b6Seschrock 1212fa9e4066Sahrens /* 1213fa9e4066Sahrens * Finally close the objset 1214fa9e4066Sahrens */ 1215fa9e4066Sahrens dmu_objset_close(os); 1216fa9e4066Sahrens 12178afd4dd6Sperrin /* 12188afd4dd6Sperrin * We can now safely destroy the '.zfs' directory node. 12198afd4dd6Sperrin */ 12208afd4dd6Sperrin if (zfsvfs->z_ctldir != NULL) 12218afd4dd6Sperrin zfsctl_destroy(zfsvfs); 12228afd4dd6Sperrin 1223fa9e4066Sahrens } 1224fa9e4066Sahrens 1225fa9e4066Sahrens static void 1226fa9e4066Sahrens zfs_freevfs(vfs_t *vfsp) 1227fa9e4066Sahrens { 1228fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1229fa9e4066Sahrens 1230fa9e4066Sahrens kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1231fa9e4066Sahrens 1232fa9e4066Sahrens atomic_add_32(&zfs_active_fs_count, -1); 1233fa9e4066Sahrens } 1234fa9e4066Sahrens 1235fa9e4066Sahrens /* 1236fa9e4066Sahrens * VFS_INIT() initialization. Note that there is no VFS_FINI(), 1237fa9e4066Sahrens * so we can't safely do any non-idempotent initialization here. 1238fa9e4066Sahrens * Leave that to zfs_init() and zfs_fini(), which are called 1239fa9e4066Sahrens * from the module's _init() and _fini() entry points. 1240fa9e4066Sahrens */ 1241fa9e4066Sahrens /*ARGSUSED*/ 1242fa9e4066Sahrens static int 1243fa9e4066Sahrens zfs_vfsinit(int fstype, char *name) 1244fa9e4066Sahrens { 1245fa9e4066Sahrens int error; 1246fa9e4066Sahrens 1247fa9e4066Sahrens zfsfstype = fstype; 1248fa9e4066Sahrens 1249fa9e4066Sahrens /* 1250fa9e4066Sahrens * Setup vfsops and vnodeops tables. 1251fa9e4066Sahrens */ 1252fa9e4066Sahrens error = vfs_setfsops(fstype, zfs_vfsops_template, &zfs_vfsops); 1253fa9e4066Sahrens if (error != 0) { 1254fa9e4066Sahrens cmn_err(CE_WARN, "zfs: bad vfs ops template"); 1255fa9e4066Sahrens } 1256fa9e4066Sahrens 1257fa9e4066Sahrens error = zfs_create_op_tables(); 1258fa9e4066Sahrens if (error) { 1259fa9e4066Sahrens zfs_remove_op_tables(); 1260fa9e4066Sahrens cmn_err(CE_WARN, "zfs: bad vnode ops template"); 1261fa9e4066Sahrens (void) vfs_freevfsops_by_type(zfsfstype); 1262fa9e4066Sahrens return (error); 1263fa9e4066Sahrens } 1264fa9e4066Sahrens 1265fa9e4066Sahrens mutex_init(&zfs_dev_mtx, NULL, MUTEX_DEFAULT, NULL); 1266fa9e4066Sahrens 1267fa9e4066Sahrens /* 1268a0965f35Sbonwick * Unique major number for all zfs mounts. 1269a0965f35Sbonwick * If we run out of 32-bit minors, we'll getudev() another major. 1270fa9e4066Sahrens */ 1271a0965f35Sbonwick zfs_major = ddi_name_to_major(ZFS_DRIVER); 1272a0965f35Sbonwick zfs_minor = ZFS_MIN_MINOR; 1273fa9e4066Sahrens 1274fa9e4066Sahrens return (0); 1275fa9e4066Sahrens } 1276fa9e4066Sahrens 1277fa9e4066Sahrens void 1278fa9e4066Sahrens zfs_init(void) 1279fa9e4066Sahrens { 1280fa9e4066Sahrens /* 1281fa9e4066Sahrens * Initialize .zfs directory structures 1282fa9e4066Sahrens */ 1283fa9e4066Sahrens zfsctl_init(); 1284fa9e4066Sahrens 1285fa9e4066Sahrens /* 1286fa9e4066Sahrens * Initialize znode cache, vnode ops, etc... 1287fa9e4066Sahrens */ 1288fa9e4066Sahrens zfs_znode_init(); 1289fa9e4066Sahrens } 1290fa9e4066Sahrens 1291fa9e4066Sahrens void 1292fa9e4066Sahrens zfs_fini(void) 1293fa9e4066Sahrens { 1294fa9e4066Sahrens zfsctl_fini(); 1295fa9e4066Sahrens zfs_znode_fini(); 1296fa9e4066Sahrens } 1297fa9e4066Sahrens 1298fa9e4066Sahrens int 1299fa9e4066Sahrens zfs_busy(void) 1300fa9e4066Sahrens { 1301fa9e4066Sahrens return (zfs_active_fs_count != 0); 1302fa9e4066Sahrens } 1303fa9e4066Sahrens 1304fa9e4066Sahrens static vfsdef_t vfw = { 1305fa9e4066Sahrens VFSDEF_VERSION, 1306fa9e4066Sahrens MNTTYPE_ZFS, 1307fa9e4066Sahrens zfs_vfsinit, 13085a59a8b3Srsb VSW_HASPROTO|VSW_CANRWRO|VSW_CANREMOUNT|VSW_VOLATILEDEV|VSW_STATS, 1309fa9e4066Sahrens &zfs_mntopts 1310fa9e4066Sahrens }; 1311fa9e4066Sahrens 1312fa9e4066Sahrens struct modlfs zfs_modlfs = { 1313e9dbad6fSeschrock &mod_fsops, "ZFS filesystem version " ZFS_VERSION_STRING, &vfw 1314fa9e4066Sahrens }; 1315