1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5033f9833Sek * Common Development and Distribution License (the "License"). 6033f9833Sek * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 22893a6d32Sahrens * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23fa9e4066Sahrens * Use is subject to license terms. 24fa9e4066Sahrens */ 25fa9e4066Sahrens 2678077464Sck #pragma ident "%Z%%M% %I% %E% SMI" 27fa9e4066Sahrens 28fa9e4066Sahrens #include <sys/types.h> 29fa9e4066Sahrens #include <sys/param.h> 30fa9e4066Sahrens #include <sys/systm.h> 31fa9e4066Sahrens #include <sys/sysmacros.h> 32fa9e4066Sahrens #include <sys/kmem.h> 33fa9e4066Sahrens #include <sys/pathname.h> 34fa9e4066Sahrens #include <sys/vnode.h> 35fa9e4066Sahrens #include <sys/vfs.h> 36aa59c4cbSrsb #include <sys/vfs_opreg.h> 37fa9e4066Sahrens #include <sys/mntent.h> 38fa9e4066Sahrens #include <sys/mount.h> 39fa9e4066Sahrens #include <sys/cmn_err.h> 40fa9e4066Sahrens #include "fs/fs_subr.h" 41fa9e4066Sahrens #include <sys/zfs_znode.h> 42893a6d32Sahrens #include <sys/zfs_dir.h> 43fa9e4066Sahrens #include <sys/zil.h> 44fa9e4066Sahrens #include <sys/fs/zfs.h> 45fa9e4066Sahrens #include <sys/dmu.h> 46fa9e4066Sahrens #include <sys/dsl_prop.h> 47b1b8ab34Slling #include <sys/dsl_dataset.h> 48ecd6cf80Smarks #include <sys/dsl_deleg.h> 49fa9e4066Sahrens #include <sys/spa.h> 50fa9e4066Sahrens #include <sys/zap.h> 51fa9e4066Sahrens #include <sys/varargs.h> 52fa9e4066Sahrens #include <sys/policy.h> 53fa9e4066Sahrens #include <sys/atomic.h> 54fa9e4066Sahrens #include <sys/mkdev.h> 55fa9e4066Sahrens #include <sys/modctl.h> 56ecd6cf80Smarks #include <sys/refstr.h> 57fa9e4066Sahrens #include <sys/zfs_ioctl.h> 58fa9e4066Sahrens #include <sys/zfs_ctldir.h> 59ea8dc4b6Seschrock #include <sys/bootconf.h> 60a0965f35Sbonwick #include <sys/sunddi.h> 61033f9833Sek #include <sys/dnlc.h> 62*f18faf3fSek #include <sys/dmu_objset.h> 63fa9e4066Sahrens 64fa9e4066Sahrens int zfsfstype; 65fa9e4066Sahrens vfsops_t *zfs_vfsops = NULL; 66a0965f35Sbonwick static major_t zfs_major; 67fa9e4066Sahrens static minor_t zfs_minor; 68fa9e4066Sahrens static kmutex_t zfs_dev_mtx; 69fa9e4066Sahrens 70fa9e4066Sahrens static int zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr); 71fa9e4066Sahrens static int zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr); 72ea8dc4b6Seschrock static int zfs_mountroot(vfs_t *vfsp, enum whymountroot); 73fa9e4066Sahrens static int zfs_root(vfs_t *vfsp, vnode_t **vpp); 74fa9e4066Sahrens static int zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp); 75fa9e4066Sahrens static int zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp); 76fa9e4066Sahrens static void zfs_freevfs(vfs_t *vfsp); 77fa9e4066Sahrens 78fa9e4066Sahrens static const fs_operation_def_t zfs_vfsops_template[] = { 79aa59c4cbSrsb VFSNAME_MOUNT, { .vfs_mount = zfs_mount }, 80aa59c4cbSrsb VFSNAME_MOUNTROOT, { .vfs_mountroot = zfs_mountroot }, 81aa59c4cbSrsb VFSNAME_UNMOUNT, { .vfs_unmount = zfs_umount }, 82aa59c4cbSrsb VFSNAME_ROOT, { .vfs_root = zfs_root }, 83aa59c4cbSrsb VFSNAME_STATVFS, { .vfs_statvfs = zfs_statvfs }, 84aa59c4cbSrsb VFSNAME_SYNC, { .vfs_sync = zfs_sync }, 85aa59c4cbSrsb VFSNAME_VGET, { .vfs_vget = zfs_vget }, 86aa59c4cbSrsb VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, 87aa59c4cbSrsb NULL, NULL 88fa9e4066Sahrens }; 89fa9e4066Sahrens 90fa9e4066Sahrens static const fs_operation_def_t zfs_vfsops_eio_template[] = { 91aa59c4cbSrsb VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, 92aa59c4cbSrsb NULL, NULL 93fa9e4066Sahrens }; 94fa9e4066Sahrens 95fa9e4066Sahrens /* 96fa9e4066Sahrens * We need to keep a count of active fs's. 97fa9e4066Sahrens * This is necessary to prevent our module 98fa9e4066Sahrens * from being unloaded after a umount -f 99fa9e4066Sahrens */ 100fa9e4066Sahrens static uint32_t zfs_active_fs_count = 0; 101fa9e4066Sahrens 102fa9e4066Sahrens static char *noatime_cancel[] = { MNTOPT_ATIME, NULL }; 103fa9e4066Sahrens static char *atime_cancel[] = { MNTOPT_NOATIME, NULL }; 1047b55fa8eSck static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL }; 1057b55fa8eSck static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL }; 106fa9e4066Sahrens 1077b55fa8eSck /* 108b510d378Slling * MO_DEFAULT is not used since the default value is determined 109b510d378Slling * by the equivalent property. 1107b55fa8eSck */ 111fa9e4066Sahrens static mntopt_t mntopts[] = { 1127b55fa8eSck { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, NULL }, 1137b55fa8eSck { MNTOPT_XATTR, xattr_cancel, NULL, 0, NULL }, 114b510d378Slling { MNTOPT_NOATIME, noatime_cancel, NULL, 0, NULL }, 115fa9e4066Sahrens { MNTOPT_ATIME, atime_cancel, NULL, 0, NULL } 116fa9e4066Sahrens }; 117fa9e4066Sahrens 118fa9e4066Sahrens static mntopts_t zfs_mntopts = { 119fa9e4066Sahrens sizeof (mntopts) / sizeof (mntopt_t), 120fa9e4066Sahrens mntopts 121fa9e4066Sahrens }; 122fa9e4066Sahrens 123fa9e4066Sahrens /*ARGSUSED*/ 124fa9e4066Sahrens int 125fa9e4066Sahrens zfs_sync(vfs_t *vfsp, short flag, cred_t *cr) 126fa9e4066Sahrens { 127fa9e4066Sahrens /* 128fa9e4066Sahrens * Data integrity is job one. We don't want a compromised kernel 129fa9e4066Sahrens * writing to the storage pool, so we never sync during panic. 130fa9e4066Sahrens */ 131fa9e4066Sahrens if (panicstr) 132fa9e4066Sahrens return (0); 133fa9e4066Sahrens 134fa9e4066Sahrens /* 135fa9e4066Sahrens * SYNC_ATTR is used by fsflush() to force old filesystems like UFS 136fa9e4066Sahrens * to sync metadata, which they would otherwise cache indefinitely. 137fa9e4066Sahrens * Semantically, the only requirement is that the sync be initiated. 138fa9e4066Sahrens * The DMU syncs out txgs frequently, so there's nothing to do. 139fa9e4066Sahrens */ 140fa9e4066Sahrens if (flag & SYNC_ATTR) 141fa9e4066Sahrens return (0); 142fa9e4066Sahrens 143fa9e4066Sahrens if (vfsp != NULL) { 144fa9e4066Sahrens /* 145fa9e4066Sahrens * Sync a specific filesystem. 146fa9e4066Sahrens */ 147fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 148fa9e4066Sahrens 149fa9e4066Sahrens ZFS_ENTER(zfsvfs); 150fa9e4066Sahrens if (zfsvfs->z_log != NULL) 151b19a79ecSperrin zil_commit(zfsvfs->z_log, UINT64_MAX, 0); 152fa9e4066Sahrens else 153fa9e4066Sahrens txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 154fa9e4066Sahrens ZFS_EXIT(zfsvfs); 155fa9e4066Sahrens } else { 156fa9e4066Sahrens /* 157fa9e4066Sahrens * Sync all ZFS filesystems. This is what happens when you 158fa9e4066Sahrens * run sync(1M). Unlike other filesystems, ZFS honors the 159fa9e4066Sahrens * request by waiting for all pools to commit all dirty data. 160fa9e4066Sahrens */ 161fa9e4066Sahrens spa_sync_allpools(); 162fa9e4066Sahrens } 163fa9e4066Sahrens 164fa9e4066Sahrens return (0); 165fa9e4066Sahrens } 166fa9e4066Sahrens 167ea8dc4b6Seschrock static int 168ea8dc4b6Seschrock zfs_create_unique_device(dev_t *dev) 169ea8dc4b6Seschrock { 170ea8dc4b6Seschrock major_t new_major; 171ea8dc4b6Seschrock 172ea8dc4b6Seschrock do { 173ea8dc4b6Seschrock ASSERT3U(zfs_minor, <=, MAXMIN32); 174ea8dc4b6Seschrock minor_t start = zfs_minor; 175ea8dc4b6Seschrock do { 176ea8dc4b6Seschrock mutex_enter(&zfs_dev_mtx); 177ea8dc4b6Seschrock if (zfs_minor >= MAXMIN32) { 178ea8dc4b6Seschrock /* 179ea8dc4b6Seschrock * If we're still using the real major 180ea8dc4b6Seschrock * keep out of /dev/zfs and /dev/zvol minor 181ea8dc4b6Seschrock * number space. If we're using a getudev()'ed 182ea8dc4b6Seschrock * major number, we can use all of its minors. 183ea8dc4b6Seschrock */ 184ea8dc4b6Seschrock if (zfs_major == ddi_name_to_major(ZFS_DRIVER)) 185ea8dc4b6Seschrock zfs_minor = ZFS_MIN_MINOR; 186ea8dc4b6Seschrock else 187ea8dc4b6Seschrock zfs_minor = 0; 188ea8dc4b6Seschrock } else { 189ea8dc4b6Seschrock zfs_minor++; 190ea8dc4b6Seschrock } 191ea8dc4b6Seschrock *dev = makedevice(zfs_major, zfs_minor); 192ea8dc4b6Seschrock mutex_exit(&zfs_dev_mtx); 193ea8dc4b6Seschrock } while (vfs_devismounted(*dev) && zfs_minor != start); 194ea8dc4b6Seschrock if (zfs_minor == start) { 195ea8dc4b6Seschrock /* 196ea8dc4b6Seschrock * We are using all ~262,000 minor numbers for the 197ea8dc4b6Seschrock * current major number. Create a new major number. 198ea8dc4b6Seschrock */ 199ea8dc4b6Seschrock if ((new_major = getudev()) == (major_t)-1) { 200ea8dc4b6Seschrock cmn_err(CE_WARN, 201ea8dc4b6Seschrock "zfs_mount: Can't get unique major " 202ea8dc4b6Seschrock "device number."); 203ea8dc4b6Seschrock return (-1); 204ea8dc4b6Seschrock } 205ea8dc4b6Seschrock mutex_enter(&zfs_dev_mtx); 206ea8dc4b6Seschrock zfs_major = new_major; 207ea8dc4b6Seschrock zfs_minor = 0; 208ea8dc4b6Seschrock 209ea8dc4b6Seschrock mutex_exit(&zfs_dev_mtx); 210ea8dc4b6Seschrock } else { 211ea8dc4b6Seschrock break; 212ea8dc4b6Seschrock } 213ea8dc4b6Seschrock /* CONSTANTCONDITION */ 214ea8dc4b6Seschrock } while (1); 215ea8dc4b6Seschrock 216ea8dc4b6Seschrock return (0); 217ea8dc4b6Seschrock } 218ea8dc4b6Seschrock 219fa9e4066Sahrens static void 220fa9e4066Sahrens atime_changed_cb(void *arg, uint64_t newval) 221fa9e4066Sahrens { 222fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 223fa9e4066Sahrens 224fa9e4066Sahrens if (newval == TRUE) { 225fa9e4066Sahrens zfsvfs->z_atime = TRUE; 226fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 227fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 228fa9e4066Sahrens } else { 229fa9e4066Sahrens zfsvfs->z_atime = FALSE; 230fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 231fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 232fa9e4066Sahrens } 233fa9e4066Sahrens } 234fa9e4066Sahrens 2357b55fa8eSck static void 2367b55fa8eSck xattr_changed_cb(void *arg, uint64_t newval) 2377b55fa8eSck { 2387b55fa8eSck zfsvfs_t *zfsvfs = arg; 2397b55fa8eSck 2407b55fa8eSck if (newval == TRUE) { 2417b55fa8eSck /* XXX locking on vfs_flag? */ 2427b55fa8eSck zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 2437b55fa8eSck vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 2447b55fa8eSck vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 2457b55fa8eSck } else { 2467b55fa8eSck /* XXX locking on vfs_flag? */ 2477b55fa8eSck zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 2487b55fa8eSck vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 2497b55fa8eSck vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 2507b55fa8eSck } 2517b55fa8eSck } 2527b55fa8eSck 253fa9e4066Sahrens static void 254fa9e4066Sahrens blksz_changed_cb(void *arg, uint64_t newval) 255fa9e4066Sahrens { 256fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 257fa9e4066Sahrens 258fa9e4066Sahrens if (newval < SPA_MINBLOCKSIZE || 259fa9e4066Sahrens newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 260fa9e4066Sahrens newval = SPA_MAXBLOCKSIZE; 261fa9e4066Sahrens 262fa9e4066Sahrens zfsvfs->z_max_blksz = newval; 263fa9e4066Sahrens zfsvfs->z_vfs->vfs_bsize = newval; 264fa9e4066Sahrens } 265fa9e4066Sahrens 266fa9e4066Sahrens static void 267fa9e4066Sahrens readonly_changed_cb(void *arg, uint64_t newval) 268fa9e4066Sahrens { 269fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 270fa9e4066Sahrens 271fa9e4066Sahrens if (newval) { 272fa9e4066Sahrens /* XXX locking on vfs_flag? */ 273fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 274fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 275fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 276fa9e4066Sahrens } else { 277fa9e4066Sahrens /* XXX locking on vfs_flag? */ 278fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 279fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 280fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 281fa9e4066Sahrens } 282fa9e4066Sahrens } 283fa9e4066Sahrens 284fa9e4066Sahrens static void 285fa9e4066Sahrens devices_changed_cb(void *arg, uint64_t newval) 286fa9e4066Sahrens { 287fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 288fa9e4066Sahrens 289fa9e4066Sahrens if (newval == FALSE) { 290fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NODEVICES; 291fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES); 292fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES, NULL, 0); 293fa9e4066Sahrens } else { 294fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NODEVICES; 295fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES); 296fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES, NULL, 0); 297fa9e4066Sahrens } 298fa9e4066Sahrens } 299fa9e4066Sahrens 300fa9e4066Sahrens static void 301fa9e4066Sahrens setuid_changed_cb(void *arg, uint64_t newval) 302fa9e4066Sahrens { 303fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 304fa9e4066Sahrens 305fa9e4066Sahrens if (newval == FALSE) { 306fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 307fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 308fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 309fa9e4066Sahrens } else { 310fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 311fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 312fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 313fa9e4066Sahrens } 314fa9e4066Sahrens } 315fa9e4066Sahrens 316fa9e4066Sahrens static void 317fa9e4066Sahrens exec_changed_cb(void *arg, uint64_t newval) 318fa9e4066Sahrens { 319fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 320fa9e4066Sahrens 321fa9e4066Sahrens if (newval == FALSE) { 322fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 323fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 324fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 325fa9e4066Sahrens } else { 326fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 327fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 328fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 329fa9e4066Sahrens } 330fa9e4066Sahrens } 331fa9e4066Sahrens 332fa9e4066Sahrens static void 333fa9e4066Sahrens snapdir_changed_cb(void *arg, uint64_t newval) 334fa9e4066Sahrens { 335fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 336fa9e4066Sahrens 337fa9e4066Sahrens zfsvfs->z_show_ctldir = newval; 338fa9e4066Sahrens } 339fa9e4066Sahrens 340fa9e4066Sahrens static void 341fa9e4066Sahrens acl_mode_changed_cb(void *arg, uint64_t newval) 342fa9e4066Sahrens { 343fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 344fa9e4066Sahrens 345fa9e4066Sahrens zfsvfs->z_acl_mode = newval; 346fa9e4066Sahrens } 347fa9e4066Sahrens 348fa9e4066Sahrens static void 349fa9e4066Sahrens acl_inherit_changed_cb(void *arg, uint64_t newval) 350fa9e4066Sahrens { 351fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 352fa9e4066Sahrens 353fa9e4066Sahrens zfsvfs->z_acl_inherit = newval; 354fa9e4066Sahrens } 355fa9e4066Sahrens 356ea8dc4b6Seschrock static int 357ea8dc4b6Seschrock zfs_register_callbacks(vfs_t *vfsp) 358ea8dc4b6Seschrock { 359ea8dc4b6Seschrock struct dsl_dataset *ds = NULL; 360ea8dc4b6Seschrock objset_t *os = NULL; 361ea8dc4b6Seschrock zfsvfs_t *zfsvfs = NULL; 3623ccfa83cSahrens int readonly, do_readonly = FALSE; 3633ccfa83cSahrens int setuid, do_setuid = FALSE; 3643ccfa83cSahrens int exec, do_exec = FALSE; 3653ccfa83cSahrens int devices, do_devices = FALSE; 3663ccfa83cSahrens int xattr, do_xattr = FALSE; 367b510d378Slling int atime, do_atime = FALSE; 368ea8dc4b6Seschrock int error = 0; 369ea8dc4b6Seschrock 370ea8dc4b6Seschrock ASSERT(vfsp); 371ea8dc4b6Seschrock zfsvfs = vfsp->vfs_data; 372ea8dc4b6Seschrock ASSERT(zfsvfs); 373ea8dc4b6Seschrock os = zfsvfs->z_os; 374fa9e4066Sahrens 375fa9e4066Sahrens /* 376ea8dc4b6Seschrock * The act of registering our callbacks will destroy any mount 377ea8dc4b6Seschrock * options we may have. In order to enable temporary overrides 3787b55fa8eSck * of mount options, we stash away the current values and 379ea8dc4b6Seschrock * restore them after we register the callbacks. 380fa9e4066Sahrens */ 381ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 382ea8dc4b6Seschrock readonly = B_TRUE; 383ea8dc4b6Seschrock do_readonly = B_TRUE; 384ea8dc4b6Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 385ea8dc4b6Seschrock readonly = B_FALSE; 386ea8dc4b6Seschrock do_readonly = B_TRUE; 387ea8dc4b6Seschrock } 388ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 389ea8dc4b6Seschrock devices = B_FALSE; 390ea8dc4b6Seschrock setuid = B_FALSE; 391ea8dc4b6Seschrock do_devices = B_TRUE; 392ea8dc4b6Seschrock do_setuid = B_TRUE; 393ea8dc4b6Seschrock } else { 394ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { 395ea8dc4b6Seschrock devices = B_FALSE; 396ea8dc4b6Seschrock do_devices = B_TRUE; 397b1b8ab34Slling } else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) { 398ea8dc4b6Seschrock devices = B_TRUE; 399ea8dc4b6Seschrock do_devices = B_TRUE; 400fa9e4066Sahrens } 401fa9e4066Sahrens 402ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 403ea8dc4b6Seschrock setuid = B_FALSE; 404ea8dc4b6Seschrock do_setuid = B_TRUE; 405ea8dc4b6Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 406ea8dc4b6Seschrock setuid = B_TRUE; 407ea8dc4b6Seschrock do_setuid = B_TRUE; 408fa9e4066Sahrens } 409ea8dc4b6Seschrock } 410ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 411ea8dc4b6Seschrock exec = B_FALSE; 412ea8dc4b6Seschrock do_exec = B_TRUE; 413ea8dc4b6Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 414ea8dc4b6Seschrock exec = B_TRUE; 415ea8dc4b6Seschrock do_exec = B_TRUE; 416fa9e4066Sahrens } 4177b55fa8eSck if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 4187b55fa8eSck xattr = B_FALSE; 4197b55fa8eSck do_xattr = B_TRUE; 4207b55fa8eSck } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 4217b55fa8eSck xattr = B_TRUE; 4227b55fa8eSck do_xattr = B_TRUE; 4237b55fa8eSck } 424b510d378Slling if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 425b510d378Slling atime = B_FALSE; 426b510d378Slling do_atime = B_TRUE; 427b510d378Slling } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 428b510d378Slling atime = B_TRUE; 429b510d378Slling do_atime = B_TRUE; 430b510d378Slling } 431fa9e4066Sahrens 432fa9e4066Sahrens /* 433ea8dc4b6Seschrock * Register property callbacks. 434ea8dc4b6Seschrock * 435ea8dc4b6Seschrock * It would probably be fine to just check for i/o error from 436ea8dc4b6Seschrock * the first prop_register(), but I guess I like to go 437ea8dc4b6Seschrock * overboard... 438fa9e4066Sahrens */ 439ea8dc4b6Seschrock ds = dmu_objset_ds(os); 440ea8dc4b6Seschrock error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 4417b55fa8eSck error = error ? error : dsl_prop_register(ds, 4427b55fa8eSck "xattr", xattr_changed_cb, zfsvfs); 443ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 444ea8dc4b6Seschrock "recordsize", blksz_changed_cb, zfsvfs); 445ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 446ea8dc4b6Seschrock "readonly", readonly_changed_cb, zfsvfs); 447ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 448ea8dc4b6Seschrock "devices", devices_changed_cb, zfsvfs); 449ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 450ea8dc4b6Seschrock "setuid", setuid_changed_cb, zfsvfs); 451ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 452ea8dc4b6Seschrock "exec", exec_changed_cb, zfsvfs); 453ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 454ea8dc4b6Seschrock "snapdir", snapdir_changed_cb, zfsvfs); 455ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 456ea8dc4b6Seschrock "aclmode", acl_mode_changed_cb, zfsvfs); 457ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 458ea8dc4b6Seschrock "aclinherit", acl_inherit_changed_cb, zfsvfs); 459ea8dc4b6Seschrock if (error) 460ea8dc4b6Seschrock goto unregister; 461fa9e4066Sahrens 462ea8dc4b6Seschrock /* 463ea8dc4b6Seschrock * Invoke our callbacks to restore temporary mount options. 464ea8dc4b6Seschrock */ 465ea8dc4b6Seschrock if (do_readonly) 466ea8dc4b6Seschrock readonly_changed_cb(zfsvfs, readonly); 467ea8dc4b6Seschrock if (do_setuid) 468ea8dc4b6Seschrock setuid_changed_cb(zfsvfs, setuid); 469ea8dc4b6Seschrock if (do_exec) 470ea8dc4b6Seschrock exec_changed_cb(zfsvfs, exec); 471ea8dc4b6Seschrock if (do_devices) 472ea8dc4b6Seschrock devices_changed_cb(zfsvfs, devices); 4737b55fa8eSck if (do_xattr) 4747b55fa8eSck xattr_changed_cb(zfsvfs, xattr); 475b510d378Slling if (do_atime) 476b510d378Slling atime_changed_cb(zfsvfs, atime); 477fa9e4066Sahrens 478ea8dc4b6Seschrock return (0); 479fa9e4066Sahrens 480ea8dc4b6Seschrock unregister: 481fa9e4066Sahrens /* 482ea8dc4b6Seschrock * We may attempt to unregister some callbacks that are not 483ea8dc4b6Seschrock * registered, but this is OK; it will simply return ENOMSG, 484ea8dc4b6Seschrock * which we will ignore. 485fa9e4066Sahrens */ 486ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 4877b55fa8eSck (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 488ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 489ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 490ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "devices", devices_changed_cb, zfsvfs); 491ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 492ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 493ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 494ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); 495ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 496ea8dc4b6Seschrock zfsvfs); 497ea8dc4b6Seschrock return (error); 498ea8dc4b6Seschrock 499ea8dc4b6Seschrock } 500ea8dc4b6Seschrock 501*f18faf3fSek static int 502*f18faf3fSek zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) 503*f18faf3fSek { 504*f18faf3fSek uint_t readonly; 505*f18faf3fSek int error; 506*f18faf3fSek 507*f18faf3fSek error = zfs_register_callbacks(zfsvfs->z_vfs); 508*f18faf3fSek if (error) 509*f18faf3fSek return (error); 510*f18faf3fSek 511*f18faf3fSek /* 512*f18faf3fSek * Set the objset user_ptr to track its zfsvfs. 513*f18faf3fSek */ 514*f18faf3fSek mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock); 515*f18faf3fSek dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 516*f18faf3fSek mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock); 517*f18faf3fSek 518*f18faf3fSek /* 519*f18faf3fSek * If we are not mounting (ie: online recv), then we don't 520*f18faf3fSek * have to worry about replaying the log as we blocked all 521*f18faf3fSek * operations out since we closed the ZIL. 522*f18faf3fSek */ 523*f18faf3fSek if (mounting) { 524*f18faf3fSek /* 525*f18faf3fSek * During replay we remove the read only flag to 526*f18faf3fSek * allow replays to succeed. 527*f18faf3fSek */ 528*f18faf3fSek readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; 529*f18faf3fSek if (readonly != 0) 530*f18faf3fSek zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 531*f18faf3fSek else 532*f18faf3fSek zfs_unlinked_drain(zfsvfs); 533*f18faf3fSek 534*f18faf3fSek /* 535*f18faf3fSek * Parse and replay the intent log. 536*f18faf3fSek * 537*f18faf3fSek * Because of ziltest, this must be done after 538*f18faf3fSek * zfs_unlinked_drain(). (Further note: ziltest doesn't 539*f18faf3fSek * use readonly mounts, where zfs_unlinked_drain() isn't 540*f18faf3fSek * called.) This is because ziltest causes spa_sync() 541*f18faf3fSek * to think it's committed, but actually it is not, so 542*f18faf3fSek * the intent log contains many txg's worth of changes. 543*f18faf3fSek * 544*f18faf3fSek * In particular, if object N is in the unlinked set in 545*f18faf3fSek * the last txg to actually sync, then it could be 546*f18faf3fSek * actually freed in a later txg and then reallocated in 547*f18faf3fSek * a yet later txg. This would write a "create object 548*f18faf3fSek * N" record to the intent log. Normally, this would be 549*f18faf3fSek * fine because the spa_sync() would have written out 550*f18faf3fSek * the fact that object N is free, before we could write 551*f18faf3fSek * the "create object N" intent log record. 552*f18faf3fSek * 553*f18faf3fSek * But when we are in ziltest mode, we advance the "open 554*f18faf3fSek * txg" without actually spa_sync()-ing the changes to 555*f18faf3fSek * disk. So we would see that object N is still 556*f18faf3fSek * allocated and in the unlinked set, and there is an 557*f18faf3fSek * intent log record saying to allocate it. 558*f18faf3fSek */ 559*f18faf3fSek zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, 560*f18faf3fSek zfs_replay_vector); 561*f18faf3fSek 562*f18faf3fSek zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */ 563*f18faf3fSek } 564*f18faf3fSek 565*f18faf3fSek if (!zil_disable) 566*f18faf3fSek zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 567*f18faf3fSek 568*f18faf3fSek return (0); 569*f18faf3fSek } 570*f18faf3fSek 571ea8dc4b6Seschrock static int 572ea8dc4b6Seschrock zfs_domount(vfs_t *vfsp, char *osname, cred_t *cr) 573ea8dc4b6Seschrock { 574ea8dc4b6Seschrock dev_t mount_dev; 575ea8dc4b6Seschrock uint64_t recordsize, readonly; 576ea8dc4b6Seschrock int error = 0; 577ea8dc4b6Seschrock int mode; 578ea8dc4b6Seschrock zfsvfs_t *zfsvfs; 579ea8dc4b6Seschrock znode_t *zp = NULL; 580ea8dc4b6Seschrock 581ea8dc4b6Seschrock ASSERT(vfsp); 582ea8dc4b6Seschrock ASSERT(osname); 583fa9e4066Sahrens 584fa9e4066Sahrens /* 585fa9e4066Sahrens * Initialize the zfs-specific filesystem structure. 586fa9e4066Sahrens * Should probably make this a kmem cache, shuffle fields, 587ea8dc4b6Seschrock * and just bzero up to z_hold_mtx[]. 588fa9e4066Sahrens */ 589fa9e4066Sahrens zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 590fa9e4066Sahrens zfsvfs->z_vfs = vfsp; 591fa9e4066Sahrens zfsvfs->z_parent = zfsvfs; 592fa9e4066Sahrens zfsvfs->z_assign = TXG_NOWAIT; 593fa9e4066Sahrens zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 594a0965f35Sbonwick zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 595fa9e4066Sahrens 596fa9e4066Sahrens mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 597fa9e4066Sahrens list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 598fa9e4066Sahrens offsetof(znode_t, z_link_node)); 599*f18faf3fSek rrw_init(&zfsvfs->z_teardown_lock); 600*f18faf3fSek rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); 601fa9e4066Sahrens 602ea8dc4b6Seschrock /* Initialize the generic filesystem structure. */ 603fa9e4066Sahrens vfsp->vfs_bcount = 0; 604fa9e4066Sahrens vfsp->vfs_data = NULL; 605fa9e4066Sahrens 606ea8dc4b6Seschrock if (zfs_create_unique_device(&mount_dev) == -1) { 607ea8dc4b6Seschrock error = ENODEV; 608ea8dc4b6Seschrock goto out; 609ea8dc4b6Seschrock } 610fa9e4066Sahrens ASSERT(vfs_devismounted(mount_dev) == 0); 611fa9e4066Sahrens 612ea8dc4b6Seschrock if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 613ea8dc4b6Seschrock NULL)) 614ea8dc4b6Seschrock goto out; 615fa9e4066Sahrens 616fa9e4066Sahrens vfsp->vfs_dev = mount_dev; 617fa9e4066Sahrens vfsp->vfs_fstype = zfsfstype; 618fa9e4066Sahrens vfsp->vfs_bsize = recordsize; 619fa9e4066Sahrens vfsp->vfs_flag |= VFS_NOTRUNC; 620fa9e4066Sahrens vfsp->vfs_data = zfsvfs; 621fa9e4066Sahrens 622ea8dc4b6Seschrock if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) 623fa9e4066Sahrens goto out; 624fa9e4066Sahrens 625fa9e4066Sahrens if (readonly) 626fa9e4066Sahrens mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 627fa9e4066Sahrens else 628fa9e4066Sahrens mode = DS_MODE_PRIMARY; 629fa9e4066Sahrens 630fa9e4066Sahrens error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 631fa9e4066Sahrens if (error == EROFS) { 632fa9e4066Sahrens mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 633fa9e4066Sahrens error = dmu_objset_open(osname, DMU_OST_ZFS, mode, 634fa9e4066Sahrens &zfsvfs->z_os); 635fa9e4066Sahrens } 636fa9e4066Sahrens 637fa9e4066Sahrens if (error) 638fa9e4066Sahrens goto out; 639fa9e4066Sahrens 640fa9e4066Sahrens if (error = zfs_init_fs(zfsvfs, &zp, cr)) 641fa9e4066Sahrens goto out; 642fa9e4066Sahrens 643ea8dc4b6Seschrock /* The call to zfs_init_fs leaves the vnode held, release it here. */ 644ea8dc4b6Seschrock VN_RELE(ZTOV(zp)); 645ea8dc4b6Seschrock 646ea8dc4b6Seschrock if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 6477b55fa8eSck uint64_t xattr; 6487b55fa8eSck 649fa9e4066Sahrens ASSERT(mode & DS_MODE_READONLY); 650fa9e4066Sahrens atime_changed_cb(zfsvfs, B_FALSE); 651fa9e4066Sahrens readonly_changed_cb(zfsvfs, B_TRUE); 6527b55fa8eSck if (error = dsl_prop_get_integer(osname, "xattr", &xattr, NULL)) 6537b55fa8eSck goto out; 6547b55fa8eSck xattr_changed_cb(zfsvfs, xattr); 655fa9e4066Sahrens zfsvfs->z_issnap = B_TRUE; 656fa9e4066Sahrens } else { 657*f18faf3fSek error = zfsvfs_setup(zfsvfs, B_TRUE); 658ea8dc4b6Seschrock } 659fa9e4066Sahrens 660ea8dc4b6Seschrock if (!zfsvfs->z_issnap) 661ea8dc4b6Seschrock zfsctl_create(zfsvfs); 662ea8dc4b6Seschrock out: 663ea8dc4b6Seschrock if (error) { 664ea8dc4b6Seschrock if (zfsvfs->z_os) 665ea8dc4b6Seschrock dmu_objset_close(zfsvfs->z_os); 666c25056deSgw mutex_destroy(&zfsvfs->z_znodes_lock); 667c25056deSgw list_destroy(&zfsvfs->z_all_znodes); 668*f18faf3fSek rrw_destroy(&zfsvfs->z_teardown_lock); 669*f18faf3fSek rw_destroy(&zfsvfs->z_teardown_inactive_lock); 670ea8dc4b6Seschrock kmem_free(zfsvfs, sizeof (zfsvfs_t)); 671ea8dc4b6Seschrock } else { 672ea8dc4b6Seschrock atomic_add_32(&zfs_active_fs_count, 1); 673ea8dc4b6Seschrock } 674fa9e4066Sahrens 675ea8dc4b6Seschrock return (error); 676ea8dc4b6Seschrock } 677ea8dc4b6Seschrock 678ea8dc4b6Seschrock void 679ea8dc4b6Seschrock zfs_unregister_callbacks(zfsvfs_t *zfsvfs) 680ea8dc4b6Seschrock { 681ea8dc4b6Seschrock objset_t *os = zfsvfs->z_os; 682ea8dc4b6Seschrock struct dsl_dataset *ds; 683ea8dc4b6Seschrock 684ea8dc4b6Seschrock /* 685ea8dc4b6Seschrock * Unregister properties. 686ea8dc4b6Seschrock */ 687ea8dc4b6Seschrock if (!dmu_objset_is_snapshot(os)) { 688fa9e4066Sahrens ds = dmu_objset_ds(os); 689ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 690fa9e4066Sahrens zfsvfs) == 0); 691fa9e4066Sahrens 6927b55fa8eSck VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 6937b55fa8eSck zfsvfs) == 0); 6947b55fa8eSck 695ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 696fa9e4066Sahrens zfsvfs) == 0); 697fa9e4066Sahrens 698ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 699fa9e4066Sahrens zfsvfs) == 0); 700fa9e4066Sahrens 701ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb, 702fa9e4066Sahrens zfsvfs) == 0); 703fa9e4066Sahrens 704ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 705fa9e4066Sahrens zfsvfs) == 0); 706fa9e4066Sahrens 707ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 708fa9e4066Sahrens zfsvfs) == 0); 709fa9e4066Sahrens 710ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 711fa9e4066Sahrens zfsvfs) == 0); 712fa9e4066Sahrens 713ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 714fa9e4066Sahrens zfsvfs) == 0); 715fa9e4066Sahrens 716ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "aclinherit", 717fa9e4066Sahrens acl_inherit_changed_cb, zfsvfs) == 0); 718ea8dc4b6Seschrock } 719ea8dc4b6Seschrock } 720fa9e4066Sahrens 721b1b8ab34Slling /* 722b1b8ab34Slling * Convert a decimal digit string to a uint64_t integer. 723b1b8ab34Slling */ 724b1b8ab34Slling static int 725b1b8ab34Slling str_to_uint64(char *str, uint64_t *objnum) 726b1b8ab34Slling { 727b1b8ab34Slling uint64_t num = 0; 728b1b8ab34Slling 729b1b8ab34Slling while (*str) { 730b1b8ab34Slling if (*str < '0' || *str > '9') 731b1b8ab34Slling return (EINVAL); 732b1b8ab34Slling 733b1b8ab34Slling num = num*10 + *str++ - '0'; 734b1b8ab34Slling } 735b1b8ab34Slling 736b1b8ab34Slling *objnum = num; 737b1b8ab34Slling return (0); 738b1b8ab34Slling } 739b1b8ab34Slling 740b1b8ab34Slling /* 741b1b8ab34Slling * The boot path passed from the boot loader is in the form of 742b1b8ab34Slling * "rootpool-name/root-filesystem-object-number'. Convert this 743b1b8ab34Slling * string to a dataset name: "rootpool-name/root-filesystem-name". 744b1b8ab34Slling */ 745b1b8ab34Slling static int 746b1b8ab34Slling parse_bootpath(char *bpath, char *outpath) 747b1b8ab34Slling { 748b1b8ab34Slling char *slashp; 749b1b8ab34Slling uint64_t objnum; 750b1b8ab34Slling int error; 751b1b8ab34Slling 752b1b8ab34Slling if (*bpath == 0 || *bpath == '/') 753b1b8ab34Slling return (EINVAL); 754b1b8ab34Slling 755b1b8ab34Slling slashp = strchr(bpath, '/'); 756b1b8ab34Slling 757b1b8ab34Slling /* if no '/', just return the pool name */ 758b1b8ab34Slling if (slashp == NULL) { 759b1b8ab34Slling (void) strcpy(outpath, bpath); 760b1b8ab34Slling return (0); 761b1b8ab34Slling } 762b1b8ab34Slling 763b1b8ab34Slling if (error = str_to_uint64(slashp+1, &objnum)) 764b1b8ab34Slling return (error); 765b1b8ab34Slling 766b1b8ab34Slling *slashp = '\0'; 767b1b8ab34Slling error = dsl_dsobj_to_dsname(bpath, objnum, outpath); 768b1b8ab34Slling *slashp = '/'; 769b1b8ab34Slling 770b1b8ab34Slling return (error); 771b1b8ab34Slling } 772b1b8ab34Slling 773ea8dc4b6Seschrock static int 774ea8dc4b6Seschrock zfs_mountroot(vfs_t *vfsp, enum whymountroot why) 775ea8dc4b6Seschrock { 776ea8dc4b6Seschrock int error = 0; 777ea8dc4b6Seschrock int ret = 0; 778ea8dc4b6Seschrock static int zfsrootdone = 0; 779ea8dc4b6Seschrock zfsvfs_t *zfsvfs = NULL; 780ea8dc4b6Seschrock znode_t *zp = NULL; 781ea8dc4b6Seschrock vnode_t *vp = NULL; 782b1b8ab34Slling char *zfs_bootpath; 783ea8dc4b6Seschrock 784ea8dc4b6Seschrock ASSERT(vfsp); 785ea8dc4b6Seschrock 786ea8dc4b6Seschrock /* 787b1b8ab34Slling * The filesystem that we mount as root is defined in the 788b1b8ab34Slling * "zfs-bootfs" property. 789ea8dc4b6Seschrock */ 790ea8dc4b6Seschrock if (why == ROOT_INIT) { 791ea8dc4b6Seschrock if (zfsrootdone++) 792ea8dc4b6Seschrock return (EBUSY); 793fa9e4066Sahrens 794b1b8ab34Slling if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(), 795b1b8ab34Slling DDI_PROP_DONTPASS, "zfs-bootfs", &zfs_bootpath) != 796b1b8ab34Slling DDI_SUCCESS) 797b1b8ab34Slling return (EIO); 798b1b8ab34Slling 799b1b8ab34Slling error = parse_bootpath(zfs_bootpath, rootfs.bo_name); 800b1b8ab34Slling ddi_prop_free(zfs_bootpath); 801b1b8ab34Slling 802b1b8ab34Slling if (error) 803b1b8ab34Slling return (error); 804fa9e4066Sahrens 805ea8dc4b6Seschrock if (error = vfs_lock(vfsp)) 806ea8dc4b6Seschrock return (error); 807fa9e4066Sahrens 808b1b8ab34Slling if (error = zfs_domount(vfsp, rootfs.bo_name, CRED())) 809ea8dc4b6Seschrock goto out; 810ea8dc4b6Seschrock 811ea8dc4b6Seschrock zfsvfs = (zfsvfs_t *)vfsp->vfs_data; 812ea8dc4b6Seschrock ASSERT(zfsvfs); 813ea8dc4b6Seschrock if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) 814ea8dc4b6Seschrock goto out; 815ea8dc4b6Seschrock 816ea8dc4b6Seschrock vp = ZTOV(zp); 817ea8dc4b6Seschrock mutex_enter(&vp->v_lock); 818ea8dc4b6Seschrock vp->v_flag |= VROOT; 819ea8dc4b6Seschrock mutex_exit(&vp->v_lock); 820ea8dc4b6Seschrock rootvp = vp; 821ea8dc4b6Seschrock 822ea8dc4b6Seschrock /* 823ea8dc4b6Seschrock * The zfs_zget call above returns with a hold on vp, we release 824ea8dc4b6Seschrock * it here. 825ea8dc4b6Seschrock */ 826fa9e4066Sahrens VN_RELE(vp); 827ea8dc4b6Seschrock 828ea8dc4b6Seschrock /* 829ea8dc4b6Seschrock * Mount root as readonly initially, it will be remouted 830ea8dc4b6Seschrock * read/write by /lib/svc/method/fs-usr. 831ea8dc4b6Seschrock */ 832ea8dc4b6Seschrock readonly_changed_cb(vfsp->vfs_data, B_TRUE); 833ea8dc4b6Seschrock vfs_add((struct vnode *)0, vfsp, 834ea8dc4b6Seschrock (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0); 835ea8dc4b6Seschrock out: 836ea8dc4b6Seschrock vfs_unlock(vfsp); 837ea8dc4b6Seschrock ret = (error) ? error : 0; 838ea8dc4b6Seschrock return (ret); 839ea8dc4b6Seschrock } else if (why == ROOT_REMOUNT) { 840ea8dc4b6Seschrock readonly_changed_cb(vfsp->vfs_data, B_FALSE); 841ea8dc4b6Seschrock vfsp->vfs_flag |= VFS_REMOUNT; 842b510d378Slling 843b510d378Slling /* refresh mount options */ 844b510d378Slling zfs_unregister_callbacks(vfsp->vfs_data); 845b510d378Slling return (zfs_register_callbacks(vfsp)); 846b510d378Slling 847ea8dc4b6Seschrock } else if (why == ROOT_UNMOUNT) { 848ea8dc4b6Seschrock zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data); 849ea8dc4b6Seschrock (void) zfs_sync(vfsp, 0, 0); 850ea8dc4b6Seschrock return (0); 851ea8dc4b6Seschrock } 852ea8dc4b6Seschrock 853ea8dc4b6Seschrock /* 854ea8dc4b6Seschrock * if "why" is equal to anything else other than ROOT_INIT, 855ea8dc4b6Seschrock * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it. 856ea8dc4b6Seschrock */ 857ea8dc4b6Seschrock return (ENOTSUP); 858ea8dc4b6Seschrock } 859ea8dc4b6Seschrock 860ea8dc4b6Seschrock /*ARGSUSED*/ 861ea8dc4b6Seschrock static int 862ea8dc4b6Seschrock zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 863ea8dc4b6Seschrock { 864ea8dc4b6Seschrock char *osname; 865ea8dc4b6Seschrock pathname_t spn; 866ea8dc4b6Seschrock int error = 0; 867ea8dc4b6Seschrock uio_seg_t fromspace = (uap->flags & MS_SYSSPACE) ? 868b1b8ab34Slling UIO_SYSSPACE : UIO_USERSPACE; 869ea8dc4b6Seschrock int canwrite; 870ea8dc4b6Seschrock 871ea8dc4b6Seschrock if (mvp->v_type != VDIR) 872ea8dc4b6Seschrock return (ENOTDIR); 873ea8dc4b6Seschrock 874ea8dc4b6Seschrock mutex_enter(&mvp->v_lock); 875ea8dc4b6Seschrock if ((uap->flags & MS_REMOUNT) == 0 && 876ea8dc4b6Seschrock (uap->flags & MS_OVERLAY) == 0 && 877ea8dc4b6Seschrock (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 878ea8dc4b6Seschrock mutex_exit(&mvp->v_lock); 879ea8dc4b6Seschrock return (EBUSY); 880ea8dc4b6Seschrock } 881ea8dc4b6Seschrock mutex_exit(&mvp->v_lock); 882ea8dc4b6Seschrock 883ea8dc4b6Seschrock /* 884ea8dc4b6Seschrock * ZFS does not support passing unparsed data in via MS_DATA. 885ea8dc4b6Seschrock * Users should use the MS_OPTIONSTR interface; this means 886ea8dc4b6Seschrock * that all option parsing is already done and the options struct 887ea8dc4b6Seschrock * can be interrogated. 888ea8dc4b6Seschrock */ 889ea8dc4b6Seschrock if ((uap->flags & MS_DATA) && uap->datalen > 0) 890ea8dc4b6Seschrock return (EINVAL); 891ea8dc4b6Seschrock 892ea8dc4b6Seschrock /* 893ea8dc4b6Seschrock * Get the objset name (the "special" mount argument). 894ea8dc4b6Seschrock */ 895ea8dc4b6Seschrock if (error = pn_get(uap->spec, fromspace, &spn)) 896ea8dc4b6Seschrock return (error); 897ea8dc4b6Seschrock 898ea8dc4b6Seschrock osname = spn.pn_path; 899ea8dc4b6Seschrock 900ecd6cf80Smarks /* 901ecd6cf80Smarks * Check for mount privilege? 902ecd6cf80Smarks * 903ecd6cf80Smarks * If we don't have privilege then see if 904ecd6cf80Smarks * we have local permission to allow it 905ecd6cf80Smarks */ 906ecd6cf80Smarks error = secpolicy_fs_mount(cr, mvp, vfsp); 907ecd6cf80Smarks if (error) { 908ecd6cf80Smarks error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr); 909ecd6cf80Smarks if (error == 0) { 910ecd6cf80Smarks vattr_t vattr; 911ecd6cf80Smarks 912ecd6cf80Smarks /* 913ecd6cf80Smarks * Make sure user is the owner of the mount point 914ecd6cf80Smarks * or has sufficient privileges. 915ecd6cf80Smarks */ 916ecd6cf80Smarks 917ecd6cf80Smarks vattr.va_mask = AT_UID; 918ecd6cf80Smarks 919286710a4Smarks if (error = VOP_GETATTR(mvp, &vattr, 0, cr)) { 920ecd6cf80Smarks goto out; 921ecd6cf80Smarks } 922ecd6cf80Smarks 923ecd6cf80Smarks if (error = secpolicy_vnode_owner(cr, vattr.va_uid)) { 924ecd6cf80Smarks goto out; 925ecd6cf80Smarks } 926ecd6cf80Smarks 927ecd6cf80Smarks if (error = VOP_ACCESS(mvp, VWRITE, 0, cr)) { 928ecd6cf80Smarks goto out; 929ecd6cf80Smarks } 930ecd6cf80Smarks 931ecd6cf80Smarks secpolicy_fs_mount_clearopts(cr, vfsp); 932ecd6cf80Smarks } else { 933ecd6cf80Smarks goto out; 934ecd6cf80Smarks } 935ecd6cf80Smarks } 936ea8dc4b6Seschrock 937ea8dc4b6Seschrock /* 938ea8dc4b6Seschrock * Refuse to mount a filesystem if we are in a local zone and the 939ea8dc4b6Seschrock * dataset is not visible. 940ea8dc4b6Seschrock */ 941ea8dc4b6Seschrock if (!INGLOBALZONE(curproc) && 942ea8dc4b6Seschrock (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 943ea8dc4b6Seschrock error = EPERM; 944ea8dc4b6Seschrock goto out; 945ea8dc4b6Seschrock } 946ea8dc4b6Seschrock 947b510d378Slling /* 948b510d378Slling * When doing a remount, we simply refresh our temporary properties 949b510d378Slling * according to those options set in the current VFS options. 950b510d378Slling */ 951b510d378Slling if (uap->flags & MS_REMOUNT) { 952b510d378Slling /* refresh mount options */ 953b510d378Slling zfs_unregister_callbacks(vfsp->vfs_data); 954b510d378Slling error = zfs_register_callbacks(vfsp); 955b510d378Slling goto out; 956b510d378Slling } 957b510d378Slling 958ea8dc4b6Seschrock error = zfs_domount(vfsp, osname, cr); 959ea8dc4b6Seschrock 960ea8dc4b6Seschrock out: 961fa9e4066Sahrens pn_free(&spn); 962fa9e4066Sahrens return (error); 963fa9e4066Sahrens } 964fa9e4066Sahrens 965fa9e4066Sahrens static int 966fa9e4066Sahrens zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp) 967fa9e4066Sahrens { 968fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 969fa9e4066Sahrens dev32_t d32; 970a2eea2e1Sahrens uint64_t refdbytes, availbytes, usedobjs, availobjs; 971fa9e4066Sahrens 972fa9e4066Sahrens ZFS_ENTER(zfsvfs); 973fa9e4066Sahrens 974a2eea2e1Sahrens dmu_objset_space(zfsvfs->z_os, 975a2eea2e1Sahrens &refdbytes, &availbytes, &usedobjs, &availobjs); 976fa9e4066Sahrens 977fa9e4066Sahrens /* 978fa9e4066Sahrens * The underlying storage pool actually uses multiple block sizes. 979fa9e4066Sahrens * We report the fragsize as the smallest block size we support, 980fa9e4066Sahrens * and we report our blocksize as the filesystem's maximum blocksize. 981fa9e4066Sahrens */ 982fa9e4066Sahrens statp->f_frsize = 1UL << SPA_MINBLOCKSHIFT; 983fa9e4066Sahrens statp->f_bsize = zfsvfs->z_max_blksz; 984fa9e4066Sahrens 985fa9e4066Sahrens /* 986fa9e4066Sahrens * The following report "total" blocks of various kinds in the 987fa9e4066Sahrens * file system, but reported in terms of f_frsize - the 988fa9e4066Sahrens * "fragment" size. 989fa9e4066Sahrens */ 990fa9e4066Sahrens 991a2eea2e1Sahrens statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; 992a2eea2e1Sahrens statp->f_bfree = availbytes >> SPA_MINBLOCKSHIFT; 993fa9e4066Sahrens statp->f_bavail = statp->f_bfree; /* no root reservation */ 994fa9e4066Sahrens 995fa9e4066Sahrens /* 996fa9e4066Sahrens * statvfs() should really be called statufs(), because it assumes 997fa9e4066Sahrens * static metadata. ZFS doesn't preallocate files, so the best 998fa9e4066Sahrens * we can do is report the max that could possibly fit in f_files, 999fa9e4066Sahrens * and that minus the number actually used in f_ffree. 1000fa9e4066Sahrens * For f_ffree, report the smaller of the number of object available 1001fa9e4066Sahrens * and the number of blocks (each object will take at least a block). 1002fa9e4066Sahrens */ 1003a2eea2e1Sahrens statp->f_ffree = MIN(availobjs, statp->f_bfree); 1004fa9e4066Sahrens statp->f_favail = statp->f_ffree; /* no "root reservation" */ 1005a2eea2e1Sahrens statp->f_files = statp->f_ffree + usedobjs; 1006fa9e4066Sahrens 1007fa9e4066Sahrens (void) cmpldev(&d32, vfsp->vfs_dev); 1008fa9e4066Sahrens statp->f_fsid = d32; 1009fa9e4066Sahrens 1010fa9e4066Sahrens /* 1011fa9e4066Sahrens * We're a zfs filesystem. 1012fa9e4066Sahrens */ 1013fa9e4066Sahrens (void) strcpy(statp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name); 1014fa9e4066Sahrens 1015a5be7ebbSmarks statp->f_flag = vf_to_stf(vfsp->vfs_flag); 1016fa9e4066Sahrens 1017fa9e4066Sahrens statp->f_namemax = ZFS_MAXNAMELEN; 1018fa9e4066Sahrens 1019fa9e4066Sahrens /* 1020fa9e4066Sahrens * We have all of 32 characters to stuff a string here. 1021fa9e4066Sahrens * Is there anything useful we could/should provide? 1022fa9e4066Sahrens */ 1023fa9e4066Sahrens bzero(statp->f_fstr, sizeof (statp->f_fstr)); 1024fa9e4066Sahrens 1025fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1026fa9e4066Sahrens return (0); 1027fa9e4066Sahrens } 1028fa9e4066Sahrens 1029fa9e4066Sahrens static int 1030fa9e4066Sahrens zfs_root(vfs_t *vfsp, vnode_t **vpp) 1031fa9e4066Sahrens { 1032fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1033fa9e4066Sahrens znode_t *rootzp; 1034fa9e4066Sahrens int error; 1035fa9e4066Sahrens 1036fa9e4066Sahrens ZFS_ENTER(zfsvfs); 1037fa9e4066Sahrens 1038fa9e4066Sahrens error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 1039fa9e4066Sahrens if (error == 0) 1040fa9e4066Sahrens *vpp = ZTOV(rootzp); 1041fa9e4066Sahrens 1042fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1043fa9e4066Sahrens return (error); 1044fa9e4066Sahrens } 1045fa9e4066Sahrens 1046*f18faf3fSek /* 1047*f18faf3fSek * Teardown the zfsvfs::z_os. 1048*f18faf3fSek * 1049*f18faf3fSek * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock' 1050*f18faf3fSek * and 'z_teardown_inactive_lock' held. 1051*f18faf3fSek */ 1052*f18faf3fSek static int 1053*f18faf3fSek zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) 1054*f18faf3fSek { 1055*f18faf3fSek objset_t *os = zfsvfs->z_os; 1056*f18faf3fSek znode_t *zp, *nextzp; 1057*f18faf3fSek znode_t markerzp; 1058*f18faf3fSek 1059*f18faf3fSek rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); 1060*f18faf3fSek 1061*f18faf3fSek if (!unmounting) { 1062*f18faf3fSek /* 1063*f18faf3fSek * We purge the parent filesystem's vfsp as the parent 1064*f18faf3fSek * filesystem and all of its snapshots have their vnode's 1065*f18faf3fSek * v_vfsp set to the parent's filesystem's vfsp. Note, 1066*f18faf3fSek * 'z_parent' is self referential for non-snapshots. 1067*f18faf3fSek */ 1068*f18faf3fSek (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 1069*f18faf3fSek } 1070*f18faf3fSek 1071*f18faf3fSek /* 1072*f18faf3fSek * Close the zil. NB: Can't close the zil while zfs_inactive 1073*f18faf3fSek * threads are blocked as zil_close can call zfs_inactive. 1074*f18faf3fSek */ 1075*f18faf3fSek if (zfsvfs->z_log) { 1076*f18faf3fSek zil_close(zfsvfs->z_log); 1077*f18faf3fSek zfsvfs->z_log = NULL; 1078*f18faf3fSek } 1079*f18faf3fSek 1080*f18faf3fSek rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER); 1081*f18faf3fSek 1082*f18faf3fSek /* 1083*f18faf3fSek * If we are not unmounting (ie: online recv) and someone already 1084*f18faf3fSek * unmounted this file system while we were doing the switcheroo, 1085*f18faf3fSek * or a reopen of z_os failed then just bail out now. 1086*f18faf3fSek */ 1087*f18faf3fSek if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { 1088*f18faf3fSek rw_exit(&zfsvfs->z_teardown_inactive_lock); 1089*f18faf3fSek rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1090*f18faf3fSek return (EIO); 1091*f18faf3fSek } 1092*f18faf3fSek 1093*f18faf3fSek /* 1094*f18faf3fSek * At this point there are no vops active, and any new vops will 1095*f18faf3fSek * fail with EIO since we have z_teardown_lock for writer (only 1096*f18faf3fSek * relavent for forced unmount). 1097*f18faf3fSek * 1098*f18faf3fSek * Release all holds on dbufs. 1099*f18faf3fSek * Note, the dmu can still callback via znode_pageout_func() 1100*f18faf3fSek * which can zfs_znode_free() the znode. So we lock 1101*f18faf3fSek * z_all_znodes; search the list for a held dbuf; drop the lock 1102*f18faf3fSek * (we know zp can't disappear if we hold a dbuf lock) then 1103*f18faf3fSek * regrab the lock and restart. 1104*f18faf3fSek * 1105*f18faf3fSek * Since we have to restart the search after finding each held dbuf, 1106*f18faf3fSek * we do two things to speed up searching: we insert a dummy znode 1107*f18faf3fSek * ('markerzp') to detect the original tail of the list, and move 1108*f18faf3fSek * non-held znodes to the end of the list. Once we hit 'markerzp', 1109*f18faf3fSek * we know we've looked at each znode and can break out. 1110*f18faf3fSek */ 1111*f18faf3fSek mutex_enter(&zfsvfs->z_znodes_lock); 1112*f18faf3fSek list_insert_tail(&zfsvfs->z_all_znodes, &markerzp); 1113*f18faf3fSek for (zp = list_head(&zfsvfs->z_all_znodes); zp != &markerzp; 1114*f18faf3fSek zp = nextzp) { 1115*f18faf3fSek nextzp = list_next(&zfsvfs->z_all_znodes, zp); 1116*f18faf3fSek if (zp->z_dbuf_held) { 1117*f18faf3fSek /* dbufs should only be held when force unmounting */ 1118*f18faf3fSek zp->z_dbuf_held = 0; 1119*f18faf3fSek mutex_exit(&zfsvfs->z_znodes_lock); 1120*f18faf3fSek dmu_buf_rele(zp->z_dbuf, NULL); 1121*f18faf3fSek /* Start again */ 1122*f18faf3fSek mutex_enter(&zfsvfs->z_znodes_lock); 1123*f18faf3fSek nextzp = list_head(&zfsvfs->z_all_znodes); 1124*f18faf3fSek } else { 1125*f18faf3fSek list_remove(&zfsvfs->z_all_znodes, zp); 1126*f18faf3fSek list_insert_tail(&zfsvfs->z_all_znodes, zp); 1127*f18faf3fSek } 1128*f18faf3fSek } 1129*f18faf3fSek list_remove(&zfsvfs->z_all_znodes, &markerzp); 1130*f18faf3fSek mutex_exit(&zfsvfs->z_znodes_lock); 1131*f18faf3fSek 1132*f18faf3fSek /* 1133*f18faf3fSek * If we are unmounting, set the unmounted flag and let new vops 1134*f18faf3fSek * unblock. zfs_inactive will have the unmounted behavior, and all 1135*f18faf3fSek * other vops will fail with EIO. 1136*f18faf3fSek */ 1137*f18faf3fSek if (unmounting) { 1138*f18faf3fSek zfsvfs->z_unmounted = B_TRUE; 1139*f18faf3fSek rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1140*f18faf3fSek rw_exit(&zfsvfs->z_teardown_inactive_lock); 1141*f18faf3fSek } 1142*f18faf3fSek 1143*f18faf3fSek /* 1144*f18faf3fSek * z_os will be NULL if there was an error in attempting to reopen 1145*f18faf3fSek * zfsvfs, so just return as the properties had already been 1146*f18faf3fSek * unregistered and cached data had been evicted before. 1147*f18faf3fSek */ 1148*f18faf3fSek if (zfsvfs->z_os == NULL) 1149*f18faf3fSek return (0); 1150*f18faf3fSek 1151*f18faf3fSek /* 1152*f18faf3fSek * Unregister properties. 1153*f18faf3fSek */ 1154*f18faf3fSek zfs_unregister_callbacks(zfsvfs); 1155*f18faf3fSek 1156*f18faf3fSek /* 1157*f18faf3fSek * Evict cached data 1158*f18faf3fSek */ 1159*f18faf3fSek (void) dmu_objset_evict_dbufs(os); 1160*f18faf3fSek 1161*f18faf3fSek return (0); 1162*f18faf3fSek } 1163*f18faf3fSek 1164fa9e4066Sahrens /*ARGSUSED*/ 1165fa9e4066Sahrens static int 1166fa9e4066Sahrens zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr) 1167fa9e4066Sahrens { 1168fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1169*f18faf3fSek objset_t *os; 1170fa9e4066Sahrens int ret; 1171fa9e4066Sahrens 1172ecd6cf80Smarks ret = secpolicy_fs_unmount(cr, vfsp); 1173ecd6cf80Smarks if (ret) { 1174ecd6cf80Smarks ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), 1175ecd6cf80Smarks ZFS_DELEG_PERM_MOUNT, cr); 1176ecd6cf80Smarks if (ret) 1177ecd6cf80Smarks return (ret); 1178ecd6cf80Smarks } 1179033f9833Sek 1180ed097989Sek /* 1181ed097989Sek * We purge the parent filesystem's vfsp as the parent filesystem 1182ed097989Sek * and all of its snapshots have their vnode's v_vfsp set to the 1183ed097989Sek * parent's filesystem's vfsp. Note, 'z_parent' is self 1184ed097989Sek * referential for non-snapshots. 1185ed097989Sek */ 1186ed097989Sek (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 1187033f9833Sek 1188fa9e4066Sahrens /* 1189fa9e4066Sahrens * Unmount any snapshots mounted under .zfs before unmounting the 1190fa9e4066Sahrens * dataset itself. 1191fa9e4066Sahrens */ 1192fa9e4066Sahrens if (zfsvfs->z_ctldir != NULL && 1193ecd6cf80Smarks (ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) { 1194fa9e4066Sahrens return (ret); 1195ecd6cf80Smarks } 1196fa9e4066Sahrens 119791ebeef5Sahrens if (!(fflag & MS_FORCE)) { 1198fa9e4066Sahrens /* 119991ebeef5Sahrens * Check the number of active vnodes in the file system. 120091ebeef5Sahrens * Our count is maintained in the vfs structure, but the 120191ebeef5Sahrens * number is off by 1 to indicate a hold on the vfs 120291ebeef5Sahrens * structure itself. 120391ebeef5Sahrens * 120491ebeef5Sahrens * The '.zfs' directory maintains a reference of its 120591ebeef5Sahrens * own, and any active references underneath are 120691ebeef5Sahrens * reflected in the vnode count. 1207fa9e4066Sahrens */ 120891ebeef5Sahrens if (zfsvfs->z_ctldir == NULL) { 120991ebeef5Sahrens if (vfsp->vfs_count > 1) 121091ebeef5Sahrens return (EBUSY); 121191ebeef5Sahrens } else { 121291ebeef5Sahrens if (vfsp->vfs_count > 2 || 1213*f18faf3fSek zfsvfs->z_ctldir->v_count > 1) 121491ebeef5Sahrens return (EBUSY); 1215fa9e4066Sahrens } 121691ebeef5Sahrens } 1217fa9e4066Sahrens 121891ebeef5Sahrens vfsp->vfs_flag |= VFS_UNMOUNTED; 121991ebeef5Sahrens 1220*f18faf3fSek VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0); 1221*f18faf3fSek os = zfsvfs->z_os; 122291ebeef5Sahrens 122391ebeef5Sahrens /* 1224*f18faf3fSek * z_os will be NULL if there was an error in 1225*f18faf3fSek * attempting to reopen zfsvfs. 122691ebeef5Sahrens */ 1227*f18faf3fSek if (os != NULL) { 1228*f18faf3fSek /* 1229*f18faf3fSek * Unset the objset user_ptr. 1230*f18faf3fSek */ 1231*f18faf3fSek mutex_enter(&os->os->os_user_ptr_lock); 1232*f18faf3fSek dmu_objset_set_user(os, NULL); 1233*f18faf3fSek mutex_exit(&os->os->os_user_ptr_lock); 123491ebeef5Sahrens 1235*f18faf3fSek /* 1236*f18faf3fSek * Finally close the objset 1237*f18faf3fSek */ 1238*f18faf3fSek dmu_objset_close(os); 123991ebeef5Sahrens } 124091ebeef5Sahrens 124191ebeef5Sahrens /* 124291ebeef5Sahrens * We can now safely destroy the '.zfs' directory node. 124391ebeef5Sahrens */ 124491ebeef5Sahrens if (zfsvfs->z_ctldir != NULL) 124591ebeef5Sahrens zfsctl_destroy(zfsvfs); 1246fa9e4066Sahrens 1247fa9e4066Sahrens return (0); 1248fa9e4066Sahrens } 1249fa9e4066Sahrens 1250fa9e4066Sahrens static int 1251fa9e4066Sahrens zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 1252fa9e4066Sahrens { 1253fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1254fa9e4066Sahrens znode_t *zp; 1255fa9e4066Sahrens uint64_t object = 0; 1256fa9e4066Sahrens uint64_t fid_gen = 0; 1257fa9e4066Sahrens uint64_t gen_mask; 1258fa9e4066Sahrens uint64_t zp_gen; 1259fa9e4066Sahrens int i, err; 1260fa9e4066Sahrens 1261fa9e4066Sahrens *vpp = NULL; 1262fa9e4066Sahrens 1263fa9e4066Sahrens ZFS_ENTER(zfsvfs); 1264fa9e4066Sahrens 1265fa9e4066Sahrens if (fidp->fid_len == LONG_FID_LEN) { 1266fa9e4066Sahrens zfid_long_t *zlfid = (zfid_long_t *)fidp; 1267fa9e4066Sahrens uint64_t objsetid = 0; 1268fa9e4066Sahrens uint64_t setgen = 0; 1269fa9e4066Sahrens 1270fa9e4066Sahrens for (i = 0; i < sizeof (zlfid->zf_setid); i++) 1271fa9e4066Sahrens objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 1272fa9e4066Sahrens 1273fa9e4066Sahrens for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 1274fa9e4066Sahrens setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 1275fa9e4066Sahrens 1276fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1277fa9e4066Sahrens 1278fa9e4066Sahrens err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 1279fa9e4066Sahrens if (err) 1280fa9e4066Sahrens return (EINVAL); 1281fa9e4066Sahrens ZFS_ENTER(zfsvfs); 1282fa9e4066Sahrens } 1283fa9e4066Sahrens 1284fa9e4066Sahrens if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 1285fa9e4066Sahrens zfid_short_t *zfid = (zfid_short_t *)fidp; 1286fa9e4066Sahrens 1287fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_object); i++) 1288fa9e4066Sahrens object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 1289fa9e4066Sahrens 1290fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_gen); i++) 1291fa9e4066Sahrens fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 1292fa9e4066Sahrens } else { 1293fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1294fa9e4066Sahrens return (EINVAL); 1295fa9e4066Sahrens } 1296fa9e4066Sahrens 1297fa9e4066Sahrens /* A zero fid_gen means we are in the .zfs control directories */ 1298fa9e4066Sahrens if (fid_gen == 0 && 1299fa9e4066Sahrens (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { 1300fa9e4066Sahrens *vpp = zfsvfs->z_ctldir; 1301fa9e4066Sahrens ASSERT(*vpp != NULL); 1302fa9e4066Sahrens if (object == ZFSCTL_INO_SNAPDIR) { 1303fa9e4066Sahrens VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, 1304fa9e4066Sahrens 0, NULL, NULL) == 0); 1305fa9e4066Sahrens } else { 1306fa9e4066Sahrens VN_HOLD(*vpp); 1307fa9e4066Sahrens } 1308fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1309fa9e4066Sahrens return (0); 1310fa9e4066Sahrens } 1311fa9e4066Sahrens 1312fa9e4066Sahrens gen_mask = -1ULL >> (64 - 8 * i); 1313fa9e4066Sahrens 1314fa9e4066Sahrens dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 1315fa9e4066Sahrens if (err = zfs_zget(zfsvfs, object, &zp)) { 1316fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1317fa9e4066Sahrens return (err); 1318fa9e4066Sahrens } 1319fa9e4066Sahrens zp_gen = zp->z_phys->zp_gen & gen_mask; 1320fa9e4066Sahrens if (zp_gen == 0) 1321fa9e4066Sahrens zp_gen = 1; 1322893a6d32Sahrens if (zp->z_unlinked || zp_gen != fid_gen) { 1323fa9e4066Sahrens dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 1324fa9e4066Sahrens VN_RELE(ZTOV(zp)); 1325fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1326fa9e4066Sahrens return (EINVAL); 1327fa9e4066Sahrens } 1328fa9e4066Sahrens 1329fa9e4066Sahrens *vpp = ZTOV(zp); 1330fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1331fa9e4066Sahrens return (0); 1332fa9e4066Sahrens } 1333fa9e4066Sahrens 1334*f18faf3fSek /* 1335*f18faf3fSek * Block out VOPs and close zfsvfs_t::z_os 1336*f18faf3fSek * 1337*f18faf3fSek * Note, if successful, then we return with the 'z_teardown_lock' and 1338*f18faf3fSek * 'z_teardown_inactive_lock' write held. 1339*f18faf3fSek */ 1340*f18faf3fSek int 1341*f18faf3fSek zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *mode) 1342*f18faf3fSek { 1343*f18faf3fSek int error; 1344*f18faf3fSek 1345*f18faf3fSek if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) 1346*f18faf3fSek return (error); 1347*f18faf3fSek 1348*f18faf3fSek *mode = zfsvfs->z_os->os_mode; 1349*f18faf3fSek dmu_objset_name(zfsvfs->z_os, name); 1350*f18faf3fSek dmu_objset_close(zfsvfs->z_os); 1351*f18faf3fSek 1352*f18faf3fSek return (0); 1353*f18faf3fSek } 1354*f18faf3fSek 1355*f18faf3fSek /* 1356*f18faf3fSek * Reopen zfsvfs_t::z_os and release VOPs. 1357*f18faf3fSek */ 1358*f18faf3fSek int 1359*f18faf3fSek zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode) 1360*f18faf3fSek { 1361*f18faf3fSek int err; 1362*f18faf3fSek 1363*f18faf3fSek ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock)); 1364*f18faf3fSek ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); 1365*f18faf3fSek 1366*f18faf3fSek err = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 1367*f18faf3fSek if (err) { 1368*f18faf3fSek zfsvfs->z_os = NULL; 1369*f18faf3fSek } else { 1370*f18faf3fSek znode_t *zp; 1371*f18faf3fSek 1372*f18faf3fSek VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); 1373*f18faf3fSek 1374*f18faf3fSek /* 1375*f18faf3fSek * Attempt to re-establish all the active znodes with 1376*f18faf3fSek * their dbufs. If a zfs_rezget() fails, then we'll let 1377*f18faf3fSek * any potential callers discover that via ZFS_ENTER_VERIFY_VP 1378*f18faf3fSek * when they try to use their znode. 1379*f18faf3fSek */ 1380*f18faf3fSek mutex_enter(&zfsvfs->z_znodes_lock); 1381*f18faf3fSek for (zp = list_head(&zfsvfs->z_all_znodes); zp; 1382*f18faf3fSek zp = list_next(&zfsvfs->z_all_znodes, zp)) { 1383*f18faf3fSek ASSERT(!zp->z_dbuf_held); 1384*f18faf3fSek (void) zfs_rezget(zp); 1385*f18faf3fSek } 1386*f18faf3fSek mutex_exit(&zfsvfs->z_znodes_lock); 1387*f18faf3fSek 1388*f18faf3fSek } 1389*f18faf3fSek 1390*f18faf3fSek /* release the VOPs */ 1391*f18faf3fSek rw_exit(&zfsvfs->z_teardown_inactive_lock); 1392*f18faf3fSek rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1393*f18faf3fSek 1394*f18faf3fSek if (err) { 1395*f18faf3fSek /* 1396*f18faf3fSek * Since we couldn't reopen zfsvfs::z_os, force 1397*f18faf3fSek * unmount this file system. 1398*f18faf3fSek */ 1399*f18faf3fSek if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) 1400*f18faf3fSek (void) dounmount(zfsvfs->z_vfs, MS_FORCE, CRED()); 1401*f18faf3fSek } 1402*f18faf3fSek return (err); 1403*f18faf3fSek } 1404*f18faf3fSek 1405fa9e4066Sahrens static void 1406fa9e4066Sahrens zfs_freevfs(vfs_t *vfsp) 1407fa9e4066Sahrens { 1408fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1409c25056deSgw int i; 1410c25056deSgw 1411c25056deSgw for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 1412c25056deSgw mutex_destroy(&zfsvfs->z_hold_mtx[i]); 1413fa9e4066Sahrens 141491ebeef5Sahrens mutex_destroy(&zfsvfs->z_znodes_lock); 1415c25056deSgw list_destroy(&zfsvfs->z_all_znodes); 1416*f18faf3fSek rrw_destroy(&zfsvfs->z_teardown_lock); 1417*f18faf3fSek rw_destroy(&zfsvfs->z_teardown_inactive_lock); 1418fa9e4066Sahrens kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1419fa9e4066Sahrens 1420fa9e4066Sahrens atomic_add_32(&zfs_active_fs_count, -1); 1421fa9e4066Sahrens } 1422fa9e4066Sahrens 1423fa9e4066Sahrens /* 1424fa9e4066Sahrens * VFS_INIT() initialization. Note that there is no VFS_FINI(), 1425fa9e4066Sahrens * so we can't safely do any non-idempotent initialization here. 1426fa9e4066Sahrens * Leave that to zfs_init() and zfs_fini(), which are called 1427fa9e4066Sahrens * from the module's _init() and _fini() entry points. 1428fa9e4066Sahrens */ 1429fa9e4066Sahrens /*ARGSUSED*/ 1430fa9e4066Sahrens static int 1431fa9e4066Sahrens zfs_vfsinit(int fstype, char *name) 1432fa9e4066Sahrens { 1433fa9e4066Sahrens int error; 1434fa9e4066Sahrens 1435fa9e4066Sahrens zfsfstype = fstype; 1436fa9e4066Sahrens 1437fa9e4066Sahrens /* 1438fa9e4066Sahrens * Setup vfsops and vnodeops tables. 1439fa9e4066Sahrens */ 1440fa9e4066Sahrens error = vfs_setfsops(fstype, zfs_vfsops_template, &zfs_vfsops); 1441fa9e4066Sahrens if (error != 0) { 1442fa9e4066Sahrens cmn_err(CE_WARN, "zfs: bad vfs ops template"); 1443fa9e4066Sahrens } 1444fa9e4066Sahrens 1445fa9e4066Sahrens error = zfs_create_op_tables(); 1446fa9e4066Sahrens if (error) { 1447fa9e4066Sahrens zfs_remove_op_tables(); 1448fa9e4066Sahrens cmn_err(CE_WARN, "zfs: bad vnode ops template"); 1449fa9e4066Sahrens (void) vfs_freevfsops_by_type(zfsfstype); 1450fa9e4066Sahrens return (error); 1451fa9e4066Sahrens } 1452fa9e4066Sahrens 1453fa9e4066Sahrens mutex_init(&zfs_dev_mtx, NULL, MUTEX_DEFAULT, NULL); 1454fa9e4066Sahrens 1455fa9e4066Sahrens /* 1456a0965f35Sbonwick * Unique major number for all zfs mounts. 1457a0965f35Sbonwick * If we run out of 32-bit minors, we'll getudev() another major. 1458fa9e4066Sahrens */ 1459a0965f35Sbonwick zfs_major = ddi_name_to_major(ZFS_DRIVER); 1460a0965f35Sbonwick zfs_minor = ZFS_MIN_MINOR; 1461fa9e4066Sahrens 1462fa9e4066Sahrens return (0); 1463fa9e4066Sahrens } 1464fa9e4066Sahrens 1465fa9e4066Sahrens void 1466fa9e4066Sahrens zfs_init(void) 1467fa9e4066Sahrens { 1468fa9e4066Sahrens /* 1469fa9e4066Sahrens * Initialize .zfs directory structures 1470fa9e4066Sahrens */ 1471fa9e4066Sahrens zfsctl_init(); 1472fa9e4066Sahrens 1473fa9e4066Sahrens /* 1474fa9e4066Sahrens * Initialize znode cache, vnode ops, etc... 1475fa9e4066Sahrens */ 1476fa9e4066Sahrens zfs_znode_init(); 1477fa9e4066Sahrens } 1478fa9e4066Sahrens 1479fa9e4066Sahrens void 1480fa9e4066Sahrens zfs_fini(void) 1481fa9e4066Sahrens { 1482fa9e4066Sahrens zfsctl_fini(); 1483fa9e4066Sahrens zfs_znode_fini(); 1484fa9e4066Sahrens } 1485fa9e4066Sahrens 1486fa9e4066Sahrens int 1487fa9e4066Sahrens zfs_busy(void) 1488fa9e4066Sahrens { 1489fa9e4066Sahrens return (zfs_active_fs_count != 0); 1490fa9e4066Sahrens } 1491fa9e4066Sahrens 1492e7437265Sahrens int 1493bd00f61bSrm zfs_get_version(objset_t *os, uint64_t *version) 1494e7437265Sahrens { 1495e7437265Sahrens int error; 1496e7437265Sahrens 1497bd00f61bSrm error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, version); 1498e7437265Sahrens return (error); 1499e7437265Sahrens } 1500e7437265Sahrens 1501e7437265Sahrens int 1502e7437265Sahrens zfs_set_version(const char *name, uint64_t newvers) 1503e7437265Sahrens { 1504e7437265Sahrens int error; 1505e7437265Sahrens objset_t *os; 1506e7437265Sahrens dmu_tx_t *tx; 1507e7437265Sahrens uint64_t curvers; 1508e7437265Sahrens 1509e7437265Sahrens /* 1510e7437265Sahrens * XXX for now, require that the filesystem be unmounted. Would 1511e7437265Sahrens * be nice to find the zfsvfs_t and just update that if 1512e7437265Sahrens * possible. 1513e7437265Sahrens */ 1514e7437265Sahrens 1515e7437265Sahrens if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) 1516e7437265Sahrens return (EINVAL); 1517e7437265Sahrens 1518e7437265Sahrens error = dmu_objset_open(name, DMU_OST_ZFS, DS_MODE_PRIMARY, &os); 1519e7437265Sahrens if (error) 1520e7437265Sahrens return (error); 1521e7437265Sahrens 1522e7437265Sahrens error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 1523e7437265Sahrens 8, 1, &curvers); 1524e7437265Sahrens if (error) 1525e7437265Sahrens goto out; 1526e7437265Sahrens if (newvers < curvers) { 1527e7437265Sahrens error = EINVAL; 1528e7437265Sahrens goto out; 1529e7437265Sahrens } 1530e7437265Sahrens 1531e7437265Sahrens tx = dmu_tx_create(os); 1532e7437265Sahrens dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, 0, ZPL_VERSION_STR); 1533e7437265Sahrens error = dmu_tx_assign(tx, TXG_WAIT); 1534e7437265Sahrens if (error) { 1535e7437265Sahrens dmu_tx_abort(tx); 1536e7437265Sahrens goto out; 1537e7437265Sahrens } 1538e7437265Sahrens error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, 1539e7437265Sahrens &newvers, tx); 1540e7437265Sahrens 1541e7437265Sahrens spa_history_internal_log(LOG_DS_UPGRADE, 1542e7437265Sahrens dmu_objset_spa(os), tx, CRED(), 1543e7437265Sahrens "oldver=%llu newver=%llu dataset = %llu", curvers, newvers, 1544e7437265Sahrens dmu_objset_id(os)); 1545e7437265Sahrens dmu_tx_commit(tx); 1546e7437265Sahrens 1547e7437265Sahrens out: 1548e7437265Sahrens dmu_objset_close(os); 1549e7437265Sahrens return (error); 1550e7437265Sahrens } 1551e7437265Sahrens 1552fa9e4066Sahrens static vfsdef_t vfw = { 1553fa9e4066Sahrens VFSDEF_VERSION, 1554fa9e4066Sahrens MNTTYPE_ZFS, 1555fa9e4066Sahrens zfs_vfsinit, 15565a59a8b3Srsb VSW_HASPROTO|VSW_CANRWRO|VSW_CANREMOUNT|VSW_VOLATILEDEV|VSW_STATS, 1557fa9e4066Sahrens &zfs_mntopts 1558fa9e4066Sahrens }; 1559fa9e4066Sahrens 1560fa9e4066Sahrens struct modlfs zfs_modlfs = { 1561e7437265Sahrens &mod_fsops, "ZFS filesystem version " SPA_VERSION_STRING, &vfw 1562fa9e4066Sahrens }; 1563