1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5033f9833Sek * Common Development and Distribution License (the "License"). 6033f9833Sek * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 22893a6d32Sahrens * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23fa9e4066Sahrens * Use is subject to license terms. 24fa9e4066Sahrens */ 25fa9e4066Sahrens 2678077464Sck #pragma ident "%Z%%M% %I% %E% SMI" 27fa9e4066Sahrens 28fa9e4066Sahrens #include <sys/types.h> 29fa9e4066Sahrens #include <sys/param.h> 30fa9e4066Sahrens #include <sys/systm.h> 31fa9e4066Sahrens #include <sys/sysmacros.h> 32fa9e4066Sahrens #include <sys/kmem.h> 33fa9e4066Sahrens #include <sys/pathname.h> 34fa9e4066Sahrens #include <sys/vnode.h> 35fa9e4066Sahrens #include <sys/vfs.h> 36aa59c4cbSrsb #include <sys/vfs_opreg.h> 37fa9e4066Sahrens #include <sys/mntent.h> 38fa9e4066Sahrens #include <sys/mount.h> 39fa9e4066Sahrens #include <sys/cmn_err.h> 40fa9e4066Sahrens #include "fs/fs_subr.h" 41fa9e4066Sahrens #include <sys/zfs_znode.h> 42893a6d32Sahrens #include <sys/zfs_dir.h> 43fa9e4066Sahrens #include <sys/zil.h> 44fa9e4066Sahrens #include <sys/fs/zfs.h> 45fa9e4066Sahrens #include <sys/dmu.h> 46fa9e4066Sahrens #include <sys/dsl_prop.h> 47b1b8ab34Slling #include <sys/dsl_dataset.h> 48ecd6cf80Smarks #include <sys/dsl_deleg.h> 49fa9e4066Sahrens #include <sys/spa.h> 50fa9e4066Sahrens #include <sys/zap.h> 51fa9e4066Sahrens #include <sys/varargs.h> 52fa9e4066Sahrens #include <sys/policy.h> 53fa9e4066Sahrens #include <sys/atomic.h> 54fa9e4066Sahrens #include <sys/mkdev.h> 55fa9e4066Sahrens #include <sys/modctl.h> 56ecd6cf80Smarks #include <sys/refstr.h> 57fa9e4066Sahrens #include <sys/zfs_ioctl.h> 58fa9e4066Sahrens #include <sys/zfs_ctldir.h> 59ea8dc4b6Seschrock #include <sys/bootconf.h> 60a0965f35Sbonwick #include <sys/sunddi.h> 61033f9833Sek #include <sys/dnlc.h> 62fa9e4066Sahrens 63fa9e4066Sahrens int zfsfstype; 64fa9e4066Sahrens vfsops_t *zfs_vfsops = NULL; 65a0965f35Sbonwick static major_t zfs_major; 66fa9e4066Sahrens static minor_t zfs_minor; 67fa9e4066Sahrens static kmutex_t zfs_dev_mtx; 68fa9e4066Sahrens 69fa9e4066Sahrens static int zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr); 70fa9e4066Sahrens static int zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr); 71ea8dc4b6Seschrock static int zfs_mountroot(vfs_t *vfsp, enum whymountroot); 72fa9e4066Sahrens static int zfs_root(vfs_t *vfsp, vnode_t **vpp); 73fa9e4066Sahrens static int zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp); 74fa9e4066Sahrens static int zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp); 75fa9e4066Sahrens static void zfs_freevfs(vfs_t *vfsp); 76fa9e4066Sahrens static void zfs_objset_close(zfsvfs_t *zfsvfs); 77fa9e4066Sahrens 78fa9e4066Sahrens static const fs_operation_def_t zfs_vfsops_template[] = { 79aa59c4cbSrsb VFSNAME_MOUNT, { .vfs_mount = zfs_mount }, 80aa59c4cbSrsb VFSNAME_MOUNTROOT, { .vfs_mountroot = zfs_mountroot }, 81aa59c4cbSrsb VFSNAME_UNMOUNT, { .vfs_unmount = zfs_umount }, 82aa59c4cbSrsb VFSNAME_ROOT, { .vfs_root = zfs_root }, 83aa59c4cbSrsb VFSNAME_STATVFS, { .vfs_statvfs = zfs_statvfs }, 84aa59c4cbSrsb VFSNAME_SYNC, { .vfs_sync = zfs_sync }, 85aa59c4cbSrsb VFSNAME_VGET, { .vfs_vget = zfs_vget }, 86aa59c4cbSrsb VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, 87aa59c4cbSrsb NULL, NULL 88fa9e4066Sahrens }; 89fa9e4066Sahrens 90fa9e4066Sahrens static const fs_operation_def_t zfs_vfsops_eio_template[] = { 91aa59c4cbSrsb VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, 92aa59c4cbSrsb NULL, NULL 93fa9e4066Sahrens }; 94fa9e4066Sahrens 95fa9e4066Sahrens /* 96fa9e4066Sahrens * We need to keep a count of active fs's. 97fa9e4066Sahrens * This is necessary to prevent our module 98fa9e4066Sahrens * from being unloaded after a umount -f 99fa9e4066Sahrens */ 100fa9e4066Sahrens static uint32_t zfs_active_fs_count = 0; 101fa9e4066Sahrens 102fa9e4066Sahrens static char *noatime_cancel[] = { MNTOPT_ATIME, NULL }; 103fa9e4066Sahrens static char *atime_cancel[] = { MNTOPT_NOATIME, NULL }; 1047b55fa8eSck static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL }; 1057b55fa8eSck static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL }; 106fa9e4066Sahrens 1077b55fa8eSck /* 108*b510d378Slling * MO_DEFAULT is not used since the default value is determined 109*b510d378Slling * by the equivalent property. 1107b55fa8eSck */ 111fa9e4066Sahrens static mntopt_t mntopts[] = { 1127b55fa8eSck { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, NULL }, 1137b55fa8eSck { MNTOPT_XATTR, xattr_cancel, NULL, 0, NULL }, 114*b510d378Slling { MNTOPT_NOATIME, noatime_cancel, NULL, 0, NULL }, 115fa9e4066Sahrens { MNTOPT_ATIME, atime_cancel, NULL, 0, NULL } 116fa9e4066Sahrens }; 117fa9e4066Sahrens 118fa9e4066Sahrens static mntopts_t zfs_mntopts = { 119fa9e4066Sahrens sizeof (mntopts) / sizeof (mntopt_t), 120fa9e4066Sahrens mntopts 121fa9e4066Sahrens }; 122fa9e4066Sahrens 123fa9e4066Sahrens /*ARGSUSED*/ 124fa9e4066Sahrens int 125fa9e4066Sahrens zfs_sync(vfs_t *vfsp, short flag, cred_t *cr) 126fa9e4066Sahrens { 127fa9e4066Sahrens /* 128fa9e4066Sahrens * Data integrity is job one. We don't want a compromised kernel 129fa9e4066Sahrens * writing to the storage pool, so we never sync during panic. 130fa9e4066Sahrens */ 131fa9e4066Sahrens if (panicstr) 132fa9e4066Sahrens return (0); 133fa9e4066Sahrens 134fa9e4066Sahrens /* 135fa9e4066Sahrens * SYNC_ATTR is used by fsflush() to force old filesystems like UFS 136fa9e4066Sahrens * to sync metadata, which they would otherwise cache indefinitely. 137fa9e4066Sahrens * Semantically, the only requirement is that the sync be initiated. 138fa9e4066Sahrens * The DMU syncs out txgs frequently, so there's nothing to do. 139fa9e4066Sahrens */ 140fa9e4066Sahrens if (flag & SYNC_ATTR) 141fa9e4066Sahrens return (0); 142fa9e4066Sahrens 143fa9e4066Sahrens if (vfsp != NULL) { 144fa9e4066Sahrens /* 145fa9e4066Sahrens * Sync a specific filesystem. 146fa9e4066Sahrens */ 147fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 148fa9e4066Sahrens 149fa9e4066Sahrens ZFS_ENTER(zfsvfs); 150fa9e4066Sahrens if (zfsvfs->z_log != NULL) 151b19a79ecSperrin zil_commit(zfsvfs->z_log, UINT64_MAX, 0); 152fa9e4066Sahrens else 153fa9e4066Sahrens txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 154fa9e4066Sahrens ZFS_EXIT(zfsvfs); 155fa9e4066Sahrens } else { 156fa9e4066Sahrens /* 157fa9e4066Sahrens * Sync all ZFS filesystems. This is what happens when you 158fa9e4066Sahrens * run sync(1M). Unlike other filesystems, ZFS honors the 159fa9e4066Sahrens * request by waiting for all pools to commit all dirty data. 160fa9e4066Sahrens */ 161fa9e4066Sahrens spa_sync_allpools(); 162fa9e4066Sahrens } 163fa9e4066Sahrens 164fa9e4066Sahrens return (0); 165fa9e4066Sahrens } 166fa9e4066Sahrens 167ea8dc4b6Seschrock static int 168ea8dc4b6Seschrock zfs_create_unique_device(dev_t *dev) 169ea8dc4b6Seschrock { 170ea8dc4b6Seschrock major_t new_major; 171ea8dc4b6Seschrock 172ea8dc4b6Seschrock do { 173ea8dc4b6Seschrock ASSERT3U(zfs_minor, <=, MAXMIN32); 174ea8dc4b6Seschrock minor_t start = zfs_minor; 175ea8dc4b6Seschrock do { 176ea8dc4b6Seschrock mutex_enter(&zfs_dev_mtx); 177ea8dc4b6Seschrock if (zfs_minor >= MAXMIN32) { 178ea8dc4b6Seschrock /* 179ea8dc4b6Seschrock * If we're still using the real major 180ea8dc4b6Seschrock * keep out of /dev/zfs and /dev/zvol minor 181ea8dc4b6Seschrock * number space. If we're using a getudev()'ed 182ea8dc4b6Seschrock * major number, we can use all of its minors. 183ea8dc4b6Seschrock */ 184ea8dc4b6Seschrock if (zfs_major == ddi_name_to_major(ZFS_DRIVER)) 185ea8dc4b6Seschrock zfs_minor = ZFS_MIN_MINOR; 186ea8dc4b6Seschrock else 187ea8dc4b6Seschrock zfs_minor = 0; 188ea8dc4b6Seschrock } else { 189ea8dc4b6Seschrock zfs_minor++; 190ea8dc4b6Seschrock } 191ea8dc4b6Seschrock *dev = makedevice(zfs_major, zfs_minor); 192ea8dc4b6Seschrock mutex_exit(&zfs_dev_mtx); 193ea8dc4b6Seschrock } while (vfs_devismounted(*dev) && zfs_minor != start); 194ea8dc4b6Seschrock if (zfs_minor == start) { 195ea8dc4b6Seschrock /* 196ea8dc4b6Seschrock * We are using all ~262,000 minor numbers for the 197ea8dc4b6Seschrock * current major number. Create a new major number. 198ea8dc4b6Seschrock */ 199ea8dc4b6Seschrock if ((new_major = getudev()) == (major_t)-1) { 200ea8dc4b6Seschrock cmn_err(CE_WARN, 201ea8dc4b6Seschrock "zfs_mount: Can't get unique major " 202ea8dc4b6Seschrock "device number."); 203ea8dc4b6Seschrock return (-1); 204ea8dc4b6Seschrock } 205ea8dc4b6Seschrock mutex_enter(&zfs_dev_mtx); 206ea8dc4b6Seschrock zfs_major = new_major; 207ea8dc4b6Seschrock zfs_minor = 0; 208ea8dc4b6Seschrock 209ea8dc4b6Seschrock mutex_exit(&zfs_dev_mtx); 210ea8dc4b6Seschrock } else { 211ea8dc4b6Seschrock break; 212ea8dc4b6Seschrock } 213ea8dc4b6Seschrock /* CONSTANTCONDITION */ 214ea8dc4b6Seschrock } while (1); 215ea8dc4b6Seschrock 216ea8dc4b6Seschrock return (0); 217ea8dc4b6Seschrock } 218ea8dc4b6Seschrock 219fa9e4066Sahrens static void 220fa9e4066Sahrens atime_changed_cb(void *arg, uint64_t newval) 221fa9e4066Sahrens { 222fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 223fa9e4066Sahrens 224fa9e4066Sahrens if (newval == TRUE) { 225fa9e4066Sahrens zfsvfs->z_atime = TRUE; 226fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 227fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 228fa9e4066Sahrens } else { 229fa9e4066Sahrens zfsvfs->z_atime = FALSE; 230fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 231fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 232fa9e4066Sahrens } 233fa9e4066Sahrens } 234fa9e4066Sahrens 2357b55fa8eSck static void 2367b55fa8eSck xattr_changed_cb(void *arg, uint64_t newval) 2377b55fa8eSck { 2387b55fa8eSck zfsvfs_t *zfsvfs = arg; 2397b55fa8eSck 2407b55fa8eSck if (newval == TRUE) { 2417b55fa8eSck /* XXX locking on vfs_flag? */ 2427b55fa8eSck zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 2437b55fa8eSck vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 2447b55fa8eSck vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 2457b55fa8eSck } else { 2467b55fa8eSck /* XXX locking on vfs_flag? */ 2477b55fa8eSck zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 2487b55fa8eSck vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 2497b55fa8eSck vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 2507b55fa8eSck } 2517b55fa8eSck } 2527b55fa8eSck 253fa9e4066Sahrens static void 254fa9e4066Sahrens blksz_changed_cb(void *arg, uint64_t newval) 255fa9e4066Sahrens { 256fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 257fa9e4066Sahrens 258fa9e4066Sahrens if (newval < SPA_MINBLOCKSIZE || 259fa9e4066Sahrens newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 260fa9e4066Sahrens newval = SPA_MAXBLOCKSIZE; 261fa9e4066Sahrens 262fa9e4066Sahrens zfsvfs->z_max_blksz = newval; 263fa9e4066Sahrens zfsvfs->z_vfs->vfs_bsize = newval; 264fa9e4066Sahrens } 265fa9e4066Sahrens 266fa9e4066Sahrens static void 267fa9e4066Sahrens readonly_changed_cb(void *arg, uint64_t newval) 268fa9e4066Sahrens { 269fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 270fa9e4066Sahrens 271fa9e4066Sahrens if (newval) { 272fa9e4066Sahrens /* XXX locking on vfs_flag? */ 273fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 274fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 275fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 276fa9e4066Sahrens } else { 277fa9e4066Sahrens /* XXX locking on vfs_flag? */ 278fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 279fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 280fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 281fa9e4066Sahrens } 282fa9e4066Sahrens } 283fa9e4066Sahrens 284fa9e4066Sahrens static void 285fa9e4066Sahrens devices_changed_cb(void *arg, uint64_t newval) 286fa9e4066Sahrens { 287fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 288fa9e4066Sahrens 289fa9e4066Sahrens if (newval == FALSE) { 290fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NODEVICES; 291fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES); 292fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES, NULL, 0); 293fa9e4066Sahrens } else { 294fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NODEVICES; 295fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES); 296fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES, NULL, 0); 297fa9e4066Sahrens } 298fa9e4066Sahrens } 299fa9e4066Sahrens 300fa9e4066Sahrens static void 301fa9e4066Sahrens setuid_changed_cb(void *arg, uint64_t newval) 302fa9e4066Sahrens { 303fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 304fa9e4066Sahrens 305fa9e4066Sahrens if (newval == FALSE) { 306fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 307fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 308fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 309fa9e4066Sahrens } else { 310fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 311fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 312fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 313fa9e4066Sahrens } 314fa9e4066Sahrens } 315fa9e4066Sahrens 316fa9e4066Sahrens static void 317fa9e4066Sahrens exec_changed_cb(void *arg, uint64_t newval) 318fa9e4066Sahrens { 319fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 320fa9e4066Sahrens 321fa9e4066Sahrens if (newval == FALSE) { 322fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 323fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 324fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 325fa9e4066Sahrens } else { 326fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 327fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 328fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 329fa9e4066Sahrens } 330fa9e4066Sahrens } 331fa9e4066Sahrens 332fa9e4066Sahrens static void 333fa9e4066Sahrens snapdir_changed_cb(void *arg, uint64_t newval) 334fa9e4066Sahrens { 335fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 336fa9e4066Sahrens 337fa9e4066Sahrens zfsvfs->z_show_ctldir = newval; 338fa9e4066Sahrens } 339fa9e4066Sahrens 340fa9e4066Sahrens static void 341fa9e4066Sahrens acl_mode_changed_cb(void *arg, uint64_t newval) 342fa9e4066Sahrens { 343fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 344fa9e4066Sahrens 345fa9e4066Sahrens zfsvfs->z_acl_mode = newval; 346fa9e4066Sahrens } 347fa9e4066Sahrens 348fa9e4066Sahrens static void 349fa9e4066Sahrens acl_inherit_changed_cb(void *arg, uint64_t newval) 350fa9e4066Sahrens { 351fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 352fa9e4066Sahrens 353fa9e4066Sahrens zfsvfs->z_acl_inherit = newval; 354fa9e4066Sahrens } 355fa9e4066Sahrens 356ea8dc4b6Seschrock static int 357ea8dc4b6Seschrock zfs_register_callbacks(vfs_t *vfsp) 358ea8dc4b6Seschrock { 359ea8dc4b6Seschrock struct dsl_dataset *ds = NULL; 360ea8dc4b6Seschrock objset_t *os = NULL; 361ea8dc4b6Seschrock zfsvfs_t *zfsvfs = NULL; 3623ccfa83cSahrens int readonly, do_readonly = FALSE; 3633ccfa83cSahrens int setuid, do_setuid = FALSE; 3643ccfa83cSahrens int exec, do_exec = FALSE; 3653ccfa83cSahrens int devices, do_devices = FALSE; 3663ccfa83cSahrens int xattr, do_xattr = FALSE; 367*b510d378Slling int atime, do_atime = FALSE; 368ea8dc4b6Seschrock int error = 0; 369ea8dc4b6Seschrock 370ea8dc4b6Seschrock ASSERT(vfsp); 371ea8dc4b6Seschrock zfsvfs = vfsp->vfs_data; 372ea8dc4b6Seschrock ASSERT(zfsvfs); 373ea8dc4b6Seschrock os = zfsvfs->z_os; 374fa9e4066Sahrens 375fa9e4066Sahrens /* 376ea8dc4b6Seschrock * The act of registering our callbacks will destroy any mount 377ea8dc4b6Seschrock * options we may have. In order to enable temporary overrides 3787b55fa8eSck * of mount options, we stash away the current values and 379ea8dc4b6Seschrock * restore them after we register the callbacks. 380fa9e4066Sahrens */ 381ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 382ea8dc4b6Seschrock readonly = B_TRUE; 383ea8dc4b6Seschrock do_readonly = B_TRUE; 384ea8dc4b6Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 385ea8dc4b6Seschrock readonly = B_FALSE; 386ea8dc4b6Seschrock do_readonly = B_TRUE; 387ea8dc4b6Seschrock } 388ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 389ea8dc4b6Seschrock devices = B_FALSE; 390ea8dc4b6Seschrock setuid = B_FALSE; 391ea8dc4b6Seschrock do_devices = B_TRUE; 392ea8dc4b6Seschrock do_setuid = B_TRUE; 393ea8dc4b6Seschrock } else { 394ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { 395ea8dc4b6Seschrock devices = B_FALSE; 396ea8dc4b6Seschrock do_devices = B_TRUE; 397b1b8ab34Slling } else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) { 398ea8dc4b6Seschrock devices = B_TRUE; 399ea8dc4b6Seschrock do_devices = B_TRUE; 400fa9e4066Sahrens } 401fa9e4066Sahrens 402ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 403ea8dc4b6Seschrock setuid = B_FALSE; 404ea8dc4b6Seschrock do_setuid = B_TRUE; 405ea8dc4b6Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 406ea8dc4b6Seschrock setuid = B_TRUE; 407ea8dc4b6Seschrock do_setuid = B_TRUE; 408fa9e4066Sahrens } 409ea8dc4b6Seschrock } 410ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 411ea8dc4b6Seschrock exec = B_FALSE; 412ea8dc4b6Seschrock do_exec = B_TRUE; 413ea8dc4b6Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 414ea8dc4b6Seschrock exec = B_TRUE; 415ea8dc4b6Seschrock do_exec = B_TRUE; 416fa9e4066Sahrens } 4177b55fa8eSck if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 4187b55fa8eSck xattr = B_FALSE; 4197b55fa8eSck do_xattr = B_TRUE; 4207b55fa8eSck } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 4217b55fa8eSck xattr = B_TRUE; 4227b55fa8eSck do_xattr = B_TRUE; 4237b55fa8eSck } 424*b510d378Slling if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 425*b510d378Slling atime = B_FALSE; 426*b510d378Slling do_atime = B_TRUE; 427*b510d378Slling } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 428*b510d378Slling atime = B_TRUE; 429*b510d378Slling do_atime = B_TRUE; 430*b510d378Slling } 431fa9e4066Sahrens 432fa9e4066Sahrens /* 433ea8dc4b6Seschrock * Register property callbacks. 434ea8dc4b6Seschrock * 435ea8dc4b6Seschrock * It would probably be fine to just check for i/o error from 436ea8dc4b6Seschrock * the first prop_register(), but I guess I like to go 437ea8dc4b6Seschrock * overboard... 438fa9e4066Sahrens */ 439ea8dc4b6Seschrock ds = dmu_objset_ds(os); 440ea8dc4b6Seschrock error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 4417b55fa8eSck error = error ? error : dsl_prop_register(ds, 4427b55fa8eSck "xattr", xattr_changed_cb, zfsvfs); 443ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 444ea8dc4b6Seschrock "recordsize", blksz_changed_cb, zfsvfs); 445ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 446ea8dc4b6Seschrock "readonly", readonly_changed_cb, zfsvfs); 447ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 448ea8dc4b6Seschrock "devices", devices_changed_cb, zfsvfs); 449ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 450ea8dc4b6Seschrock "setuid", setuid_changed_cb, zfsvfs); 451ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 452ea8dc4b6Seschrock "exec", exec_changed_cb, zfsvfs); 453ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 454ea8dc4b6Seschrock "snapdir", snapdir_changed_cb, zfsvfs); 455ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 456ea8dc4b6Seschrock "aclmode", acl_mode_changed_cb, zfsvfs); 457ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 458ea8dc4b6Seschrock "aclinherit", acl_inherit_changed_cb, zfsvfs); 459ea8dc4b6Seschrock if (error) 460ea8dc4b6Seschrock goto unregister; 461fa9e4066Sahrens 462ea8dc4b6Seschrock /* 463ea8dc4b6Seschrock * Invoke our callbacks to restore temporary mount options. 464ea8dc4b6Seschrock */ 465ea8dc4b6Seschrock if (do_readonly) 466ea8dc4b6Seschrock readonly_changed_cb(zfsvfs, readonly); 467ea8dc4b6Seschrock if (do_setuid) 468ea8dc4b6Seschrock setuid_changed_cb(zfsvfs, setuid); 469ea8dc4b6Seschrock if (do_exec) 470ea8dc4b6Seschrock exec_changed_cb(zfsvfs, exec); 471ea8dc4b6Seschrock if (do_devices) 472ea8dc4b6Seschrock devices_changed_cb(zfsvfs, devices); 4737b55fa8eSck if (do_xattr) 4747b55fa8eSck xattr_changed_cb(zfsvfs, xattr); 475*b510d378Slling if (do_atime) 476*b510d378Slling atime_changed_cb(zfsvfs, atime); 477fa9e4066Sahrens 478ea8dc4b6Seschrock return (0); 479fa9e4066Sahrens 480ea8dc4b6Seschrock unregister: 481fa9e4066Sahrens /* 482ea8dc4b6Seschrock * We may attempt to unregister some callbacks that are not 483ea8dc4b6Seschrock * registered, but this is OK; it will simply return ENOMSG, 484ea8dc4b6Seschrock * which we will ignore. 485fa9e4066Sahrens */ 486ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 4877b55fa8eSck (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 488ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 489ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 490ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "devices", devices_changed_cb, zfsvfs); 491ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 492ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 493ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 494ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); 495ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 496ea8dc4b6Seschrock zfsvfs); 497ea8dc4b6Seschrock return (error); 498ea8dc4b6Seschrock 499ea8dc4b6Seschrock } 500ea8dc4b6Seschrock 501ea8dc4b6Seschrock static int 502ea8dc4b6Seschrock zfs_domount(vfs_t *vfsp, char *osname, cred_t *cr) 503ea8dc4b6Seschrock { 504ea8dc4b6Seschrock dev_t mount_dev; 505ea8dc4b6Seschrock uint64_t recordsize, readonly; 506ea8dc4b6Seschrock int error = 0; 507ea8dc4b6Seschrock int mode; 508ea8dc4b6Seschrock zfsvfs_t *zfsvfs; 509ea8dc4b6Seschrock znode_t *zp = NULL; 510ea8dc4b6Seschrock 511ea8dc4b6Seschrock ASSERT(vfsp); 512ea8dc4b6Seschrock ASSERT(osname); 513fa9e4066Sahrens 514fa9e4066Sahrens /* 515fa9e4066Sahrens * Initialize the zfs-specific filesystem structure. 516fa9e4066Sahrens * Should probably make this a kmem cache, shuffle fields, 517ea8dc4b6Seschrock * and just bzero up to z_hold_mtx[]. 518fa9e4066Sahrens */ 519fa9e4066Sahrens zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 520fa9e4066Sahrens zfsvfs->z_vfs = vfsp; 521fa9e4066Sahrens zfsvfs->z_parent = zfsvfs; 522fa9e4066Sahrens zfsvfs->z_assign = TXG_NOWAIT; 523fa9e4066Sahrens zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 524a0965f35Sbonwick zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 525fa9e4066Sahrens 526fa9e4066Sahrens mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 527fa9e4066Sahrens list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 528fa9e4066Sahrens offsetof(znode_t, z_link_node)); 529fa9e4066Sahrens rw_init(&zfsvfs->z_um_lock, NULL, RW_DEFAULT, NULL); 530fa9e4066Sahrens 531ea8dc4b6Seschrock /* Initialize the generic filesystem structure. */ 532fa9e4066Sahrens vfsp->vfs_bcount = 0; 533fa9e4066Sahrens vfsp->vfs_data = NULL; 534fa9e4066Sahrens 535ea8dc4b6Seschrock if (zfs_create_unique_device(&mount_dev) == -1) { 536ea8dc4b6Seschrock error = ENODEV; 537ea8dc4b6Seschrock goto out; 538ea8dc4b6Seschrock } 539fa9e4066Sahrens ASSERT(vfs_devismounted(mount_dev) == 0); 540fa9e4066Sahrens 541ea8dc4b6Seschrock if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 542ea8dc4b6Seschrock NULL)) 543ea8dc4b6Seschrock goto out; 544fa9e4066Sahrens 545fa9e4066Sahrens vfsp->vfs_dev = mount_dev; 546fa9e4066Sahrens vfsp->vfs_fstype = zfsfstype; 547fa9e4066Sahrens vfsp->vfs_bsize = recordsize; 548fa9e4066Sahrens vfsp->vfs_flag |= VFS_NOTRUNC; 549fa9e4066Sahrens vfsp->vfs_data = zfsvfs; 550fa9e4066Sahrens 551ea8dc4b6Seschrock if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) 552fa9e4066Sahrens goto out; 553fa9e4066Sahrens 554fa9e4066Sahrens if (readonly) 555fa9e4066Sahrens mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 556fa9e4066Sahrens else 557fa9e4066Sahrens mode = DS_MODE_PRIMARY; 558fa9e4066Sahrens 559fa9e4066Sahrens error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 560fa9e4066Sahrens if (error == EROFS) { 561fa9e4066Sahrens mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 562fa9e4066Sahrens error = dmu_objset_open(osname, DMU_OST_ZFS, mode, 563fa9e4066Sahrens &zfsvfs->z_os); 564fa9e4066Sahrens } 565fa9e4066Sahrens 566fa9e4066Sahrens if (error) 567fa9e4066Sahrens goto out; 568fa9e4066Sahrens 569fa9e4066Sahrens if (error = zfs_init_fs(zfsvfs, &zp, cr)) 570fa9e4066Sahrens goto out; 571fa9e4066Sahrens 572ea8dc4b6Seschrock /* The call to zfs_init_fs leaves the vnode held, release it here. */ 573ea8dc4b6Seschrock VN_RELE(ZTOV(zp)); 574ea8dc4b6Seschrock 575ea8dc4b6Seschrock if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 5767b55fa8eSck uint64_t xattr; 5777b55fa8eSck 578fa9e4066Sahrens ASSERT(mode & DS_MODE_READONLY); 579fa9e4066Sahrens atime_changed_cb(zfsvfs, B_FALSE); 580fa9e4066Sahrens readonly_changed_cb(zfsvfs, B_TRUE); 5817b55fa8eSck if (error = dsl_prop_get_integer(osname, "xattr", &xattr, NULL)) 5827b55fa8eSck goto out; 5837b55fa8eSck xattr_changed_cb(zfsvfs, xattr); 584fa9e4066Sahrens zfsvfs->z_issnap = B_TRUE; 585fa9e4066Sahrens } else { 586ea8dc4b6Seschrock error = zfs_register_callbacks(vfsp); 587ea8dc4b6Seschrock if (error) 588ea8dc4b6Seschrock goto out; 589fa9e4066Sahrens 590e7437265Sahrens if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY)) 591e7437265Sahrens zfs_unlinked_drain(zfsvfs); 592fa9e4066Sahrens 593fa9e4066Sahrens /* 594fa9e4066Sahrens * Parse and replay the intent log. 595e7437265Sahrens * 596e7437265Sahrens * Because of ziltest, this must be done after 597e7437265Sahrens * zfs_unlinked_drain(). (Further note: ziltest doesn't 598e7437265Sahrens * use readonly mounts, where zfs_unlinked_drain() isn't 599e7437265Sahrens * called.) This is because ziltest causes spa_sync() 600e7437265Sahrens * to think it's committed, but actually it is not, so 601e7437265Sahrens * the intent log contains many txg's worth of changes. 602e7437265Sahrens * 603e7437265Sahrens * In particular, if object N is in the unlinked set in 604e7437265Sahrens * the last txg to actually sync, then it could be 605e7437265Sahrens * actually freed in a later txg and then reallocated in 606e7437265Sahrens * a yet later txg. This would write a "create object 607e7437265Sahrens * N" record to the intent log. Normally, this would be 608e7437265Sahrens * fine because the spa_sync() would have written out 609e7437265Sahrens * the fact that object N is free, before we could write 610e7437265Sahrens * the "create object N" intent log record. 611e7437265Sahrens * 612e7437265Sahrens * But when we are in ziltest mode, we advance the "open 613e7437265Sahrens * txg" without actually spa_sync()-ing the changes to 614e7437265Sahrens * disk. So we would see that object N is still 615e7437265Sahrens * allocated and in the unlinked set, and there is an 616e7437265Sahrens * intent log record saying to allocate it. 617fa9e4066Sahrens */ 618ea8dc4b6Seschrock zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, 619893a6d32Sahrens zfs_replay_vector); 620fa9e4066Sahrens 621fa9e4066Sahrens if (!zil_disable) 622ea8dc4b6Seschrock zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 623ea8dc4b6Seschrock } 624fa9e4066Sahrens 625ea8dc4b6Seschrock if (!zfsvfs->z_issnap) 626ea8dc4b6Seschrock zfsctl_create(zfsvfs); 627ea8dc4b6Seschrock out: 628ea8dc4b6Seschrock if (error) { 629ea8dc4b6Seschrock if (zfsvfs->z_os) 630ea8dc4b6Seschrock dmu_objset_close(zfsvfs->z_os); 631ea8dc4b6Seschrock kmem_free(zfsvfs, sizeof (zfsvfs_t)); 632ea8dc4b6Seschrock } else { 633ea8dc4b6Seschrock atomic_add_32(&zfs_active_fs_count, 1); 634ea8dc4b6Seschrock } 635fa9e4066Sahrens 636ea8dc4b6Seschrock return (error); 637ea8dc4b6Seschrock } 638ea8dc4b6Seschrock 639ea8dc4b6Seschrock void 640ea8dc4b6Seschrock zfs_unregister_callbacks(zfsvfs_t *zfsvfs) 641ea8dc4b6Seschrock { 642ea8dc4b6Seschrock objset_t *os = zfsvfs->z_os; 643ea8dc4b6Seschrock struct dsl_dataset *ds; 644ea8dc4b6Seschrock 645ea8dc4b6Seschrock /* 646ea8dc4b6Seschrock * Unregister properties. 647ea8dc4b6Seschrock */ 648ea8dc4b6Seschrock if (!dmu_objset_is_snapshot(os)) { 649fa9e4066Sahrens ds = dmu_objset_ds(os); 650ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 651fa9e4066Sahrens zfsvfs) == 0); 652fa9e4066Sahrens 6537b55fa8eSck VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 6547b55fa8eSck zfsvfs) == 0); 6557b55fa8eSck 656ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 657fa9e4066Sahrens zfsvfs) == 0); 658fa9e4066Sahrens 659ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 660fa9e4066Sahrens zfsvfs) == 0); 661fa9e4066Sahrens 662ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb, 663fa9e4066Sahrens zfsvfs) == 0); 664fa9e4066Sahrens 665ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 666fa9e4066Sahrens zfsvfs) == 0); 667fa9e4066Sahrens 668ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 669fa9e4066Sahrens zfsvfs) == 0); 670fa9e4066Sahrens 671ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 672fa9e4066Sahrens zfsvfs) == 0); 673fa9e4066Sahrens 674ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 675fa9e4066Sahrens zfsvfs) == 0); 676fa9e4066Sahrens 677ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "aclinherit", 678fa9e4066Sahrens acl_inherit_changed_cb, zfsvfs) == 0); 679ea8dc4b6Seschrock } 680ea8dc4b6Seschrock } 681fa9e4066Sahrens 682b1b8ab34Slling /* 683b1b8ab34Slling * Convert a decimal digit string to a uint64_t integer. 684b1b8ab34Slling */ 685b1b8ab34Slling static int 686b1b8ab34Slling str_to_uint64(char *str, uint64_t *objnum) 687b1b8ab34Slling { 688b1b8ab34Slling uint64_t num = 0; 689b1b8ab34Slling 690b1b8ab34Slling while (*str) { 691b1b8ab34Slling if (*str < '0' || *str > '9') 692b1b8ab34Slling return (EINVAL); 693b1b8ab34Slling 694b1b8ab34Slling num = num*10 + *str++ - '0'; 695b1b8ab34Slling } 696b1b8ab34Slling 697b1b8ab34Slling *objnum = num; 698b1b8ab34Slling return (0); 699b1b8ab34Slling } 700b1b8ab34Slling 701b1b8ab34Slling /* 702b1b8ab34Slling * The boot path passed from the boot loader is in the form of 703b1b8ab34Slling * "rootpool-name/root-filesystem-object-number'. Convert this 704b1b8ab34Slling * string to a dataset name: "rootpool-name/root-filesystem-name". 705b1b8ab34Slling */ 706b1b8ab34Slling static int 707b1b8ab34Slling parse_bootpath(char *bpath, char *outpath) 708b1b8ab34Slling { 709b1b8ab34Slling char *slashp; 710b1b8ab34Slling uint64_t objnum; 711b1b8ab34Slling int error; 712b1b8ab34Slling 713b1b8ab34Slling if (*bpath == 0 || *bpath == '/') 714b1b8ab34Slling return (EINVAL); 715b1b8ab34Slling 716b1b8ab34Slling slashp = strchr(bpath, '/'); 717b1b8ab34Slling 718b1b8ab34Slling /* if no '/', just return the pool name */ 719b1b8ab34Slling if (slashp == NULL) { 720b1b8ab34Slling (void) strcpy(outpath, bpath); 721b1b8ab34Slling return (0); 722b1b8ab34Slling } 723b1b8ab34Slling 724b1b8ab34Slling if (error = str_to_uint64(slashp+1, &objnum)) 725b1b8ab34Slling return (error); 726b1b8ab34Slling 727b1b8ab34Slling *slashp = '\0'; 728b1b8ab34Slling error = dsl_dsobj_to_dsname(bpath, objnum, outpath); 729b1b8ab34Slling *slashp = '/'; 730b1b8ab34Slling 731b1b8ab34Slling return (error); 732b1b8ab34Slling } 733b1b8ab34Slling 734ea8dc4b6Seschrock static int 735ea8dc4b6Seschrock zfs_mountroot(vfs_t *vfsp, enum whymountroot why) 736ea8dc4b6Seschrock { 737ea8dc4b6Seschrock int error = 0; 738ea8dc4b6Seschrock int ret = 0; 739ea8dc4b6Seschrock static int zfsrootdone = 0; 740ea8dc4b6Seschrock zfsvfs_t *zfsvfs = NULL; 741ea8dc4b6Seschrock znode_t *zp = NULL; 742ea8dc4b6Seschrock vnode_t *vp = NULL; 743b1b8ab34Slling char *zfs_bootpath; 744ea8dc4b6Seschrock 745ea8dc4b6Seschrock ASSERT(vfsp); 746ea8dc4b6Seschrock 747ea8dc4b6Seschrock /* 748b1b8ab34Slling * The filesystem that we mount as root is defined in the 749b1b8ab34Slling * "zfs-bootfs" property. 750ea8dc4b6Seschrock */ 751ea8dc4b6Seschrock if (why == ROOT_INIT) { 752ea8dc4b6Seschrock if (zfsrootdone++) 753ea8dc4b6Seschrock return (EBUSY); 754fa9e4066Sahrens 755b1b8ab34Slling if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(), 756b1b8ab34Slling DDI_PROP_DONTPASS, "zfs-bootfs", &zfs_bootpath) != 757b1b8ab34Slling DDI_SUCCESS) 758b1b8ab34Slling return (EIO); 759b1b8ab34Slling 760b1b8ab34Slling error = parse_bootpath(zfs_bootpath, rootfs.bo_name); 761b1b8ab34Slling ddi_prop_free(zfs_bootpath); 762b1b8ab34Slling 763b1b8ab34Slling if (error) 764b1b8ab34Slling return (error); 765fa9e4066Sahrens 766ea8dc4b6Seschrock if (error = vfs_lock(vfsp)) 767ea8dc4b6Seschrock return (error); 768fa9e4066Sahrens 769b1b8ab34Slling if (error = zfs_domount(vfsp, rootfs.bo_name, CRED())) 770ea8dc4b6Seschrock goto out; 771ea8dc4b6Seschrock 772ea8dc4b6Seschrock zfsvfs = (zfsvfs_t *)vfsp->vfs_data; 773ea8dc4b6Seschrock ASSERT(zfsvfs); 774ea8dc4b6Seschrock if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) 775ea8dc4b6Seschrock goto out; 776ea8dc4b6Seschrock 777ea8dc4b6Seschrock vp = ZTOV(zp); 778ea8dc4b6Seschrock mutex_enter(&vp->v_lock); 779ea8dc4b6Seschrock vp->v_flag |= VROOT; 780ea8dc4b6Seschrock mutex_exit(&vp->v_lock); 781ea8dc4b6Seschrock rootvp = vp; 782ea8dc4b6Seschrock 783ea8dc4b6Seschrock /* 784ea8dc4b6Seschrock * The zfs_zget call above returns with a hold on vp, we release 785ea8dc4b6Seschrock * it here. 786ea8dc4b6Seschrock */ 787fa9e4066Sahrens VN_RELE(vp); 788ea8dc4b6Seschrock 789ea8dc4b6Seschrock /* 790ea8dc4b6Seschrock * Mount root as readonly initially, it will be remouted 791ea8dc4b6Seschrock * read/write by /lib/svc/method/fs-usr. 792ea8dc4b6Seschrock */ 793ea8dc4b6Seschrock readonly_changed_cb(vfsp->vfs_data, B_TRUE); 794ea8dc4b6Seschrock vfs_add((struct vnode *)0, vfsp, 795ea8dc4b6Seschrock (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0); 796ea8dc4b6Seschrock out: 797ea8dc4b6Seschrock vfs_unlock(vfsp); 798ea8dc4b6Seschrock ret = (error) ? error : 0; 799ea8dc4b6Seschrock return (ret); 800ea8dc4b6Seschrock } else if (why == ROOT_REMOUNT) { 801ea8dc4b6Seschrock readonly_changed_cb(vfsp->vfs_data, B_FALSE); 802ea8dc4b6Seschrock vfsp->vfs_flag |= VFS_REMOUNT; 803*b510d378Slling 804*b510d378Slling /* refresh mount options */ 805*b510d378Slling zfs_unregister_callbacks(vfsp->vfs_data); 806*b510d378Slling return (zfs_register_callbacks(vfsp)); 807*b510d378Slling 808ea8dc4b6Seschrock } else if (why == ROOT_UNMOUNT) { 809ea8dc4b6Seschrock zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data); 810ea8dc4b6Seschrock (void) zfs_sync(vfsp, 0, 0); 811ea8dc4b6Seschrock return (0); 812ea8dc4b6Seschrock } 813ea8dc4b6Seschrock 814ea8dc4b6Seschrock /* 815ea8dc4b6Seschrock * if "why" is equal to anything else other than ROOT_INIT, 816ea8dc4b6Seschrock * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it. 817ea8dc4b6Seschrock */ 818ea8dc4b6Seschrock return (ENOTSUP); 819ea8dc4b6Seschrock } 820ea8dc4b6Seschrock 821ea8dc4b6Seschrock /*ARGSUSED*/ 822ea8dc4b6Seschrock static int 823ea8dc4b6Seschrock zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 824ea8dc4b6Seschrock { 825ea8dc4b6Seschrock char *osname; 826ea8dc4b6Seschrock pathname_t spn; 827ea8dc4b6Seschrock int error = 0; 828ea8dc4b6Seschrock uio_seg_t fromspace = (uap->flags & MS_SYSSPACE) ? 829b1b8ab34Slling UIO_SYSSPACE : UIO_USERSPACE; 830ea8dc4b6Seschrock int canwrite; 831ea8dc4b6Seschrock 832ea8dc4b6Seschrock if (mvp->v_type != VDIR) 833ea8dc4b6Seschrock return (ENOTDIR); 834ea8dc4b6Seschrock 835ea8dc4b6Seschrock mutex_enter(&mvp->v_lock); 836ea8dc4b6Seschrock if ((uap->flags & MS_REMOUNT) == 0 && 837ea8dc4b6Seschrock (uap->flags & MS_OVERLAY) == 0 && 838ea8dc4b6Seschrock (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 839ea8dc4b6Seschrock mutex_exit(&mvp->v_lock); 840ea8dc4b6Seschrock return (EBUSY); 841ea8dc4b6Seschrock } 842ea8dc4b6Seschrock mutex_exit(&mvp->v_lock); 843ea8dc4b6Seschrock 844ea8dc4b6Seschrock /* 845ea8dc4b6Seschrock * ZFS does not support passing unparsed data in via MS_DATA. 846ea8dc4b6Seschrock * Users should use the MS_OPTIONSTR interface; this means 847ea8dc4b6Seschrock * that all option parsing is already done and the options struct 848ea8dc4b6Seschrock * can be interrogated. 849ea8dc4b6Seschrock */ 850ea8dc4b6Seschrock if ((uap->flags & MS_DATA) && uap->datalen > 0) 851ea8dc4b6Seschrock return (EINVAL); 852ea8dc4b6Seschrock 853ea8dc4b6Seschrock /* 854ea8dc4b6Seschrock * Get the objset name (the "special" mount argument). 855ea8dc4b6Seschrock */ 856ea8dc4b6Seschrock if (error = pn_get(uap->spec, fromspace, &spn)) 857ea8dc4b6Seschrock return (error); 858ea8dc4b6Seschrock 859ea8dc4b6Seschrock osname = spn.pn_path; 860ea8dc4b6Seschrock 861ecd6cf80Smarks /* 862ecd6cf80Smarks * Check for mount privilege? 863ecd6cf80Smarks * 864ecd6cf80Smarks * If we don't have privilege then see if 865ecd6cf80Smarks * we have local permission to allow it 866ecd6cf80Smarks */ 867ecd6cf80Smarks error = secpolicy_fs_mount(cr, mvp, vfsp); 868ecd6cf80Smarks if (error) { 869ecd6cf80Smarks error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr); 870ecd6cf80Smarks if (error == 0) { 871ecd6cf80Smarks vattr_t vattr; 872ecd6cf80Smarks 873ecd6cf80Smarks /* 874ecd6cf80Smarks * Make sure user is the owner of the mount point 875ecd6cf80Smarks * or has sufficient privileges. 876ecd6cf80Smarks */ 877ecd6cf80Smarks 878ecd6cf80Smarks vattr.va_mask = AT_UID; 879ecd6cf80Smarks 880ecd6cf80Smarks if (VOP_GETATTR(mvp, &vattr, 0, cr)) { 881ecd6cf80Smarks goto out; 882ecd6cf80Smarks } 883ecd6cf80Smarks 884ecd6cf80Smarks if (error = secpolicy_vnode_owner(cr, vattr.va_uid)) { 885ecd6cf80Smarks goto out; 886ecd6cf80Smarks } 887ecd6cf80Smarks 888ecd6cf80Smarks if (error = VOP_ACCESS(mvp, VWRITE, 0, cr)) { 889ecd6cf80Smarks goto out; 890ecd6cf80Smarks } 891ecd6cf80Smarks 892ecd6cf80Smarks secpolicy_fs_mount_clearopts(cr, vfsp); 893ecd6cf80Smarks } else { 894ecd6cf80Smarks goto out; 895ecd6cf80Smarks } 896ecd6cf80Smarks } 897ea8dc4b6Seschrock 898ea8dc4b6Seschrock /* 899ea8dc4b6Seschrock * Refuse to mount a filesystem if we are in a local zone and the 900ea8dc4b6Seschrock * dataset is not visible. 901ea8dc4b6Seschrock */ 902ea8dc4b6Seschrock if (!INGLOBALZONE(curproc) && 903ea8dc4b6Seschrock (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 904ea8dc4b6Seschrock error = EPERM; 905ea8dc4b6Seschrock goto out; 906ea8dc4b6Seschrock } 907ea8dc4b6Seschrock 908*b510d378Slling /* 909*b510d378Slling * When doing a remount, we simply refresh our temporary properties 910*b510d378Slling * according to those options set in the current VFS options. 911*b510d378Slling */ 912*b510d378Slling if (uap->flags & MS_REMOUNT) { 913*b510d378Slling /* refresh mount options */ 914*b510d378Slling zfs_unregister_callbacks(vfsp->vfs_data); 915*b510d378Slling error = zfs_register_callbacks(vfsp); 916*b510d378Slling goto out; 917*b510d378Slling } 918*b510d378Slling 919ea8dc4b6Seschrock error = zfs_domount(vfsp, osname, cr); 920ea8dc4b6Seschrock 921ea8dc4b6Seschrock out: 922fa9e4066Sahrens pn_free(&spn); 923fa9e4066Sahrens return (error); 924fa9e4066Sahrens } 925fa9e4066Sahrens 926fa9e4066Sahrens static int 927fa9e4066Sahrens zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp) 928fa9e4066Sahrens { 929fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 930fa9e4066Sahrens dev32_t d32; 931a2eea2e1Sahrens uint64_t refdbytes, availbytes, usedobjs, availobjs; 932fa9e4066Sahrens 933fa9e4066Sahrens ZFS_ENTER(zfsvfs); 934fa9e4066Sahrens 935a2eea2e1Sahrens dmu_objset_space(zfsvfs->z_os, 936a2eea2e1Sahrens &refdbytes, &availbytes, &usedobjs, &availobjs); 937fa9e4066Sahrens 938fa9e4066Sahrens /* 939fa9e4066Sahrens * The underlying storage pool actually uses multiple block sizes. 940fa9e4066Sahrens * We report the fragsize as the smallest block size we support, 941fa9e4066Sahrens * and we report our blocksize as the filesystem's maximum blocksize. 942fa9e4066Sahrens */ 943fa9e4066Sahrens statp->f_frsize = 1UL << SPA_MINBLOCKSHIFT; 944fa9e4066Sahrens statp->f_bsize = zfsvfs->z_max_blksz; 945fa9e4066Sahrens 946fa9e4066Sahrens /* 947fa9e4066Sahrens * The following report "total" blocks of various kinds in the 948fa9e4066Sahrens * file system, but reported in terms of f_frsize - the 949fa9e4066Sahrens * "fragment" size. 950fa9e4066Sahrens */ 951fa9e4066Sahrens 952a2eea2e1Sahrens statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; 953a2eea2e1Sahrens statp->f_bfree = availbytes >> SPA_MINBLOCKSHIFT; 954fa9e4066Sahrens statp->f_bavail = statp->f_bfree; /* no root reservation */ 955fa9e4066Sahrens 956fa9e4066Sahrens /* 957fa9e4066Sahrens * statvfs() should really be called statufs(), because it assumes 958fa9e4066Sahrens * static metadata. ZFS doesn't preallocate files, so the best 959fa9e4066Sahrens * we can do is report the max that could possibly fit in f_files, 960fa9e4066Sahrens * and that minus the number actually used in f_ffree. 961fa9e4066Sahrens * For f_ffree, report the smaller of the number of object available 962fa9e4066Sahrens * and the number of blocks (each object will take at least a block). 963fa9e4066Sahrens */ 964a2eea2e1Sahrens statp->f_ffree = MIN(availobjs, statp->f_bfree); 965fa9e4066Sahrens statp->f_favail = statp->f_ffree; /* no "root reservation" */ 966a2eea2e1Sahrens statp->f_files = statp->f_ffree + usedobjs; 967fa9e4066Sahrens 968fa9e4066Sahrens (void) cmpldev(&d32, vfsp->vfs_dev); 969fa9e4066Sahrens statp->f_fsid = d32; 970fa9e4066Sahrens 971fa9e4066Sahrens /* 972fa9e4066Sahrens * We're a zfs filesystem. 973fa9e4066Sahrens */ 974fa9e4066Sahrens (void) strcpy(statp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name); 975fa9e4066Sahrens 976a5be7ebbSmarks statp->f_flag = vf_to_stf(vfsp->vfs_flag); 977fa9e4066Sahrens 978fa9e4066Sahrens statp->f_namemax = ZFS_MAXNAMELEN; 979fa9e4066Sahrens 980fa9e4066Sahrens /* 981fa9e4066Sahrens * We have all of 32 characters to stuff a string here. 982fa9e4066Sahrens * Is there anything useful we could/should provide? 983fa9e4066Sahrens */ 984fa9e4066Sahrens bzero(statp->f_fstr, sizeof (statp->f_fstr)); 985fa9e4066Sahrens 986fa9e4066Sahrens ZFS_EXIT(zfsvfs); 987fa9e4066Sahrens return (0); 988fa9e4066Sahrens } 989fa9e4066Sahrens 990fa9e4066Sahrens static int 991fa9e4066Sahrens zfs_root(vfs_t *vfsp, vnode_t **vpp) 992fa9e4066Sahrens { 993fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 994fa9e4066Sahrens znode_t *rootzp; 995fa9e4066Sahrens int error; 996fa9e4066Sahrens 997fa9e4066Sahrens ZFS_ENTER(zfsvfs); 998fa9e4066Sahrens 999fa9e4066Sahrens error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 1000fa9e4066Sahrens if (error == 0) 1001fa9e4066Sahrens *vpp = ZTOV(rootzp); 1002fa9e4066Sahrens 1003fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1004fa9e4066Sahrens return (error); 1005fa9e4066Sahrens } 1006fa9e4066Sahrens 1007fa9e4066Sahrens /*ARGSUSED*/ 1008fa9e4066Sahrens static int 1009fa9e4066Sahrens zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr) 1010fa9e4066Sahrens { 1011fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1012fa9e4066Sahrens int ret; 1013fa9e4066Sahrens 1014ecd6cf80Smarks ret = secpolicy_fs_unmount(cr, vfsp); 1015ecd6cf80Smarks if (ret) { 1016ecd6cf80Smarks ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), 1017ecd6cf80Smarks ZFS_DELEG_PERM_MOUNT, cr); 1018ecd6cf80Smarks if (ret) 1019ecd6cf80Smarks return (ret); 1020ecd6cf80Smarks } 1021033f9833Sek 1022033f9833Sek (void) dnlc_purge_vfsp(vfsp, 0); 1023033f9833Sek 1024fa9e4066Sahrens /* 1025fa9e4066Sahrens * Unmount any snapshots mounted under .zfs before unmounting the 1026fa9e4066Sahrens * dataset itself. 1027fa9e4066Sahrens */ 1028fa9e4066Sahrens if (zfsvfs->z_ctldir != NULL && 1029ecd6cf80Smarks (ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) { 1030fa9e4066Sahrens return (ret); 1031ecd6cf80Smarks } 1032fa9e4066Sahrens 1033fa9e4066Sahrens if (fflag & MS_FORCE) { 1034fa9e4066Sahrens vfsp->vfs_flag |= VFS_UNMOUNTED; 1035fa9e4066Sahrens zfsvfs->z_unmounted1 = B_TRUE; 1036fa9e4066Sahrens 1037758f6e0bSgw /* 1038758f6e0bSgw * Ensure that z_unmounted1 reaches global visibility 1039758f6e0bSgw * before z_op_cnt. 1040758f6e0bSgw */ 1041758f6e0bSgw membar_producer(); 1042758f6e0bSgw 1043fa9e4066Sahrens /* 1044fa9e4066Sahrens * Wait for all zfs threads to leave zfs. 1045fa9e4066Sahrens * Grabbing a rwlock as reader in all vops and 1046fa9e4066Sahrens * as writer here doesn't work because it too easy to get 1047fa9e4066Sahrens * multiple reader enters as zfs can re-enter itself. 1048fa9e4066Sahrens * This can lead to deadlock if there is an intervening 1049fa9e4066Sahrens * rw_enter as writer. 1050fa9e4066Sahrens * So a file system threads ref count (z_op_cnt) is used. 1051fa9e4066Sahrens * A polling loop on z_op_cnt may seem inefficient, but 1052fa9e4066Sahrens * - this saves all threads on exit from having to grab a 1053fa9e4066Sahrens * mutex in order to cv_signal 1054fa9e4066Sahrens * - only occurs on forced unmount in the rare case when 1055fa9e4066Sahrens * there are outstanding threads within the file system. 1056fa9e4066Sahrens */ 1057fa9e4066Sahrens while (zfsvfs->z_op_cnt) { 1058fa9e4066Sahrens delay(1); 1059fa9e4066Sahrens } 1060fa9e4066Sahrens 1061fa9e4066Sahrens zfs_objset_close(zfsvfs); 1062fa9e4066Sahrens 1063fa9e4066Sahrens return (0); 1064fa9e4066Sahrens } 1065fa9e4066Sahrens /* 1066fa9e4066Sahrens * Check the number of active vnodes in the file system. 1067fa9e4066Sahrens * Our count is maintained in the vfs structure, but the number 1068fa9e4066Sahrens * is off by 1 to indicate a hold on the vfs structure itself. 1069fa9e4066Sahrens * 1070fa9e4066Sahrens * The '.zfs' directory maintains a reference of its own, and any active 1071fa9e4066Sahrens * references underneath are reflected in the vnode count. 1072fa9e4066Sahrens */ 1073fa9e4066Sahrens if (zfsvfs->z_ctldir == NULL) { 1074893a6d32Sahrens if (vfsp->vfs_count > 1) 1075fa9e4066Sahrens return (EBUSY); 1076fa9e4066Sahrens } else { 1077fa9e4066Sahrens if (vfsp->vfs_count > 2 || 1078fa9e4066Sahrens (zfsvfs->z_ctldir->v_count > 1 && !(fflag & MS_FORCE))) { 1079fa9e4066Sahrens return (EBUSY); 1080fa9e4066Sahrens } 1081fa9e4066Sahrens } 1082fa9e4066Sahrens 1083fa9e4066Sahrens vfsp->vfs_flag |= VFS_UNMOUNTED; 1084fa9e4066Sahrens zfs_objset_close(zfsvfs); 1085fa9e4066Sahrens 1086fa9e4066Sahrens return (0); 1087fa9e4066Sahrens } 1088fa9e4066Sahrens 1089fa9e4066Sahrens static int 1090fa9e4066Sahrens zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 1091fa9e4066Sahrens { 1092fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1093fa9e4066Sahrens znode_t *zp; 1094fa9e4066Sahrens uint64_t object = 0; 1095fa9e4066Sahrens uint64_t fid_gen = 0; 1096fa9e4066Sahrens uint64_t gen_mask; 1097fa9e4066Sahrens uint64_t zp_gen; 1098fa9e4066Sahrens int i, err; 1099fa9e4066Sahrens 1100fa9e4066Sahrens *vpp = NULL; 1101fa9e4066Sahrens 1102fa9e4066Sahrens ZFS_ENTER(zfsvfs); 1103fa9e4066Sahrens 1104fa9e4066Sahrens if (fidp->fid_len == LONG_FID_LEN) { 1105fa9e4066Sahrens zfid_long_t *zlfid = (zfid_long_t *)fidp; 1106fa9e4066Sahrens uint64_t objsetid = 0; 1107fa9e4066Sahrens uint64_t setgen = 0; 1108fa9e4066Sahrens 1109fa9e4066Sahrens for (i = 0; i < sizeof (zlfid->zf_setid); i++) 1110fa9e4066Sahrens objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 1111fa9e4066Sahrens 1112fa9e4066Sahrens for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 1113fa9e4066Sahrens setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 1114fa9e4066Sahrens 1115fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1116fa9e4066Sahrens 1117fa9e4066Sahrens err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 1118fa9e4066Sahrens if (err) 1119fa9e4066Sahrens return (EINVAL); 1120fa9e4066Sahrens ZFS_ENTER(zfsvfs); 1121fa9e4066Sahrens } 1122fa9e4066Sahrens 1123fa9e4066Sahrens if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 1124fa9e4066Sahrens zfid_short_t *zfid = (zfid_short_t *)fidp; 1125fa9e4066Sahrens 1126fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_object); i++) 1127fa9e4066Sahrens object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 1128fa9e4066Sahrens 1129fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_gen); i++) 1130fa9e4066Sahrens fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 1131fa9e4066Sahrens } else { 1132fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1133fa9e4066Sahrens return (EINVAL); 1134fa9e4066Sahrens } 1135fa9e4066Sahrens 1136fa9e4066Sahrens /* A zero fid_gen means we are in the .zfs control directories */ 1137fa9e4066Sahrens if (fid_gen == 0 && 1138fa9e4066Sahrens (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { 1139fa9e4066Sahrens *vpp = zfsvfs->z_ctldir; 1140fa9e4066Sahrens ASSERT(*vpp != NULL); 1141fa9e4066Sahrens if (object == ZFSCTL_INO_SNAPDIR) { 1142fa9e4066Sahrens VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, 1143fa9e4066Sahrens 0, NULL, NULL) == 0); 1144fa9e4066Sahrens } else { 1145fa9e4066Sahrens VN_HOLD(*vpp); 1146fa9e4066Sahrens } 1147fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1148fa9e4066Sahrens return (0); 1149fa9e4066Sahrens } 1150fa9e4066Sahrens 1151fa9e4066Sahrens gen_mask = -1ULL >> (64 - 8 * i); 1152fa9e4066Sahrens 1153fa9e4066Sahrens dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 1154fa9e4066Sahrens if (err = zfs_zget(zfsvfs, object, &zp)) { 1155fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1156fa9e4066Sahrens return (err); 1157fa9e4066Sahrens } 1158fa9e4066Sahrens zp_gen = zp->z_phys->zp_gen & gen_mask; 1159fa9e4066Sahrens if (zp_gen == 0) 1160fa9e4066Sahrens zp_gen = 1; 1161893a6d32Sahrens if (zp->z_unlinked || zp_gen != fid_gen) { 1162fa9e4066Sahrens dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 1163fa9e4066Sahrens VN_RELE(ZTOV(zp)); 1164fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1165fa9e4066Sahrens return (EINVAL); 1166fa9e4066Sahrens } 1167fa9e4066Sahrens 1168fa9e4066Sahrens *vpp = ZTOV(zp); 1169fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1170fa9e4066Sahrens return (0); 1171fa9e4066Sahrens } 1172fa9e4066Sahrens 1173fa9e4066Sahrens static void 1174fa9e4066Sahrens zfs_objset_close(zfsvfs_t *zfsvfs) 1175fa9e4066Sahrens { 1176fa9e4066Sahrens znode_t *zp, *nextzp; 1177fa9e4066Sahrens objset_t *os = zfsvfs->z_os; 1178fa9e4066Sahrens 1179fa9e4066Sahrens /* 1180fa9e4066Sahrens * For forced unmount, at this point all vops except zfs_inactive 1181fa9e4066Sahrens * are erroring EIO. We need to now suspend zfs_inactive threads 1182fa9e4066Sahrens * while we are freeing dbufs before switching zfs_inactive 1183fa9e4066Sahrens * to use behaviour without a objset. 1184fa9e4066Sahrens */ 1185fa9e4066Sahrens rw_enter(&zfsvfs->z_um_lock, RW_WRITER); 1186fa9e4066Sahrens 1187fa9e4066Sahrens /* 1188fa9e4066Sahrens * Release all holds on dbufs 1189fa9e4066Sahrens * Note, although we have stopped all other vop threads and 1190fa9e4066Sahrens * zfs_inactive(), the dmu can callback via znode_pageout_func() 1191fa9e4066Sahrens * which can zfs_znode_free() the znode. 1192fa9e4066Sahrens * So we lock z_all_znodes; search the list for a held 1193fa9e4066Sahrens * dbuf; drop the lock (we know zp can't disappear if we hold 1194fa9e4066Sahrens * a dbuf lock; then regrab the lock and restart. 1195fa9e4066Sahrens */ 1196fa9e4066Sahrens mutex_enter(&zfsvfs->z_znodes_lock); 1197fa9e4066Sahrens for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) { 1198fa9e4066Sahrens nextzp = list_next(&zfsvfs->z_all_znodes, zp); 1199fa9e4066Sahrens if (zp->z_dbuf_held) { 1200fa9e4066Sahrens /* dbufs should only be held when force unmounting */ 1201fa9e4066Sahrens zp->z_dbuf_held = 0; 1202fa9e4066Sahrens mutex_exit(&zfsvfs->z_znodes_lock); 1203ea8dc4b6Seschrock dmu_buf_rele(zp->z_dbuf, NULL); 1204fa9e4066Sahrens /* Start again */ 1205fa9e4066Sahrens mutex_enter(&zfsvfs->z_znodes_lock); 1206fa9e4066Sahrens nextzp = list_head(&zfsvfs->z_all_znodes); 1207fa9e4066Sahrens } 1208fa9e4066Sahrens } 1209fa9e4066Sahrens mutex_exit(&zfsvfs->z_znodes_lock); 1210fa9e4066Sahrens 1211fa9e4066Sahrens /* 1212fa9e4066Sahrens * Unregister properties. 1213fa9e4066Sahrens */ 1214ea8dc4b6Seschrock if (!dmu_objset_is_snapshot(os)) 1215ea8dc4b6Seschrock zfs_unregister_callbacks(zfsvfs); 1216fa9e4066Sahrens 1217fa9e4066Sahrens /* 1218fa9e4066Sahrens * Switch zfs_inactive to behaviour without an objset. 1219fa9e4066Sahrens * It just tosses cached pages and frees the znode & vnode. 1220fa9e4066Sahrens * Then re-enable zfs_inactive threads in that new behaviour. 1221fa9e4066Sahrens */ 1222fa9e4066Sahrens zfsvfs->z_unmounted2 = B_TRUE; 1223fa9e4066Sahrens rw_exit(&zfsvfs->z_um_lock); /* re-enable any zfs_inactive threads */ 1224fa9e4066Sahrens 1225fa9e4066Sahrens /* 1226fa9e4066Sahrens * Close the zil. Can't close the zil while zfs_inactive 1227fa9e4066Sahrens * threads are blocked as zil_close can call zfs_inactive. 1228fa9e4066Sahrens */ 1229fa9e4066Sahrens if (zfsvfs->z_log) { 1230fa9e4066Sahrens zil_close(zfsvfs->z_log); 1231fa9e4066Sahrens zfsvfs->z_log = NULL; 1232fa9e4066Sahrens } 1233fa9e4066Sahrens 1234ea8dc4b6Seschrock /* 1235ea8dc4b6Seschrock * Evict all dbufs so that cached znodes will be freed 1236ea8dc4b6Seschrock */ 1237436b2950Sperrin if (dmu_objset_evict_dbufs(os, 1)) { 1238436b2950Sperrin txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 1239436b2950Sperrin (void) dmu_objset_evict_dbufs(os, 0); 1240436b2950Sperrin } 1241ea8dc4b6Seschrock 1242fa9e4066Sahrens /* 1243fa9e4066Sahrens * Finally close the objset 1244fa9e4066Sahrens */ 1245fa9e4066Sahrens dmu_objset_close(os); 1246fa9e4066Sahrens 12478afd4dd6Sperrin /* 12488afd4dd6Sperrin * We can now safely destroy the '.zfs' directory node. 12498afd4dd6Sperrin */ 12508afd4dd6Sperrin if (zfsvfs->z_ctldir != NULL) 12518afd4dd6Sperrin zfsctl_destroy(zfsvfs); 12528afd4dd6Sperrin 1253fa9e4066Sahrens } 1254fa9e4066Sahrens 1255fa9e4066Sahrens static void 1256fa9e4066Sahrens zfs_freevfs(vfs_t *vfsp) 1257fa9e4066Sahrens { 1258fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1259fa9e4066Sahrens 1260fa9e4066Sahrens kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1261fa9e4066Sahrens 1262fa9e4066Sahrens atomic_add_32(&zfs_active_fs_count, -1); 1263fa9e4066Sahrens } 1264fa9e4066Sahrens 1265fa9e4066Sahrens /* 1266fa9e4066Sahrens * VFS_INIT() initialization. Note that there is no VFS_FINI(), 1267fa9e4066Sahrens * so we can't safely do any non-idempotent initialization here. 1268fa9e4066Sahrens * Leave that to zfs_init() and zfs_fini(), which are called 1269fa9e4066Sahrens * from the module's _init() and _fini() entry points. 1270fa9e4066Sahrens */ 1271fa9e4066Sahrens /*ARGSUSED*/ 1272fa9e4066Sahrens static int 1273fa9e4066Sahrens zfs_vfsinit(int fstype, char *name) 1274fa9e4066Sahrens { 1275fa9e4066Sahrens int error; 1276fa9e4066Sahrens 1277fa9e4066Sahrens zfsfstype = fstype; 1278fa9e4066Sahrens 1279fa9e4066Sahrens /* 1280fa9e4066Sahrens * Setup vfsops and vnodeops tables. 1281fa9e4066Sahrens */ 1282fa9e4066Sahrens error = vfs_setfsops(fstype, zfs_vfsops_template, &zfs_vfsops); 1283fa9e4066Sahrens if (error != 0) { 1284fa9e4066Sahrens cmn_err(CE_WARN, "zfs: bad vfs ops template"); 1285fa9e4066Sahrens } 1286fa9e4066Sahrens 1287fa9e4066Sahrens error = zfs_create_op_tables(); 1288fa9e4066Sahrens if (error) { 1289fa9e4066Sahrens zfs_remove_op_tables(); 1290fa9e4066Sahrens cmn_err(CE_WARN, "zfs: bad vnode ops template"); 1291fa9e4066Sahrens (void) vfs_freevfsops_by_type(zfsfstype); 1292fa9e4066Sahrens return (error); 1293fa9e4066Sahrens } 1294fa9e4066Sahrens 1295fa9e4066Sahrens mutex_init(&zfs_dev_mtx, NULL, MUTEX_DEFAULT, NULL); 1296fa9e4066Sahrens 1297fa9e4066Sahrens /* 1298a0965f35Sbonwick * Unique major number for all zfs mounts. 1299a0965f35Sbonwick * If we run out of 32-bit minors, we'll getudev() another major. 1300fa9e4066Sahrens */ 1301a0965f35Sbonwick zfs_major = ddi_name_to_major(ZFS_DRIVER); 1302a0965f35Sbonwick zfs_minor = ZFS_MIN_MINOR; 1303fa9e4066Sahrens 1304fa9e4066Sahrens return (0); 1305fa9e4066Sahrens } 1306fa9e4066Sahrens 1307fa9e4066Sahrens void 1308fa9e4066Sahrens zfs_init(void) 1309fa9e4066Sahrens { 1310fa9e4066Sahrens /* 1311fa9e4066Sahrens * Initialize .zfs directory structures 1312fa9e4066Sahrens */ 1313fa9e4066Sahrens zfsctl_init(); 1314fa9e4066Sahrens 1315fa9e4066Sahrens /* 1316fa9e4066Sahrens * Initialize znode cache, vnode ops, etc... 1317fa9e4066Sahrens */ 1318fa9e4066Sahrens zfs_znode_init(); 1319fa9e4066Sahrens } 1320fa9e4066Sahrens 1321fa9e4066Sahrens void 1322fa9e4066Sahrens zfs_fini(void) 1323fa9e4066Sahrens { 1324fa9e4066Sahrens zfsctl_fini(); 1325fa9e4066Sahrens zfs_znode_fini(); 1326fa9e4066Sahrens } 1327fa9e4066Sahrens 1328fa9e4066Sahrens int 1329fa9e4066Sahrens zfs_busy(void) 1330fa9e4066Sahrens { 1331fa9e4066Sahrens return (zfs_active_fs_count != 0); 1332fa9e4066Sahrens } 1333fa9e4066Sahrens 1334e7437265Sahrens int 1335e7437265Sahrens zfs_get_stats(objset_t *os, nvlist_t *nv) 1336e7437265Sahrens { 1337e7437265Sahrens int error; 1338e7437265Sahrens uint64_t val; 1339e7437265Sahrens 1340e7437265Sahrens error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, &val); 1341e7437265Sahrens if (error == 0) 1342e7437265Sahrens dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VERSION, val); 1343e7437265Sahrens 1344e7437265Sahrens return (error); 1345e7437265Sahrens } 1346e7437265Sahrens 1347e7437265Sahrens int 1348e7437265Sahrens zfs_set_version(const char *name, uint64_t newvers) 1349e7437265Sahrens { 1350e7437265Sahrens int error; 1351e7437265Sahrens objset_t *os; 1352e7437265Sahrens dmu_tx_t *tx; 1353e7437265Sahrens uint64_t curvers; 1354e7437265Sahrens 1355e7437265Sahrens /* 1356e7437265Sahrens * XXX for now, require that the filesystem be unmounted. Would 1357e7437265Sahrens * be nice to find the zfsvfs_t and just update that if 1358e7437265Sahrens * possible. 1359e7437265Sahrens */ 1360e7437265Sahrens 1361e7437265Sahrens if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) 1362e7437265Sahrens return (EINVAL); 1363e7437265Sahrens 1364e7437265Sahrens error = dmu_objset_open(name, DMU_OST_ZFS, DS_MODE_PRIMARY, &os); 1365e7437265Sahrens if (error) 1366e7437265Sahrens return (error); 1367e7437265Sahrens 1368e7437265Sahrens error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 1369e7437265Sahrens 8, 1, &curvers); 1370e7437265Sahrens if (error) 1371e7437265Sahrens goto out; 1372e7437265Sahrens if (newvers < curvers) { 1373e7437265Sahrens error = EINVAL; 1374e7437265Sahrens goto out; 1375e7437265Sahrens } 1376e7437265Sahrens 1377e7437265Sahrens tx = dmu_tx_create(os); 1378e7437265Sahrens dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, 0, ZPL_VERSION_STR); 1379e7437265Sahrens error = dmu_tx_assign(tx, TXG_WAIT); 1380e7437265Sahrens if (error) { 1381e7437265Sahrens dmu_tx_abort(tx); 1382e7437265Sahrens goto out; 1383e7437265Sahrens } 1384e7437265Sahrens error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, 1385e7437265Sahrens &newvers, tx); 1386e7437265Sahrens 1387e7437265Sahrens spa_history_internal_log(LOG_DS_UPGRADE, 1388e7437265Sahrens dmu_objset_spa(os), tx, CRED(), 1389e7437265Sahrens "oldver=%llu newver=%llu dataset = %llu", curvers, newvers, 1390e7437265Sahrens dmu_objset_id(os)); 1391e7437265Sahrens dmu_tx_commit(tx); 1392e7437265Sahrens 1393e7437265Sahrens out: 1394e7437265Sahrens dmu_objset_close(os); 1395e7437265Sahrens return (error); 1396e7437265Sahrens } 1397e7437265Sahrens 1398fa9e4066Sahrens static vfsdef_t vfw = { 1399fa9e4066Sahrens VFSDEF_VERSION, 1400fa9e4066Sahrens MNTTYPE_ZFS, 1401fa9e4066Sahrens zfs_vfsinit, 14025a59a8b3Srsb VSW_HASPROTO|VSW_CANRWRO|VSW_CANREMOUNT|VSW_VOLATILEDEV|VSW_STATS, 1403fa9e4066Sahrens &zfs_mntopts 1404fa9e4066Sahrens }; 1405fa9e4066Sahrens 1406fa9e4066Sahrens struct modlfs zfs_modlfs = { 1407e7437265Sahrens &mod_fsops, "ZFS filesystem version " SPA_VERSION_STRING, &vfw 1408fa9e4066Sahrens }; 1409