1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5033f9833Sek * Common Development and Distribution License (the "License"). 6033f9833Sek * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 22893a6d32Sahrens * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23fa9e4066Sahrens * Use is subject to license terms. 24fa9e4066Sahrens */ 25fa9e4066Sahrens 2678077464Sck #pragma ident "%Z%%M% %I% %E% SMI" 27fa9e4066Sahrens 28fa9e4066Sahrens #include <sys/types.h> 29fa9e4066Sahrens #include <sys/param.h> 30fa9e4066Sahrens #include <sys/systm.h> 31fa9e4066Sahrens #include <sys/sysmacros.h> 32fa9e4066Sahrens #include <sys/kmem.h> 33fa9e4066Sahrens #include <sys/pathname.h> 34fa9e4066Sahrens #include <sys/vnode.h> 35fa9e4066Sahrens #include <sys/vfs.h> 36aa59c4cbSrsb #include <sys/vfs_opreg.h> 37fa9e4066Sahrens #include <sys/mntent.h> 38fa9e4066Sahrens #include <sys/mount.h> 39fa9e4066Sahrens #include <sys/cmn_err.h> 40fa9e4066Sahrens #include "fs/fs_subr.h" 41fa9e4066Sahrens #include <sys/zfs_znode.h> 42893a6d32Sahrens #include <sys/zfs_dir.h> 43*da6c28aaSamw #include <sys/zfs_i18n.h> 44fa9e4066Sahrens #include <sys/zil.h> 45fa9e4066Sahrens #include <sys/fs/zfs.h> 46fa9e4066Sahrens #include <sys/dmu.h> 47fa9e4066Sahrens #include <sys/dsl_prop.h> 48b1b8ab34Slling #include <sys/dsl_dataset.h> 49ecd6cf80Smarks #include <sys/dsl_deleg.h> 50fa9e4066Sahrens #include <sys/spa.h> 51fa9e4066Sahrens #include <sys/zap.h> 52fa9e4066Sahrens #include <sys/varargs.h> 53fa9e4066Sahrens #include <sys/policy.h> 54fa9e4066Sahrens #include <sys/atomic.h> 55fa9e4066Sahrens #include <sys/mkdev.h> 56fa9e4066Sahrens #include <sys/modctl.h> 57ecd6cf80Smarks #include <sys/refstr.h> 58fa9e4066Sahrens #include <sys/zfs_ioctl.h> 59fa9e4066Sahrens #include <sys/zfs_ctldir.h> 60*da6c28aaSamw #include <sys/zfs_fuid.h> 61ea8dc4b6Seschrock #include <sys/bootconf.h> 62a0965f35Sbonwick #include <sys/sunddi.h> 63033f9833Sek #include <sys/dnlc.h> 64f18faf3fSek #include <sys/dmu_objset.h> 65fa9e4066Sahrens 66fa9e4066Sahrens int zfsfstype; 67fa9e4066Sahrens vfsops_t *zfs_vfsops = NULL; 68a0965f35Sbonwick static major_t zfs_major; 69fa9e4066Sahrens static minor_t zfs_minor; 70fa9e4066Sahrens static kmutex_t zfs_dev_mtx; 71fa9e4066Sahrens 72fa9e4066Sahrens static int zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr); 73fa9e4066Sahrens static int zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr); 74ea8dc4b6Seschrock static int zfs_mountroot(vfs_t *vfsp, enum whymountroot); 75fa9e4066Sahrens static int zfs_root(vfs_t *vfsp, vnode_t **vpp); 76fa9e4066Sahrens static int zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp); 77fa9e4066Sahrens static int zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp); 78fa9e4066Sahrens static void zfs_freevfs(vfs_t *vfsp); 79fa9e4066Sahrens 80fa9e4066Sahrens static const fs_operation_def_t zfs_vfsops_template[] = { 81aa59c4cbSrsb VFSNAME_MOUNT, { .vfs_mount = zfs_mount }, 82aa59c4cbSrsb VFSNAME_MOUNTROOT, { .vfs_mountroot = zfs_mountroot }, 83aa59c4cbSrsb VFSNAME_UNMOUNT, { .vfs_unmount = zfs_umount }, 84aa59c4cbSrsb VFSNAME_ROOT, { .vfs_root = zfs_root }, 85aa59c4cbSrsb VFSNAME_STATVFS, { .vfs_statvfs = zfs_statvfs }, 86aa59c4cbSrsb VFSNAME_SYNC, { .vfs_sync = zfs_sync }, 87aa59c4cbSrsb VFSNAME_VGET, { .vfs_vget = zfs_vget }, 88aa59c4cbSrsb VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, 89aa59c4cbSrsb NULL, NULL 90fa9e4066Sahrens }; 91fa9e4066Sahrens 92fa9e4066Sahrens static const fs_operation_def_t zfs_vfsops_eio_template[] = { 93aa59c4cbSrsb VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, 94aa59c4cbSrsb NULL, NULL 95fa9e4066Sahrens }; 96fa9e4066Sahrens 97fa9e4066Sahrens /* 98fa9e4066Sahrens * We need to keep a count of active fs's. 99fa9e4066Sahrens * This is necessary to prevent our module 100fa9e4066Sahrens * from being unloaded after a umount -f 101fa9e4066Sahrens */ 102fa9e4066Sahrens static uint32_t zfs_active_fs_count = 0; 103fa9e4066Sahrens 104fa9e4066Sahrens static char *noatime_cancel[] = { MNTOPT_ATIME, NULL }; 105fa9e4066Sahrens static char *atime_cancel[] = { MNTOPT_NOATIME, NULL }; 1067b55fa8eSck static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL }; 1077b55fa8eSck static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL }; 108fa9e4066Sahrens 1097b55fa8eSck /* 110b510d378Slling * MO_DEFAULT is not used since the default value is determined 111b510d378Slling * by the equivalent property. 1127b55fa8eSck */ 113fa9e4066Sahrens static mntopt_t mntopts[] = { 1147b55fa8eSck { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, NULL }, 1157b55fa8eSck { MNTOPT_XATTR, xattr_cancel, NULL, 0, NULL }, 116b510d378Slling { MNTOPT_NOATIME, noatime_cancel, NULL, 0, NULL }, 117fa9e4066Sahrens { MNTOPT_ATIME, atime_cancel, NULL, 0, NULL } 118fa9e4066Sahrens }; 119fa9e4066Sahrens 120fa9e4066Sahrens static mntopts_t zfs_mntopts = { 121fa9e4066Sahrens sizeof (mntopts) / sizeof (mntopt_t), 122fa9e4066Sahrens mntopts 123fa9e4066Sahrens }; 124fa9e4066Sahrens 125fa9e4066Sahrens /*ARGSUSED*/ 126fa9e4066Sahrens int 127fa9e4066Sahrens zfs_sync(vfs_t *vfsp, short flag, cred_t *cr) 128fa9e4066Sahrens { 129fa9e4066Sahrens /* 130fa9e4066Sahrens * Data integrity is job one. We don't want a compromised kernel 131fa9e4066Sahrens * writing to the storage pool, so we never sync during panic. 132fa9e4066Sahrens */ 133fa9e4066Sahrens if (panicstr) 134fa9e4066Sahrens return (0); 135fa9e4066Sahrens 136fa9e4066Sahrens /* 137fa9e4066Sahrens * SYNC_ATTR is used by fsflush() to force old filesystems like UFS 138fa9e4066Sahrens * to sync metadata, which they would otherwise cache indefinitely. 139fa9e4066Sahrens * Semantically, the only requirement is that the sync be initiated. 140fa9e4066Sahrens * The DMU syncs out txgs frequently, so there's nothing to do. 141fa9e4066Sahrens */ 142fa9e4066Sahrens if (flag & SYNC_ATTR) 143fa9e4066Sahrens return (0); 144fa9e4066Sahrens 145fa9e4066Sahrens if (vfsp != NULL) { 146fa9e4066Sahrens /* 147fa9e4066Sahrens * Sync a specific filesystem. 148fa9e4066Sahrens */ 149fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 150fa9e4066Sahrens 151fa9e4066Sahrens ZFS_ENTER(zfsvfs); 152fa9e4066Sahrens if (zfsvfs->z_log != NULL) 153b19a79ecSperrin zil_commit(zfsvfs->z_log, UINT64_MAX, 0); 154fa9e4066Sahrens else 155fa9e4066Sahrens txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 156fa9e4066Sahrens ZFS_EXIT(zfsvfs); 157fa9e4066Sahrens } else { 158fa9e4066Sahrens /* 159fa9e4066Sahrens * Sync all ZFS filesystems. This is what happens when you 160fa9e4066Sahrens * run sync(1M). Unlike other filesystems, ZFS honors the 161fa9e4066Sahrens * request by waiting for all pools to commit all dirty data. 162fa9e4066Sahrens */ 163fa9e4066Sahrens spa_sync_allpools(); 164fa9e4066Sahrens } 165fa9e4066Sahrens 166fa9e4066Sahrens return (0); 167fa9e4066Sahrens } 168fa9e4066Sahrens 169ea8dc4b6Seschrock static int 170ea8dc4b6Seschrock zfs_create_unique_device(dev_t *dev) 171ea8dc4b6Seschrock { 172ea8dc4b6Seschrock major_t new_major; 173ea8dc4b6Seschrock 174ea8dc4b6Seschrock do { 175ea8dc4b6Seschrock ASSERT3U(zfs_minor, <=, MAXMIN32); 176ea8dc4b6Seschrock minor_t start = zfs_minor; 177ea8dc4b6Seschrock do { 178ea8dc4b6Seschrock mutex_enter(&zfs_dev_mtx); 179ea8dc4b6Seschrock if (zfs_minor >= MAXMIN32) { 180ea8dc4b6Seschrock /* 181ea8dc4b6Seschrock * If we're still using the real major 182ea8dc4b6Seschrock * keep out of /dev/zfs and /dev/zvol minor 183ea8dc4b6Seschrock * number space. If we're using a getudev()'ed 184ea8dc4b6Seschrock * major number, we can use all of its minors. 185ea8dc4b6Seschrock */ 186ea8dc4b6Seschrock if (zfs_major == ddi_name_to_major(ZFS_DRIVER)) 187ea8dc4b6Seschrock zfs_minor = ZFS_MIN_MINOR; 188ea8dc4b6Seschrock else 189ea8dc4b6Seschrock zfs_minor = 0; 190ea8dc4b6Seschrock } else { 191ea8dc4b6Seschrock zfs_minor++; 192ea8dc4b6Seschrock } 193ea8dc4b6Seschrock *dev = makedevice(zfs_major, zfs_minor); 194ea8dc4b6Seschrock mutex_exit(&zfs_dev_mtx); 195ea8dc4b6Seschrock } while (vfs_devismounted(*dev) && zfs_minor != start); 196ea8dc4b6Seschrock if (zfs_minor == start) { 197ea8dc4b6Seschrock /* 198ea8dc4b6Seschrock * We are using all ~262,000 minor numbers for the 199ea8dc4b6Seschrock * current major number. Create a new major number. 200ea8dc4b6Seschrock */ 201ea8dc4b6Seschrock if ((new_major = getudev()) == (major_t)-1) { 202ea8dc4b6Seschrock cmn_err(CE_WARN, 203ea8dc4b6Seschrock "zfs_mount: Can't get unique major " 204ea8dc4b6Seschrock "device number."); 205ea8dc4b6Seschrock return (-1); 206ea8dc4b6Seschrock } 207ea8dc4b6Seschrock mutex_enter(&zfs_dev_mtx); 208ea8dc4b6Seschrock zfs_major = new_major; 209ea8dc4b6Seschrock zfs_minor = 0; 210ea8dc4b6Seschrock 211ea8dc4b6Seschrock mutex_exit(&zfs_dev_mtx); 212ea8dc4b6Seschrock } else { 213ea8dc4b6Seschrock break; 214ea8dc4b6Seschrock } 215ea8dc4b6Seschrock /* CONSTANTCONDITION */ 216ea8dc4b6Seschrock } while (1); 217ea8dc4b6Seschrock 218ea8dc4b6Seschrock return (0); 219ea8dc4b6Seschrock } 220ea8dc4b6Seschrock 221fa9e4066Sahrens static void 222fa9e4066Sahrens atime_changed_cb(void *arg, uint64_t newval) 223fa9e4066Sahrens { 224fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 225fa9e4066Sahrens 226fa9e4066Sahrens if (newval == TRUE) { 227fa9e4066Sahrens zfsvfs->z_atime = TRUE; 228fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 229fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 230fa9e4066Sahrens } else { 231fa9e4066Sahrens zfsvfs->z_atime = FALSE; 232fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 233fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 234fa9e4066Sahrens } 235fa9e4066Sahrens } 236fa9e4066Sahrens 2377b55fa8eSck static void 2387b55fa8eSck xattr_changed_cb(void *arg, uint64_t newval) 2397b55fa8eSck { 2407b55fa8eSck zfsvfs_t *zfsvfs = arg; 2417b55fa8eSck 2427b55fa8eSck if (newval == TRUE) { 2437b55fa8eSck /* XXX locking on vfs_flag? */ 2447b55fa8eSck zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 2457b55fa8eSck vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 2467b55fa8eSck vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 2477b55fa8eSck } else { 2487b55fa8eSck /* XXX locking on vfs_flag? */ 2497b55fa8eSck zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 2507b55fa8eSck vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 2517b55fa8eSck vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 2527b55fa8eSck } 2537b55fa8eSck } 2547b55fa8eSck 255fa9e4066Sahrens static void 256fa9e4066Sahrens blksz_changed_cb(void *arg, uint64_t newval) 257fa9e4066Sahrens { 258fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 259fa9e4066Sahrens 260fa9e4066Sahrens if (newval < SPA_MINBLOCKSIZE || 261fa9e4066Sahrens newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 262fa9e4066Sahrens newval = SPA_MAXBLOCKSIZE; 263fa9e4066Sahrens 264fa9e4066Sahrens zfsvfs->z_max_blksz = newval; 265fa9e4066Sahrens zfsvfs->z_vfs->vfs_bsize = newval; 266fa9e4066Sahrens } 267fa9e4066Sahrens 268fa9e4066Sahrens static void 269fa9e4066Sahrens readonly_changed_cb(void *arg, uint64_t newval) 270fa9e4066Sahrens { 271fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 272fa9e4066Sahrens 273fa9e4066Sahrens if (newval) { 274fa9e4066Sahrens /* XXX locking on vfs_flag? */ 275fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 276fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 277fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 278fa9e4066Sahrens } else { 279fa9e4066Sahrens /* XXX locking on vfs_flag? */ 280fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 281fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 282fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 283fa9e4066Sahrens } 284fa9e4066Sahrens } 285fa9e4066Sahrens 286fa9e4066Sahrens static void 287fa9e4066Sahrens devices_changed_cb(void *arg, uint64_t newval) 288fa9e4066Sahrens { 289fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 290fa9e4066Sahrens 291fa9e4066Sahrens if (newval == FALSE) { 292fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NODEVICES; 293fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES); 294fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES, NULL, 0); 295fa9e4066Sahrens } else { 296fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NODEVICES; 297fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES); 298fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES, NULL, 0); 299fa9e4066Sahrens } 300fa9e4066Sahrens } 301fa9e4066Sahrens 302fa9e4066Sahrens static void 303fa9e4066Sahrens setuid_changed_cb(void *arg, uint64_t newval) 304fa9e4066Sahrens { 305fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 306fa9e4066Sahrens 307fa9e4066Sahrens if (newval == FALSE) { 308fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 309fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 310fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 311fa9e4066Sahrens } else { 312fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 313fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 314fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 315fa9e4066Sahrens } 316fa9e4066Sahrens } 317fa9e4066Sahrens 318fa9e4066Sahrens static void 319fa9e4066Sahrens exec_changed_cb(void *arg, uint64_t newval) 320fa9e4066Sahrens { 321fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 322fa9e4066Sahrens 323fa9e4066Sahrens if (newval == FALSE) { 324fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 325fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 326fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 327fa9e4066Sahrens } else { 328fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 329fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 330fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 331fa9e4066Sahrens } 332fa9e4066Sahrens } 333fa9e4066Sahrens 334*da6c28aaSamw /* 335*da6c28aaSamw * The nbmand mount option can be changed at mount time. 336*da6c28aaSamw * We can't allow it to be toggled on live file systems or incorrect 337*da6c28aaSamw * behavior may be seen from cifs clients 338*da6c28aaSamw * 339*da6c28aaSamw * This property isn't registered via dsl_prop_register(), but this callback 340*da6c28aaSamw * will be called when a file system is first mounted 341*da6c28aaSamw */ 342*da6c28aaSamw static void 343*da6c28aaSamw nbmand_changed_cb(void *arg, uint64_t newval) 344*da6c28aaSamw { 345*da6c28aaSamw zfsvfs_t *zfsvfs = arg; 346*da6c28aaSamw if (newval == FALSE) { 347*da6c28aaSamw vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND); 348*da6c28aaSamw vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0); 349*da6c28aaSamw } else { 350*da6c28aaSamw vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND); 351*da6c28aaSamw vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0); 352*da6c28aaSamw } 353*da6c28aaSamw } 354*da6c28aaSamw 355fa9e4066Sahrens static void 356fa9e4066Sahrens snapdir_changed_cb(void *arg, uint64_t newval) 357fa9e4066Sahrens { 358fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 359fa9e4066Sahrens 360fa9e4066Sahrens zfsvfs->z_show_ctldir = newval; 361fa9e4066Sahrens } 362fa9e4066Sahrens 363*da6c28aaSamw static void 364*da6c28aaSamw vscan_changed_cb(void *arg, uint64_t newval) 365*da6c28aaSamw { 366*da6c28aaSamw zfsvfs_t *zfsvfs = arg; 367*da6c28aaSamw 368*da6c28aaSamw zfsvfs->z_vscan = newval; 369*da6c28aaSamw } 370*da6c28aaSamw 371fa9e4066Sahrens static void 372fa9e4066Sahrens acl_mode_changed_cb(void *arg, uint64_t newval) 373fa9e4066Sahrens { 374fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 375fa9e4066Sahrens 376fa9e4066Sahrens zfsvfs->z_acl_mode = newval; 377fa9e4066Sahrens } 378fa9e4066Sahrens 379fa9e4066Sahrens static void 380fa9e4066Sahrens acl_inherit_changed_cb(void *arg, uint64_t newval) 381fa9e4066Sahrens { 382fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 383fa9e4066Sahrens 384fa9e4066Sahrens zfsvfs->z_acl_inherit = newval; 385fa9e4066Sahrens } 386fa9e4066Sahrens 387*da6c28aaSamw static int 388*da6c28aaSamw zfs_normalization_set(char *osname, zfsvfs_t *zfsvfs) 389*da6c28aaSamw { 390*da6c28aaSamw uint64_t pval; 391*da6c28aaSamw int error; 392*da6c28aaSamw 393*da6c28aaSamw if (zfsvfs->z_version < ZPL_VERSION_FUID) 394*da6c28aaSamw return (0); 395*da6c28aaSamw 396*da6c28aaSamw error = dsl_prop_get_integer(osname, "normalization", &pval, NULL); 397*da6c28aaSamw if (error) 398*da6c28aaSamw goto normquit; 399*da6c28aaSamw switch ((int)pval) { 400*da6c28aaSamw case ZFS_NORMALIZE_NONE: 401*da6c28aaSamw break; 402*da6c28aaSamw case ZFS_NORMALIZE_C: 403*da6c28aaSamw zfsvfs->z_norm |= U8_TEXTPREP_NFC; 404*da6c28aaSamw break; 405*da6c28aaSamw case ZFS_NORMALIZE_KC: 406*da6c28aaSamw zfsvfs->z_norm |= U8_TEXTPREP_NFKC; 407*da6c28aaSamw break; 408*da6c28aaSamw case ZFS_NORMALIZE_D: 409*da6c28aaSamw zfsvfs->z_norm |= U8_TEXTPREP_NFD; 410*da6c28aaSamw break; 411*da6c28aaSamw case ZFS_NORMALIZE_KD: 412*da6c28aaSamw zfsvfs->z_norm |= U8_TEXTPREP_NFKD; 413*da6c28aaSamw break; 414*da6c28aaSamw default: 415*da6c28aaSamw ASSERT(pval <= ZFS_NORMALIZE_KD); 416*da6c28aaSamw break; 417*da6c28aaSamw } 418*da6c28aaSamw 419*da6c28aaSamw error = dsl_prop_get_integer(osname, "utf8only", &pval, NULL); 420*da6c28aaSamw if (error) 421*da6c28aaSamw goto normquit; 422*da6c28aaSamw if (pval) 423*da6c28aaSamw zfsvfs->z_case |= ZFS_UTF8_ONLY; 424*da6c28aaSamw else 425*da6c28aaSamw zfsvfs->z_case &= ~ZFS_UTF8_ONLY; 426*da6c28aaSamw 427*da6c28aaSamw error = dsl_prop_get_integer(osname, "casesensitivity", &pval, NULL); 428*da6c28aaSamw if (error) 429*da6c28aaSamw goto normquit; 430*da6c28aaSamw vfs_set_feature(zfsvfs->z_vfs, VFSFT_DIRENTFLAGS); 431*da6c28aaSamw switch ((int)pval) { 432*da6c28aaSamw case ZFS_CASE_SENSITIVE: 433*da6c28aaSamw break; 434*da6c28aaSamw case ZFS_CASE_INSENSITIVE: 435*da6c28aaSamw zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; 436*da6c28aaSamw zfsvfs->z_case |= ZFS_CI_ONLY; 437*da6c28aaSamw vfs_set_feature(zfsvfs->z_vfs, VFSFT_CASEINSENSITIVE); 438*da6c28aaSamw vfs_set_feature(zfsvfs->z_vfs, VFSFT_NOCASESENSITIVE); 439*da6c28aaSamw break; 440*da6c28aaSamw case ZFS_CASE_MIXED: 441*da6c28aaSamw zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; 442*da6c28aaSamw zfsvfs->z_case |= ZFS_CI_MIXD; 443*da6c28aaSamw vfs_set_feature(zfsvfs->z_vfs, VFSFT_CASEINSENSITIVE); 444*da6c28aaSamw break; 445*da6c28aaSamw default: 446*da6c28aaSamw ASSERT(pval <= ZFS_CASE_MIXED); 447*da6c28aaSamw break; 448*da6c28aaSamw } 449*da6c28aaSamw 450*da6c28aaSamw normquit: 451*da6c28aaSamw return (error); 452*da6c28aaSamw } 453*da6c28aaSamw 454ea8dc4b6Seschrock static int 455ea8dc4b6Seschrock zfs_register_callbacks(vfs_t *vfsp) 456ea8dc4b6Seschrock { 457ea8dc4b6Seschrock struct dsl_dataset *ds = NULL; 458ea8dc4b6Seschrock objset_t *os = NULL; 459ea8dc4b6Seschrock zfsvfs_t *zfsvfs = NULL; 460*da6c28aaSamw uint64_t nbmand; 461*da6c28aaSamw int readonly, do_readonly = B_FALSE; 462*da6c28aaSamw int setuid, do_setuid = B_FALSE; 463*da6c28aaSamw int exec, do_exec = B_FALSE; 464*da6c28aaSamw int devices, do_devices = B_FALSE; 465*da6c28aaSamw int xattr, do_xattr = B_FALSE; 466*da6c28aaSamw int atime, do_atime = B_FALSE; 467ea8dc4b6Seschrock int error = 0; 468ea8dc4b6Seschrock 469ea8dc4b6Seschrock ASSERT(vfsp); 470ea8dc4b6Seschrock zfsvfs = vfsp->vfs_data; 471ea8dc4b6Seschrock ASSERT(zfsvfs); 472ea8dc4b6Seschrock os = zfsvfs->z_os; 473fa9e4066Sahrens 474fa9e4066Sahrens /* 475ea8dc4b6Seschrock * The act of registering our callbacks will destroy any mount 476ea8dc4b6Seschrock * options we may have. In order to enable temporary overrides 4777b55fa8eSck * of mount options, we stash away the current values and 478ea8dc4b6Seschrock * restore them after we register the callbacks. 479fa9e4066Sahrens */ 480ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 481ea8dc4b6Seschrock readonly = B_TRUE; 482ea8dc4b6Seschrock do_readonly = B_TRUE; 483ea8dc4b6Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 484ea8dc4b6Seschrock readonly = B_FALSE; 485ea8dc4b6Seschrock do_readonly = B_TRUE; 486ea8dc4b6Seschrock } 487ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 488ea8dc4b6Seschrock devices = B_FALSE; 489ea8dc4b6Seschrock setuid = B_FALSE; 490ea8dc4b6Seschrock do_devices = B_TRUE; 491ea8dc4b6Seschrock do_setuid = B_TRUE; 492ea8dc4b6Seschrock } else { 493ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { 494ea8dc4b6Seschrock devices = B_FALSE; 495ea8dc4b6Seschrock do_devices = B_TRUE; 496b1b8ab34Slling } else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) { 497ea8dc4b6Seschrock devices = B_TRUE; 498ea8dc4b6Seschrock do_devices = B_TRUE; 499fa9e4066Sahrens } 500fa9e4066Sahrens 501ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 502ea8dc4b6Seschrock setuid = B_FALSE; 503ea8dc4b6Seschrock do_setuid = B_TRUE; 504ea8dc4b6Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 505ea8dc4b6Seschrock setuid = B_TRUE; 506ea8dc4b6Seschrock do_setuid = B_TRUE; 507fa9e4066Sahrens } 508ea8dc4b6Seschrock } 509ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 510ea8dc4b6Seschrock exec = B_FALSE; 511ea8dc4b6Seschrock do_exec = B_TRUE; 512ea8dc4b6Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 513ea8dc4b6Seschrock exec = B_TRUE; 514ea8dc4b6Seschrock do_exec = B_TRUE; 515fa9e4066Sahrens } 5167b55fa8eSck if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 5177b55fa8eSck xattr = B_FALSE; 5187b55fa8eSck do_xattr = B_TRUE; 5197b55fa8eSck } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 5207b55fa8eSck xattr = B_TRUE; 5217b55fa8eSck do_xattr = B_TRUE; 5227b55fa8eSck } 523b510d378Slling if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 524b510d378Slling atime = B_FALSE; 525b510d378Slling do_atime = B_TRUE; 526b510d378Slling } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 527b510d378Slling atime = B_TRUE; 528b510d378Slling do_atime = B_TRUE; 529b510d378Slling } 530fa9e4066Sahrens 531*da6c28aaSamw /* 532*da6c28aaSamw * nbmand is a special property. It can only be changed at 533*da6c28aaSamw * mount time. 534*da6c28aaSamw * 535*da6c28aaSamw * This is weird, but it is documented to only be changeable 536*da6c28aaSamw * at mount time. 537*da6c28aaSamw */ 538*da6c28aaSamw if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 539*da6c28aaSamw nbmand = B_FALSE; 540*da6c28aaSamw } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { 541*da6c28aaSamw nbmand = B_TRUE; 542*da6c28aaSamw } else { 543*da6c28aaSamw char osname[MAXNAMELEN]; 544*da6c28aaSamw 545*da6c28aaSamw dmu_objset_name(os, osname); 546*da6c28aaSamw if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand, 547*da6c28aaSamw NULL)) 548*da6c28aaSamw return (error); 549*da6c28aaSamw } 550*da6c28aaSamw 551fa9e4066Sahrens /* 552ea8dc4b6Seschrock * Register property callbacks. 553ea8dc4b6Seschrock * 554ea8dc4b6Seschrock * It would probably be fine to just check for i/o error from 555ea8dc4b6Seschrock * the first prop_register(), but I guess I like to go 556ea8dc4b6Seschrock * overboard... 557fa9e4066Sahrens */ 558ea8dc4b6Seschrock ds = dmu_objset_ds(os); 559ea8dc4b6Seschrock error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 5607b55fa8eSck error = error ? error : dsl_prop_register(ds, 5617b55fa8eSck "xattr", xattr_changed_cb, zfsvfs); 562ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 563ea8dc4b6Seschrock "recordsize", blksz_changed_cb, zfsvfs); 564ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 565ea8dc4b6Seschrock "readonly", readonly_changed_cb, zfsvfs); 566ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 567ea8dc4b6Seschrock "devices", devices_changed_cb, zfsvfs); 568ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 569ea8dc4b6Seschrock "setuid", setuid_changed_cb, zfsvfs); 570ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 571ea8dc4b6Seschrock "exec", exec_changed_cb, zfsvfs); 572ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 573ea8dc4b6Seschrock "snapdir", snapdir_changed_cb, zfsvfs); 574ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 575ea8dc4b6Seschrock "aclmode", acl_mode_changed_cb, zfsvfs); 576ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 577ea8dc4b6Seschrock "aclinherit", acl_inherit_changed_cb, zfsvfs); 578*da6c28aaSamw error = error ? error : dsl_prop_register(ds, 579*da6c28aaSamw "vscan", vscan_changed_cb, zfsvfs); 580ea8dc4b6Seschrock if (error) 581ea8dc4b6Seschrock goto unregister; 582fa9e4066Sahrens 583ea8dc4b6Seschrock /* 584ea8dc4b6Seschrock * Invoke our callbacks to restore temporary mount options. 585ea8dc4b6Seschrock */ 586ea8dc4b6Seschrock if (do_readonly) 587ea8dc4b6Seschrock readonly_changed_cb(zfsvfs, readonly); 588ea8dc4b6Seschrock if (do_setuid) 589ea8dc4b6Seschrock setuid_changed_cb(zfsvfs, setuid); 590ea8dc4b6Seschrock if (do_exec) 591ea8dc4b6Seschrock exec_changed_cb(zfsvfs, exec); 592ea8dc4b6Seschrock if (do_devices) 593ea8dc4b6Seschrock devices_changed_cb(zfsvfs, devices); 5947b55fa8eSck if (do_xattr) 5957b55fa8eSck xattr_changed_cb(zfsvfs, xattr); 596b510d378Slling if (do_atime) 597b510d378Slling atime_changed_cb(zfsvfs, atime); 598fa9e4066Sahrens 599*da6c28aaSamw nbmand_changed_cb(zfsvfs, nbmand); 600*da6c28aaSamw 601ea8dc4b6Seschrock return (0); 602fa9e4066Sahrens 603ea8dc4b6Seschrock unregister: 604fa9e4066Sahrens /* 605ea8dc4b6Seschrock * We may attempt to unregister some callbacks that are not 606ea8dc4b6Seschrock * registered, but this is OK; it will simply return ENOMSG, 607ea8dc4b6Seschrock * which we will ignore. 608fa9e4066Sahrens */ 609ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 6107b55fa8eSck (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 611ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 612ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 613ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "devices", devices_changed_cb, zfsvfs); 614ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 615ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 616ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 617ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); 618ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 619ea8dc4b6Seschrock zfsvfs); 620*da6c28aaSamw (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs); 621ea8dc4b6Seschrock return (error); 622ea8dc4b6Seschrock 623ea8dc4b6Seschrock } 624ea8dc4b6Seschrock 625f18faf3fSek static int 626f18faf3fSek zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) 627f18faf3fSek { 628f18faf3fSek uint_t readonly; 629f18faf3fSek int error; 630f18faf3fSek 631f18faf3fSek error = zfs_register_callbacks(zfsvfs->z_vfs); 632f18faf3fSek if (error) 633f18faf3fSek return (error); 634f18faf3fSek 635f18faf3fSek /* 636f18faf3fSek * Set the objset user_ptr to track its zfsvfs. 637f18faf3fSek */ 638f18faf3fSek mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock); 639f18faf3fSek dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 640f18faf3fSek mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock); 641f18faf3fSek 642f18faf3fSek /* 643f18faf3fSek * If we are not mounting (ie: online recv), then we don't 644f18faf3fSek * have to worry about replaying the log as we blocked all 645f18faf3fSek * operations out since we closed the ZIL. 646f18faf3fSek */ 647f18faf3fSek if (mounting) { 648f18faf3fSek /* 649f18faf3fSek * During replay we remove the read only flag to 650f18faf3fSek * allow replays to succeed. 651f18faf3fSek */ 652f18faf3fSek readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; 653f18faf3fSek if (readonly != 0) 654f18faf3fSek zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 655f18faf3fSek else 656f18faf3fSek zfs_unlinked_drain(zfsvfs); 657f18faf3fSek 658f18faf3fSek /* 659f18faf3fSek * Parse and replay the intent log. 660f18faf3fSek * 661f18faf3fSek * Because of ziltest, this must be done after 662f18faf3fSek * zfs_unlinked_drain(). (Further note: ziltest doesn't 663f18faf3fSek * use readonly mounts, where zfs_unlinked_drain() isn't 664f18faf3fSek * called.) This is because ziltest causes spa_sync() 665f18faf3fSek * to think it's committed, but actually it is not, so 666f18faf3fSek * the intent log contains many txg's worth of changes. 667f18faf3fSek * 668f18faf3fSek * In particular, if object N is in the unlinked set in 669f18faf3fSek * the last txg to actually sync, then it could be 670f18faf3fSek * actually freed in a later txg and then reallocated in 671f18faf3fSek * a yet later txg. This would write a "create object 672f18faf3fSek * N" record to the intent log. Normally, this would be 673f18faf3fSek * fine because the spa_sync() would have written out 674f18faf3fSek * the fact that object N is free, before we could write 675f18faf3fSek * the "create object N" intent log record. 676f18faf3fSek * 677f18faf3fSek * But when we are in ziltest mode, we advance the "open 678f18faf3fSek * txg" without actually spa_sync()-ing the changes to 679f18faf3fSek * disk. So we would see that object N is still 680f18faf3fSek * allocated and in the unlinked set, and there is an 681f18faf3fSek * intent log record saying to allocate it. 682f18faf3fSek */ 683f18faf3fSek zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, 684f18faf3fSek zfs_replay_vector); 685f18faf3fSek 686f18faf3fSek zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */ 687f18faf3fSek } 688f18faf3fSek 689f18faf3fSek if (!zil_disable) 690f18faf3fSek zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 691f18faf3fSek 692f18faf3fSek return (0); 693f18faf3fSek } 694f18faf3fSek 695ea8dc4b6Seschrock static int 696ea8dc4b6Seschrock zfs_domount(vfs_t *vfsp, char *osname, cred_t *cr) 697ea8dc4b6Seschrock { 698ea8dc4b6Seschrock dev_t mount_dev; 699ea8dc4b6Seschrock uint64_t recordsize, readonly; 700ea8dc4b6Seschrock int error = 0; 701ea8dc4b6Seschrock int mode; 702ea8dc4b6Seschrock zfsvfs_t *zfsvfs; 703ea8dc4b6Seschrock znode_t *zp = NULL; 704ea8dc4b6Seschrock 705ea8dc4b6Seschrock ASSERT(vfsp); 706ea8dc4b6Seschrock ASSERT(osname); 707fa9e4066Sahrens 708fa9e4066Sahrens /* 709fa9e4066Sahrens * Initialize the zfs-specific filesystem structure. 710fa9e4066Sahrens * Should probably make this a kmem cache, shuffle fields, 711ea8dc4b6Seschrock * and just bzero up to z_hold_mtx[]. 712fa9e4066Sahrens */ 713fa9e4066Sahrens zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 714fa9e4066Sahrens zfsvfs->z_vfs = vfsp; 715fa9e4066Sahrens zfsvfs->z_parent = zfsvfs; 716fa9e4066Sahrens zfsvfs->z_assign = TXG_NOWAIT; 717fa9e4066Sahrens zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 718a0965f35Sbonwick zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 719fa9e4066Sahrens 720fa9e4066Sahrens mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 721fa9e4066Sahrens list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 722fa9e4066Sahrens offsetof(znode_t, z_link_node)); 723f18faf3fSek rrw_init(&zfsvfs->z_teardown_lock); 724f18faf3fSek rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); 725fa9e4066Sahrens 726ea8dc4b6Seschrock /* Initialize the generic filesystem structure. */ 727fa9e4066Sahrens vfsp->vfs_bcount = 0; 728fa9e4066Sahrens vfsp->vfs_data = NULL; 729fa9e4066Sahrens 730ea8dc4b6Seschrock if (zfs_create_unique_device(&mount_dev) == -1) { 731ea8dc4b6Seschrock error = ENODEV; 732ea8dc4b6Seschrock goto out; 733ea8dc4b6Seschrock } 734fa9e4066Sahrens ASSERT(vfs_devismounted(mount_dev) == 0); 735fa9e4066Sahrens 736ea8dc4b6Seschrock if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 737ea8dc4b6Seschrock NULL)) 738ea8dc4b6Seschrock goto out; 739fa9e4066Sahrens 740fa9e4066Sahrens vfsp->vfs_dev = mount_dev; 741fa9e4066Sahrens vfsp->vfs_fstype = zfsfstype; 742fa9e4066Sahrens vfsp->vfs_bsize = recordsize; 743fa9e4066Sahrens vfsp->vfs_flag |= VFS_NOTRUNC; 744fa9e4066Sahrens vfsp->vfs_data = zfsvfs; 745fa9e4066Sahrens 746ea8dc4b6Seschrock if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) 747fa9e4066Sahrens goto out; 748fa9e4066Sahrens 749fa9e4066Sahrens if (readonly) 750fa9e4066Sahrens mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 751fa9e4066Sahrens else 752fa9e4066Sahrens mode = DS_MODE_PRIMARY; 753fa9e4066Sahrens 754fa9e4066Sahrens error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 755fa9e4066Sahrens if (error == EROFS) { 756fa9e4066Sahrens mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 757fa9e4066Sahrens error = dmu_objset_open(osname, DMU_OST_ZFS, mode, 758fa9e4066Sahrens &zfsvfs->z_os); 759fa9e4066Sahrens } 760fa9e4066Sahrens 761fa9e4066Sahrens if (error) 762fa9e4066Sahrens goto out; 763fa9e4066Sahrens 764fa9e4066Sahrens if (error = zfs_init_fs(zfsvfs, &zp, cr)) 765fa9e4066Sahrens goto out; 766fa9e4066Sahrens 767ea8dc4b6Seschrock /* The call to zfs_init_fs leaves the vnode held, release it here. */ 768ea8dc4b6Seschrock VN_RELE(ZTOV(zp)); 769ea8dc4b6Seschrock 770*da6c28aaSamw /* 771*da6c28aaSamw * Set features for file system. 772*da6c28aaSamw */ 773*da6c28aaSamw zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 774*da6c28aaSamw if (zfsvfs->z_use_fuids) { 775*da6c28aaSamw vfs_set_feature(vfsp, VFSFT_XVATTR); 776*da6c28aaSamw vfs_set_feature(vfsp, VFSFT_ACEMASKONACCESS); 777*da6c28aaSamw vfs_set_feature(vfsp, VFSFT_ACLONCREATE); 778*da6c28aaSamw } 779*da6c28aaSamw 780*da6c28aaSamw /* 781*da6c28aaSamw * Set normalization regardless of whether or not the object 782*da6c28aaSamw * set is a snapshot. Snapshots and clones need to have 783*da6c28aaSamw * identical normalization as did the file system they 784*da6c28aaSamw * originated from. 785*da6c28aaSamw */ 786*da6c28aaSamw if ((error = zfs_normalization_set(osname, zfsvfs)) != 0) 787*da6c28aaSamw goto out; 788*da6c28aaSamw 789ea8dc4b6Seschrock if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 790*da6c28aaSamw uint64_t pval; 7917b55fa8eSck 792fa9e4066Sahrens ASSERT(mode & DS_MODE_READONLY); 793fa9e4066Sahrens atime_changed_cb(zfsvfs, B_FALSE); 794fa9e4066Sahrens readonly_changed_cb(zfsvfs, B_TRUE); 795*da6c28aaSamw if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL)) 7967b55fa8eSck goto out; 797*da6c28aaSamw xattr_changed_cb(zfsvfs, pval); 798fa9e4066Sahrens zfsvfs->z_issnap = B_TRUE; 799fa9e4066Sahrens } else { 800f18faf3fSek error = zfsvfs_setup(zfsvfs, B_TRUE); 801ea8dc4b6Seschrock } 802fa9e4066Sahrens 803ea8dc4b6Seschrock if (!zfsvfs->z_issnap) 804ea8dc4b6Seschrock zfsctl_create(zfsvfs); 805ea8dc4b6Seschrock out: 806ea8dc4b6Seschrock if (error) { 807ea8dc4b6Seschrock if (zfsvfs->z_os) 808ea8dc4b6Seschrock dmu_objset_close(zfsvfs->z_os); 809c25056deSgw mutex_destroy(&zfsvfs->z_znodes_lock); 810c25056deSgw list_destroy(&zfsvfs->z_all_znodes); 811f18faf3fSek rrw_destroy(&zfsvfs->z_teardown_lock); 812f18faf3fSek rw_destroy(&zfsvfs->z_teardown_inactive_lock); 813ea8dc4b6Seschrock kmem_free(zfsvfs, sizeof (zfsvfs_t)); 814ea8dc4b6Seschrock } else { 815ea8dc4b6Seschrock atomic_add_32(&zfs_active_fs_count, 1); 816ea8dc4b6Seschrock } 817fa9e4066Sahrens 818ea8dc4b6Seschrock return (error); 819ea8dc4b6Seschrock } 820ea8dc4b6Seschrock 821ea8dc4b6Seschrock void 822ea8dc4b6Seschrock zfs_unregister_callbacks(zfsvfs_t *zfsvfs) 823ea8dc4b6Seschrock { 824ea8dc4b6Seschrock objset_t *os = zfsvfs->z_os; 825ea8dc4b6Seschrock struct dsl_dataset *ds; 826ea8dc4b6Seschrock 827ea8dc4b6Seschrock /* 828ea8dc4b6Seschrock * Unregister properties. 829ea8dc4b6Seschrock */ 830ea8dc4b6Seschrock if (!dmu_objset_is_snapshot(os)) { 831fa9e4066Sahrens ds = dmu_objset_ds(os); 832ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 833fa9e4066Sahrens zfsvfs) == 0); 834fa9e4066Sahrens 8357b55fa8eSck VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 8367b55fa8eSck zfsvfs) == 0); 8377b55fa8eSck 838ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 839fa9e4066Sahrens zfsvfs) == 0); 840fa9e4066Sahrens 841ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 842fa9e4066Sahrens zfsvfs) == 0); 843fa9e4066Sahrens 844ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb, 845fa9e4066Sahrens zfsvfs) == 0); 846fa9e4066Sahrens 847ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 848fa9e4066Sahrens zfsvfs) == 0); 849fa9e4066Sahrens 850ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 851fa9e4066Sahrens zfsvfs) == 0); 852fa9e4066Sahrens 853ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 854fa9e4066Sahrens zfsvfs) == 0); 855fa9e4066Sahrens 856ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 857fa9e4066Sahrens zfsvfs) == 0); 858fa9e4066Sahrens 859ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "aclinherit", 860fa9e4066Sahrens acl_inherit_changed_cb, zfsvfs) == 0); 861*da6c28aaSamw 862*da6c28aaSamw VERIFY(dsl_prop_unregister(ds, "vscan", 863*da6c28aaSamw vscan_changed_cb, zfsvfs) == 0); 864ea8dc4b6Seschrock } 865ea8dc4b6Seschrock } 866fa9e4066Sahrens 867b1b8ab34Slling /* 868b1b8ab34Slling * Convert a decimal digit string to a uint64_t integer. 869b1b8ab34Slling */ 870b1b8ab34Slling static int 871b1b8ab34Slling str_to_uint64(char *str, uint64_t *objnum) 872b1b8ab34Slling { 873b1b8ab34Slling uint64_t num = 0; 874b1b8ab34Slling 875b1b8ab34Slling while (*str) { 876b1b8ab34Slling if (*str < '0' || *str > '9') 877b1b8ab34Slling return (EINVAL); 878b1b8ab34Slling 879b1b8ab34Slling num = num*10 + *str++ - '0'; 880b1b8ab34Slling } 881b1b8ab34Slling 882b1b8ab34Slling *objnum = num; 883b1b8ab34Slling return (0); 884b1b8ab34Slling } 885b1b8ab34Slling 886b1b8ab34Slling /* 887b1b8ab34Slling * The boot path passed from the boot loader is in the form of 888b1b8ab34Slling * "rootpool-name/root-filesystem-object-number'. Convert this 889b1b8ab34Slling * string to a dataset name: "rootpool-name/root-filesystem-name". 890b1b8ab34Slling */ 891b1b8ab34Slling static int 892b1b8ab34Slling parse_bootpath(char *bpath, char *outpath) 893b1b8ab34Slling { 894b1b8ab34Slling char *slashp; 895b1b8ab34Slling uint64_t objnum; 896b1b8ab34Slling int error; 897b1b8ab34Slling 898b1b8ab34Slling if (*bpath == 0 || *bpath == '/') 899b1b8ab34Slling return (EINVAL); 900b1b8ab34Slling 901b1b8ab34Slling slashp = strchr(bpath, '/'); 902b1b8ab34Slling 903b1b8ab34Slling /* if no '/', just return the pool name */ 904b1b8ab34Slling if (slashp == NULL) { 905b1b8ab34Slling (void) strcpy(outpath, bpath); 906b1b8ab34Slling return (0); 907b1b8ab34Slling } 908b1b8ab34Slling 909b1b8ab34Slling if (error = str_to_uint64(slashp+1, &objnum)) 910b1b8ab34Slling return (error); 911b1b8ab34Slling 912b1b8ab34Slling *slashp = '\0'; 913b1b8ab34Slling error = dsl_dsobj_to_dsname(bpath, objnum, outpath); 914b1b8ab34Slling *slashp = '/'; 915b1b8ab34Slling 916b1b8ab34Slling return (error); 917b1b8ab34Slling } 918b1b8ab34Slling 919ea8dc4b6Seschrock static int 920ea8dc4b6Seschrock zfs_mountroot(vfs_t *vfsp, enum whymountroot why) 921ea8dc4b6Seschrock { 922ea8dc4b6Seschrock int error = 0; 923ea8dc4b6Seschrock int ret = 0; 924ea8dc4b6Seschrock static int zfsrootdone = 0; 925ea8dc4b6Seschrock zfsvfs_t *zfsvfs = NULL; 926ea8dc4b6Seschrock znode_t *zp = NULL; 927ea8dc4b6Seschrock vnode_t *vp = NULL; 928b1b8ab34Slling char *zfs_bootpath; 929ea8dc4b6Seschrock 930ea8dc4b6Seschrock ASSERT(vfsp); 931ea8dc4b6Seschrock 932ea8dc4b6Seschrock /* 933b1b8ab34Slling * The filesystem that we mount as root is defined in the 934b1b8ab34Slling * "zfs-bootfs" property. 935ea8dc4b6Seschrock */ 936ea8dc4b6Seschrock if (why == ROOT_INIT) { 937ea8dc4b6Seschrock if (zfsrootdone++) 938ea8dc4b6Seschrock return (EBUSY); 939fa9e4066Sahrens 940b1b8ab34Slling if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(), 941b1b8ab34Slling DDI_PROP_DONTPASS, "zfs-bootfs", &zfs_bootpath) != 942b1b8ab34Slling DDI_SUCCESS) 943b1b8ab34Slling return (EIO); 944b1b8ab34Slling 945b1b8ab34Slling error = parse_bootpath(zfs_bootpath, rootfs.bo_name); 946b1b8ab34Slling ddi_prop_free(zfs_bootpath); 947b1b8ab34Slling 948b1b8ab34Slling if (error) 949b1b8ab34Slling return (error); 950fa9e4066Sahrens 951ea8dc4b6Seschrock if (error = vfs_lock(vfsp)) 952ea8dc4b6Seschrock return (error); 953fa9e4066Sahrens 954b1b8ab34Slling if (error = zfs_domount(vfsp, rootfs.bo_name, CRED())) 955ea8dc4b6Seschrock goto out; 956ea8dc4b6Seschrock 957ea8dc4b6Seschrock zfsvfs = (zfsvfs_t *)vfsp->vfs_data; 958ea8dc4b6Seschrock ASSERT(zfsvfs); 959ea8dc4b6Seschrock if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) 960ea8dc4b6Seschrock goto out; 961ea8dc4b6Seschrock 962ea8dc4b6Seschrock vp = ZTOV(zp); 963ea8dc4b6Seschrock mutex_enter(&vp->v_lock); 964ea8dc4b6Seschrock vp->v_flag |= VROOT; 965ea8dc4b6Seschrock mutex_exit(&vp->v_lock); 966ea8dc4b6Seschrock rootvp = vp; 967ea8dc4b6Seschrock 968ea8dc4b6Seschrock /* 969ea8dc4b6Seschrock * The zfs_zget call above returns with a hold on vp, we release 970ea8dc4b6Seschrock * it here. 971ea8dc4b6Seschrock */ 972fa9e4066Sahrens VN_RELE(vp); 973ea8dc4b6Seschrock 974ea8dc4b6Seschrock /* 975ea8dc4b6Seschrock * Mount root as readonly initially, it will be remouted 976ea8dc4b6Seschrock * read/write by /lib/svc/method/fs-usr. 977ea8dc4b6Seschrock */ 978ea8dc4b6Seschrock readonly_changed_cb(vfsp->vfs_data, B_TRUE); 979ea8dc4b6Seschrock vfs_add((struct vnode *)0, vfsp, 980ea8dc4b6Seschrock (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0); 981ea8dc4b6Seschrock out: 982ea8dc4b6Seschrock vfs_unlock(vfsp); 983ea8dc4b6Seschrock ret = (error) ? error : 0; 984ea8dc4b6Seschrock return (ret); 985ea8dc4b6Seschrock } else if (why == ROOT_REMOUNT) { 986ea8dc4b6Seschrock readonly_changed_cb(vfsp->vfs_data, B_FALSE); 987ea8dc4b6Seschrock vfsp->vfs_flag |= VFS_REMOUNT; 988b510d378Slling 989b510d378Slling /* refresh mount options */ 990b510d378Slling zfs_unregister_callbacks(vfsp->vfs_data); 991b510d378Slling return (zfs_register_callbacks(vfsp)); 992b510d378Slling 993ea8dc4b6Seschrock } else if (why == ROOT_UNMOUNT) { 994ea8dc4b6Seschrock zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data); 995ea8dc4b6Seschrock (void) zfs_sync(vfsp, 0, 0); 996ea8dc4b6Seschrock return (0); 997ea8dc4b6Seschrock } 998ea8dc4b6Seschrock 999ea8dc4b6Seschrock /* 1000ea8dc4b6Seschrock * if "why" is equal to anything else other than ROOT_INIT, 1001ea8dc4b6Seschrock * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it. 1002ea8dc4b6Seschrock */ 1003ea8dc4b6Seschrock return (ENOTSUP); 1004ea8dc4b6Seschrock } 1005ea8dc4b6Seschrock 1006ea8dc4b6Seschrock /*ARGSUSED*/ 1007ea8dc4b6Seschrock static int 1008ea8dc4b6Seschrock zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 1009ea8dc4b6Seschrock { 1010ea8dc4b6Seschrock char *osname; 1011ea8dc4b6Seschrock pathname_t spn; 1012ea8dc4b6Seschrock int error = 0; 1013ea8dc4b6Seschrock uio_seg_t fromspace = (uap->flags & MS_SYSSPACE) ? 1014b1b8ab34Slling UIO_SYSSPACE : UIO_USERSPACE; 1015ea8dc4b6Seschrock int canwrite; 1016ea8dc4b6Seschrock 1017ea8dc4b6Seschrock if (mvp->v_type != VDIR) 1018ea8dc4b6Seschrock return (ENOTDIR); 1019ea8dc4b6Seschrock 1020ea8dc4b6Seschrock mutex_enter(&mvp->v_lock); 1021ea8dc4b6Seschrock if ((uap->flags & MS_REMOUNT) == 0 && 1022ea8dc4b6Seschrock (uap->flags & MS_OVERLAY) == 0 && 1023ea8dc4b6Seschrock (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 1024ea8dc4b6Seschrock mutex_exit(&mvp->v_lock); 1025ea8dc4b6Seschrock return (EBUSY); 1026ea8dc4b6Seschrock } 1027ea8dc4b6Seschrock mutex_exit(&mvp->v_lock); 1028ea8dc4b6Seschrock 1029ea8dc4b6Seschrock /* 1030ea8dc4b6Seschrock * ZFS does not support passing unparsed data in via MS_DATA. 1031ea8dc4b6Seschrock * Users should use the MS_OPTIONSTR interface; this means 1032ea8dc4b6Seschrock * that all option parsing is already done and the options struct 1033ea8dc4b6Seschrock * can be interrogated. 1034ea8dc4b6Seschrock */ 1035ea8dc4b6Seschrock if ((uap->flags & MS_DATA) && uap->datalen > 0) 1036ea8dc4b6Seschrock return (EINVAL); 1037ea8dc4b6Seschrock 1038ea8dc4b6Seschrock /* 1039ea8dc4b6Seschrock * Get the objset name (the "special" mount argument). 1040ea8dc4b6Seschrock */ 1041ea8dc4b6Seschrock if (error = pn_get(uap->spec, fromspace, &spn)) 1042ea8dc4b6Seschrock return (error); 1043ea8dc4b6Seschrock 1044ea8dc4b6Seschrock osname = spn.pn_path; 1045ea8dc4b6Seschrock 1046ecd6cf80Smarks /* 1047ecd6cf80Smarks * Check for mount privilege? 1048ecd6cf80Smarks * 1049ecd6cf80Smarks * If we don't have privilege then see if 1050ecd6cf80Smarks * we have local permission to allow it 1051ecd6cf80Smarks */ 1052ecd6cf80Smarks error = secpolicy_fs_mount(cr, mvp, vfsp); 1053ecd6cf80Smarks if (error) { 1054ecd6cf80Smarks error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr); 1055ecd6cf80Smarks if (error == 0) { 1056ecd6cf80Smarks vattr_t vattr; 1057ecd6cf80Smarks 1058ecd6cf80Smarks /* 1059ecd6cf80Smarks * Make sure user is the owner of the mount point 1060ecd6cf80Smarks * or has sufficient privileges. 1061ecd6cf80Smarks */ 1062ecd6cf80Smarks 1063ecd6cf80Smarks vattr.va_mask = AT_UID; 1064ecd6cf80Smarks 1065*da6c28aaSamw if (error = VOP_GETATTR(mvp, &vattr, 0, cr, NULL)) { 1066ecd6cf80Smarks goto out; 1067ecd6cf80Smarks } 1068ecd6cf80Smarks 1069ecd6cf80Smarks if (error = secpolicy_vnode_owner(cr, vattr.va_uid)) { 1070ecd6cf80Smarks goto out; 1071ecd6cf80Smarks } 1072ecd6cf80Smarks 1073*da6c28aaSamw if (error = VOP_ACCESS(mvp, VWRITE, 0, cr, NULL)) { 1074ecd6cf80Smarks goto out; 1075ecd6cf80Smarks } 1076ecd6cf80Smarks 1077ecd6cf80Smarks secpolicy_fs_mount_clearopts(cr, vfsp); 1078ecd6cf80Smarks } else { 1079ecd6cf80Smarks goto out; 1080ecd6cf80Smarks } 1081ecd6cf80Smarks } 1082ea8dc4b6Seschrock 1083ea8dc4b6Seschrock /* 1084ea8dc4b6Seschrock * Refuse to mount a filesystem if we are in a local zone and the 1085ea8dc4b6Seschrock * dataset is not visible. 1086ea8dc4b6Seschrock */ 1087ea8dc4b6Seschrock if (!INGLOBALZONE(curproc) && 1088ea8dc4b6Seschrock (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 1089ea8dc4b6Seschrock error = EPERM; 1090ea8dc4b6Seschrock goto out; 1091ea8dc4b6Seschrock } 1092ea8dc4b6Seschrock 1093b510d378Slling /* 1094b510d378Slling * When doing a remount, we simply refresh our temporary properties 1095b510d378Slling * according to those options set in the current VFS options. 1096b510d378Slling */ 1097b510d378Slling if (uap->flags & MS_REMOUNT) { 1098b510d378Slling /* refresh mount options */ 1099b510d378Slling zfs_unregister_callbacks(vfsp->vfs_data); 1100b510d378Slling error = zfs_register_callbacks(vfsp); 1101b510d378Slling goto out; 1102b510d378Slling } 1103b510d378Slling 1104ea8dc4b6Seschrock error = zfs_domount(vfsp, osname, cr); 1105ea8dc4b6Seschrock 1106ea8dc4b6Seschrock out: 1107fa9e4066Sahrens pn_free(&spn); 1108fa9e4066Sahrens return (error); 1109fa9e4066Sahrens } 1110fa9e4066Sahrens 1111fa9e4066Sahrens static int 1112fa9e4066Sahrens zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp) 1113fa9e4066Sahrens { 1114fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1115fa9e4066Sahrens dev32_t d32; 1116a2eea2e1Sahrens uint64_t refdbytes, availbytes, usedobjs, availobjs; 1117fa9e4066Sahrens 1118fa9e4066Sahrens ZFS_ENTER(zfsvfs); 1119fa9e4066Sahrens 1120a2eea2e1Sahrens dmu_objset_space(zfsvfs->z_os, 1121a2eea2e1Sahrens &refdbytes, &availbytes, &usedobjs, &availobjs); 1122fa9e4066Sahrens 1123fa9e4066Sahrens /* 1124fa9e4066Sahrens * The underlying storage pool actually uses multiple block sizes. 1125fa9e4066Sahrens * We report the fragsize as the smallest block size we support, 1126fa9e4066Sahrens * and we report our blocksize as the filesystem's maximum blocksize. 1127fa9e4066Sahrens */ 1128fa9e4066Sahrens statp->f_frsize = 1UL << SPA_MINBLOCKSHIFT; 1129fa9e4066Sahrens statp->f_bsize = zfsvfs->z_max_blksz; 1130fa9e4066Sahrens 1131fa9e4066Sahrens /* 1132fa9e4066Sahrens * The following report "total" blocks of various kinds in the 1133fa9e4066Sahrens * file system, but reported in terms of f_frsize - the 1134fa9e4066Sahrens * "fragment" size. 1135fa9e4066Sahrens */ 1136fa9e4066Sahrens 1137a2eea2e1Sahrens statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; 1138a2eea2e1Sahrens statp->f_bfree = availbytes >> SPA_MINBLOCKSHIFT; 1139fa9e4066Sahrens statp->f_bavail = statp->f_bfree; /* no root reservation */ 1140fa9e4066Sahrens 1141fa9e4066Sahrens /* 1142fa9e4066Sahrens * statvfs() should really be called statufs(), because it assumes 1143fa9e4066Sahrens * static metadata. ZFS doesn't preallocate files, so the best 1144fa9e4066Sahrens * we can do is report the max that could possibly fit in f_files, 1145fa9e4066Sahrens * and that minus the number actually used in f_ffree. 1146fa9e4066Sahrens * For f_ffree, report the smaller of the number of object available 1147fa9e4066Sahrens * and the number of blocks (each object will take at least a block). 1148fa9e4066Sahrens */ 1149a2eea2e1Sahrens statp->f_ffree = MIN(availobjs, statp->f_bfree); 1150fa9e4066Sahrens statp->f_favail = statp->f_ffree; /* no "root reservation" */ 1151a2eea2e1Sahrens statp->f_files = statp->f_ffree + usedobjs; 1152fa9e4066Sahrens 1153fa9e4066Sahrens (void) cmpldev(&d32, vfsp->vfs_dev); 1154fa9e4066Sahrens statp->f_fsid = d32; 1155fa9e4066Sahrens 1156fa9e4066Sahrens /* 1157fa9e4066Sahrens * We're a zfs filesystem. 1158fa9e4066Sahrens */ 1159fa9e4066Sahrens (void) strcpy(statp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name); 1160fa9e4066Sahrens 1161a5be7ebbSmarks statp->f_flag = vf_to_stf(vfsp->vfs_flag); 1162fa9e4066Sahrens 1163fa9e4066Sahrens statp->f_namemax = ZFS_MAXNAMELEN; 1164fa9e4066Sahrens 1165fa9e4066Sahrens /* 1166fa9e4066Sahrens * We have all of 32 characters to stuff a string here. 1167fa9e4066Sahrens * Is there anything useful we could/should provide? 1168fa9e4066Sahrens */ 1169fa9e4066Sahrens bzero(statp->f_fstr, sizeof (statp->f_fstr)); 1170fa9e4066Sahrens 1171fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1172fa9e4066Sahrens return (0); 1173fa9e4066Sahrens } 1174fa9e4066Sahrens 1175fa9e4066Sahrens static int 1176fa9e4066Sahrens zfs_root(vfs_t *vfsp, vnode_t **vpp) 1177fa9e4066Sahrens { 1178fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1179fa9e4066Sahrens znode_t *rootzp; 1180fa9e4066Sahrens int error; 1181fa9e4066Sahrens 1182fa9e4066Sahrens ZFS_ENTER(zfsvfs); 1183fa9e4066Sahrens 1184fa9e4066Sahrens error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 1185fa9e4066Sahrens if (error == 0) 1186fa9e4066Sahrens *vpp = ZTOV(rootzp); 1187fa9e4066Sahrens 1188fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1189fa9e4066Sahrens return (error); 1190fa9e4066Sahrens } 1191fa9e4066Sahrens 1192f18faf3fSek /* 1193f18faf3fSek * Teardown the zfsvfs::z_os. 1194f18faf3fSek * 1195f18faf3fSek * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock' 1196f18faf3fSek * and 'z_teardown_inactive_lock' held. 1197f18faf3fSek */ 1198f18faf3fSek static int 1199f18faf3fSek zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) 1200f18faf3fSek { 1201f18faf3fSek objset_t *os = zfsvfs->z_os; 1202f18faf3fSek znode_t *zp, *nextzp; 1203f18faf3fSek znode_t markerzp; 1204f18faf3fSek 1205f18faf3fSek rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); 1206f18faf3fSek 1207f18faf3fSek if (!unmounting) { 1208f18faf3fSek /* 1209f18faf3fSek * We purge the parent filesystem's vfsp as the parent 1210f18faf3fSek * filesystem and all of its snapshots have their vnode's 1211f18faf3fSek * v_vfsp set to the parent's filesystem's vfsp. Note, 1212f18faf3fSek * 'z_parent' is self referential for non-snapshots. 1213f18faf3fSek */ 1214f18faf3fSek (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 1215f18faf3fSek } 1216f18faf3fSek 1217f18faf3fSek /* 1218f18faf3fSek * Close the zil. NB: Can't close the zil while zfs_inactive 1219f18faf3fSek * threads are blocked as zil_close can call zfs_inactive. 1220f18faf3fSek */ 1221f18faf3fSek if (zfsvfs->z_log) { 1222f18faf3fSek zil_close(zfsvfs->z_log); 1223f18faf3fSek zfsvfs->z_log = NULL; 1224f18faf3fSek } 1225f18faf3fSek 1226f18faf3fSek rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER); 1227f18faf3fSek 1228f18faf3fSek /* 1229f18faf3fSek * If we are not unmounting (ie: online recv) and someone already 1230f18faf3fSek * unmounted this file system while we were doing the switcheroo, 1231f18faf3fSek * or a reopen of z_os failed then just bail out now. 1232f18faf3fSek */ 1233f18faf3fSek if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { 1234f18faf3fSek rw_exit(&zfsvfs->z_teardown_inactive_lock); 1235f18faf3fSek rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1236f18faf3fSek return (EIO); 1237f18faf3fSek } 1238f18faf3fSek 1239f18faf3fSek /* 1240f18faf3fSek * At this point there are no vops active, and any new vops will 1241f18faf3fSek * fail with EIO since we have z_teardown_lock for writer (only 1242f18faf3fSek * relavent for forced unmount). 1243f18faf3fSek * 1244f18faf3fSek * Release all holds on dbufs. 1245f18faf3fSek * Note, the dmu can still callback via znode_pageout_func() 1246f18faf3fSek * which can zfs_znode_free() the znode. So we lock 1247f18faf3fSek * z_all_znodes; search the list for a held dbuf; drop the lock 1248f18faf3fSek * (we know zp can't disappear if we hold a dbuf lock) then 1249f18faf3fSek * regrab the lock and restart. 1250f18faf3fSek * 1251f18faf3fSek * Since we have to restart the search after finding each held dbuf, 1252f18faf3fSek * we do two things to speed up searching: we insert a dummy znode 1253f18faf3fSek * ('markerzp') to detect the original tail of the list, and move 1254f18faf3fSek * non-held znodes to the end of the list. Once we hit 'markerzp', 1255f18faf3fSek * we know we've looked at each znode and can break out. 1256f18faf3fSek */ 1257f18faf3fSek mutex_enter(&zfsvfs->z_znodes_lock); 1258f18faf3fSek list_insert_tail(&zfsvfs->z_all_znodes, &markerzp); 1259f18faf3fSek for (zp = list_head(&zfsvfs->z_all_znodes); zp != &markerzp; 1260f18faf3fSek zp = nextzp) { 1261f18faf3fSek nextzp = list_next(&zfsvfs->z_all_znodes, zp); 1262f18faf3fSek if (zp->z_dbuf_held) { 1263f18faf3fSek /* dbufs should only be held when force unmounting */ 1264f18faf3fSek zp->z_dbuf_held = 0; 1265f18faf3fSek mutex_exit(&zfsvfs->z_znodes_lock); 1266f18faf3fSek dmu_buf_rele(zp->z_dbuf, NULL); 1267f18faf3fSek /* Start again */ 1268f18faf3fSek mutex_enter(&zfsvfs->z_znodes_lock); 1269f18faf3fSek nextzp = list_head(&zfsvfs->z_all_znodes); 1270f18faf3fSek } else { 1271f18faf3fSek list_remove(&zfsvfs->z_all_znodes, zp); 1272f18faf3fSek list_insert_tail(&zfsvfs->z_all_znodes, zp); 1273f18faf3fSek } 1274f18faf3fSek } 1275f18faf3fSek list_remove(&zfsvfs->z_all_znodes, &markerzp); 1276f18faf3fSek mutex_exit(&zfsvfs->z_znodes_lock); 1277f18faf3fSek 1278f18faf3fSek /* 1279f18faf3fSek * If we are unmounting, set the unmounted flag and let new vops 1280f18faf3fSek * unblock. zfs_inactive will have the unmounted behavior, and all 1281f18faf3fSek * other vops will fail with EIO. 1282f18faf3fSek */ 1283f18faf3fSek if (unmounting) { 1284f18faf3fSek zfsvfs->z_unmounted = B_TRUE; 1285f18faf3fSek rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1286f18faf3fSek rw_exit(&zfsvfs->z_teardown_inactive_lock); 1287f18faf3fSek } 1288f18faf3fSek 1289f18faf3fSek /* 1290f18faf3fSek * z_os will be NULL if there was an error in attempting to reopen 1291f18faf3fSek * zfsvfs, so just return as the properties had already been 1292f18faf3fSek * unregistered and cached data had been evicted before. 1293f18faf3fSek */ 1294f18faf3fSek if (zfsvfs->z_os == NULL) 1295f18faf3fSek return (0); 1296f18faf3fSek 1297f18faf3fSek /* 1298f18faf3fSek * Unregister properties. 1299f18faf3fSek */ 1300f18faf3fSek zfs_unregister_callbacks(zfsvfs); 1301f18faf3fSek 1302f18faf3fSek /* 1303f18faf3fSek * Evict cached data 1304f18faf3fSek */ 1305f18faf3fSek (void) dmu_objset_evict_dbufs(os); 1306f18faf3fSek 1307f18faf3fSek return (0); 1308f18faf3fSek } 1309f18faf3fSek 1310fa9e4066Sahrens /*ARGSUSED*/ 1311fa9e4066Sahrens static int 1312fa9e4066Sahrens zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr) 1313fa9e4066Sahrens { 1314fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1315f18faf3fSek objset_t *os; 1316fa9e4066Sahrens int ret; 1317fa9e4066Sahrens 1318ecd6cf80Smarks ret = secpolicy_fs_unmount(cr, vfsp); 1319ecd6cf80Smarks if (ret) { 1320ecd6cf80Smarks ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), 1321ecd6cf80Smarks ZFS_DELEG_PERM_MOUNT, cr); 1322ecd6cf80Smarks if (ret) 1323ecd6cf80Smarks return (ret); 1324ecd6cf80Smarks } 1325033f9833Sek 1326ed097989Sek /* 1327ed097989Sek * We purge the parent filesystem's vfsp as the parent filesystem 1328ed097989Sek * and all of its snapshots have their vnode's v_vfsp set to the 1329ed097989Sek * parent's filesystem's vfsp. Note, 'z_parent' is self 1330ed097989Sek * referential for non-snapshots. 1331ed097989Sek */ 1332ed097989Sek (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 1333033f9833Sek 1334fa9e4066Sahrens /* 1335fa9e4066Sahrens * Unmount any snapshots mounted under .zfs before unmounting the 1336fa9e4066Sahrens * dataset itself. 1337fa9e4066Sahrens */ 1338fa9e4066Sahrens if (zfsvfs->z_ctldir != NULL && 1339ecd6cf80Smarks (ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) { 1340fa9e4066Sahrens return (ret); 1341ecd6cf80Smarks } 1342fa9e4066Sahrens 134391ebeef5Sahrens if (!(fflag & MS_FORCE)) { 1344fa9e4066Sahrens /* 134591ebeef5Sahrens * Check the number of active vnodes in the file system. 134691ebeef5Sahrens * Our count is maintained in the vfs structure, but the 134791ebeef5Sahrens * number is off by 1 to indicate a hold on the vfs 134891ebeef5Sahrens * structure itself. 134991ebeef5Sahrens * 135091ebeef5Sahrens * The '.zfs' directory maintains a reference of its 135191ebeef5Sahrens * own, and any active references underneath are 135291ebeef5Sahrens * reflected in the vnode count. 1353fa9e4066Sahrens */ 135491ebeef5Sahrens if (zfsvfs->z_ctldir == NULL) { 135591ebeef5Sahrens if (vfsp->vfs_count > 1) 135691ebeef5Sahrens return (EBUSY); 135791ebeef5Sahrens } else { 135891ebeef5Sahrens if (vfsp->vfs_count > 2 || 1359f18faf3fSek zfsvfs->z_ctldir->v_count > 1) 136091ebeef5Sahrens return (EBUSY); 1361fa9e4066Sahrens } 136291ebeef5Sahrens } 1363fa9e4066Sahrens 136491ebeef5Sahrens vfsp->vfs_flag |= VFS_UNMOUNTED; 136591ebeef5Sahrens 1366f18faf3fSek VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0); 1367f18faf3fSek os = zfsvfs->z_os; 136891ebeef5Sahrens 136991ebeef5Sahrens /* 1370f18faf3fSek * z_os will be NULL if there was an error in 1371f18faf3fSek * attempting to reopen zfsvfs. 137291ebeef5Sahrens */ 1373f18faf3fSek if (os != NULL) { 1374f18faf3fSek /* 1375f18faf3fSek * Unset the objset user_ptr. 1376f18faf3fSek */ 1377f18faf3fSek mutex_enter(&os->os->os_user_ptr_lock); 1378f18faf3fSek dmu_objset_set_user(os, NULL); 1379f18faf3fSek mutex_exit(&os->os->os_user_ptr_lock); 138091ebeef5Sahrens 1381f18faf3fSek /* 1382f18faf3fSek * Finally close the objset 1383f18faf3fSek */ 1384f18faf3fSek dmu_objset_close(os); 138591ebeef5Sahrens } 138691ebeef5Sahrens 138791ebeef5Sahrens /* 138891ebeef5Sahrens * We can now safely destroy the '.zfs' directory node. 138991ebeef5Sahrens */ 139091ebeef5Sahrens if (zfsvfs->z_ctldir != NULL) 139191ebeef5Sahrens zfsctl_destroy(zfsvfs); 1392fa9e4066Sahrens 1393fa9e4066Sahrens return (0); 1394fa9e4066Sahrens } 1395fa9e4066Sahrens 1396fa9e4066Sahrens static int 1397fa9e4066Sahrens zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 1398fa9e4066Sahrens { 1399fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1400fa9e4066Sahrens znode_t *zp; 1401fa9e4066Sahrens uint64_t object = 0; 1402fa9e4066Sahrens uint64_t fid_gen = 0; 1403fa9e4066Sahrens uint64_t gen_mask; 1404fa9e4066Sahrens uint64_t zp_gen; 1405fa9e4066Sahrens int i, err; 1406fa9e4066Sahrens 1407fa9e4066Sahrens *vpp = NULL; 1408fa9e4066Sahrens 1409fa9e4066Sahrens ZFS_ENTER(zfsvfs); 1410fa9e4066Sahrens 1411fa9e4066Sahrens if (fidp->fid_len == LONG_FID_LEN) { 1412fa9e4066Sahrens zfid_long_t *zlfid = (zfid_long_t *)fidp; 1413fa9e4066Sahrens uint64_t objsetid = 0; 1414fa9e4066Sahrens uint64_t setgen = 0; 1415fa9e4066Sahrens 1416fa9e4066Sahrens for (i = 0; i < sizeof (zlfid->zf_setid); i++) 1417fa9e4066Sahrens objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 1418fa9e4066Sahrens 1419fa9e4066Sahrens for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 1420fa9e4066Sahrens setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 1421fa9e4066Sahrens 1422fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1423fa9e4066Sahrens 1424fa9e4066Sahrens err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 1425fa9e4066Sahrens if (err) 1426fa9e4066Sahrens return (EINVAL); 1427fa9e4066Sahrens ZFS_ENTER(zfsvfs); 1428fa9e4066Sahrens } 1429fa9e4066Sahrens 1430fa9e4066Sahrens if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 1431fa9e4066Sahrens zfid_short_t *zfid = (zfid_short_t *)fidp; 1432fa9e4066Sahrens 1433fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_object); i++) 1434fa9e4066Sahrens object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 1435fa9e4066Sahrens 1436fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_gen); i++) 1437fa9e4066Sahrens fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 1438fa9e4066Sahrens } else { 1439fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1440fa9e4066Sahrens return (EINVAL); 1441fa9e4066Sahrens } 1442fa9e4066Sahrens 1443fa9e4066Sahrens /* A zero fid_gen means we are in the .zfs control directories */ 1444fa9e4066Sahrens if (fid_gen == 0 && 1445fa9e4066Sahrens (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { 1446fa9e4066Sahrens *vpp = zfsvfs->z_ctldir; 1447fa9e4066Sahrens ASSERT(*vpp != NULL); 1448fa9e4066Sahrens if (object == ZFSCTL_INO_SNAPDIR) { 1449fa9e4066Sahrens VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, 1450*da6c28aaSamw 0, NULL, NULL, NULL, NULL, NULL) == 0); 1451fa9e4066Sahrens } else { 1452fa9e4066Sahrens VN_HOLD(*vpp); 1453fa9e4066Sahrens } 1454fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1455fa9e4066Sahrens return (0); 1456fa9e4066Sahrens } 1457fa9e4066Sahrens 1458fa9e4066Sahrens gen_mask = -1ULL >> (64 - 8 * i); 1459fa9e4066Sahrens 1460fa9e4066Sahrens dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 1461fa9e4066Sahrens if (err = zfs_zget(zfsvfs, object, &zp)) { 1462fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1463fa9e4066Sahrens return (err); 1464fa9e4066Sahrens } 1465fa9e4066Sahrens zp_gen = zp->z_phys->zp_gen & gen_mask; 1466fa9e4066Sahrens if (zp_gen == 0) 1467fa9e4066Sahrens zp_gen = 1; 1468893a6d32Sahrens if (zp->z_unlinked || zp_gen != fid_gen) { 1469fa9e4066Sahrens dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 1470fa9e4066Sahrens VN_RELE(ZTOV(zp)); 1471fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1472fa9e4066Sahrens return (EINVAL); 1473fa9e4066Sahrens } 1474fa9e4066Sahrens 1475fa9e4066Sahrens *vpp = ZTOV(zp); 1476fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1477fa9e4066Sahrens return (0); 1478fa9e4066Sahrens } 1479fa9e4066Sahrens 1480f18faf3fSek /* 1481f18faf3fSek * Block out VOPs and close zfsvfs_t::z_os 1482f18faf3fSek * 1483f18faf3fSek * Note, if successful, then we return with the 'z_teardown_lock' and 1484f18faf3fSek * 'z_teardown_inactive_lock' write held. 1485f18faf3fSek */ 1486f18faf3fSek int 1487f18faf3fSek zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *mode) 1488f18faf3fSek { 1489f18faf3fSek int error; 1490f18faf3fSek 1491f18faf3fSek if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) 1492f18faf3fSek return (error); 1493f18faf3fSek 1494f18faf3fSek *mode = zfsvfs->z_os->os_mode; 1495f18faf3fSek dmu_objset_name(zfsvfs->z_os, name); 1496f18faf3fSek dmu_objset_close(zfsvfs->z_os); 1497f18faf3fSek 1498f18faf3fSek return (0); 1499f18faf3fSek } 1500f18faf3fSek 1501f18faf3fSek /* 1502f18faf3fSek * Reopen zfsvfs_t::z_os and release VOPs. 1503f18faf3fSek */ 1504f18faf3fSek int 1505f18faf3fSek zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode) 1506f18faf3fSek { 1507f18faf3fSek int err; 1508f18faf3fSek 1509f18faf3fSek ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock)); 1510f18faf3fSek ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); 1511f18faf3fSek 1512f18faf3fSek err = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 1513f18faf3fSek if (err) { 1514f18faf3fSek zfsvfs->z_os = NULL; 1515f18faf3fSek } else { 1516f18faf3fSek znode_t *zp; 1517f18faf3fSek 1518f18faf3fSek VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); 1519f18faf3fSek 1520f18faf3fSek /* 1521f18faf3fSek * Attempt to re-establish all the active znodes with 1522f18faf3fSek * their dbufs. If a zfs_rezget() fails, then we'll let 1523f18faf3fSek * any potential callers discover that via ZFS_ENTER_VERIFY_VP 1524f18faf3fSek * when they try to use their znode. 1525f18faf3fSek */ 1526f18faf3fSek mutex_enter(&zfsvfs->z_znodes_lock); 1527f18faf3fSek for (zp = list_head(&zfsvfs->z_all_znodes); zp; 1528f18faf3fSek zp = list_next(&zfsvfs->z_all_znodes, zp)) { 1529f18faf3fSek ASSERT(!zp->z_dbuf_held); 1530f18faf3fSek (void) zfs_rezget(zp); 1531f18faf3fSek } 1532f18faf3fSek mutex_exit(&zfsvfs->z_znodes_lock); 1533f18faf3fSek 1534f18faf3fSek } 1535f18faf3fSek 1536f18faf3fSek /* release the VOPs */ 1537f18faf3fSek rw_exit(&zfsvfs->z_teardown_inactive_lock); 1538f18faf3fSek rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1539f18faf3fSek 1540f18faf3fSek if (err) { 1541f18faf3fSek /* 1542f18faf3fSek * Since we couldn't reopen zfsvfs::z_os, force 1543f18faf3fSek * unmount this file system. 1544f18faf3fSek */ 1545f18faf3fSek if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) 1546f18faf3fSek (void) dounmount(zfsvfs->z_vfs, MS_FORCE, CRED()); 1547f18faf3fSek } 1548f18faf3fSek return (err); 1549f18faf3fSek } 1550f18faf3fSek 1551fa9e4066Sahrens static void 1552fa9e4066Sahrens zfs_freevfs(vfs_t *vfsp) 1553fa9e4066Sahrens { 1554fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1555c25056deSgw int i; 1556c25056deSgw 1557c25056deSgw for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 1558c25056deSgw mutex_destroy(&zfsvfs->z_hold_mtx[i]); 1559fa9e4066Sahrens 156091ebeef5Sahrens mutex_destroy(&zfsvfs->z_znodes_lock); 1561c25056deSgw list_destroy(&zfsvfs->z_all_znodes); 1562f18faf3fSek rrw_destroy(&zfsvfs->z_teardown_lock); 1563f18faf3fSek rw_destroy(&zfsvfs->z_teardown_inactive_lock); 1564*da6c28aaSamw zfs_fuid_destroy(zfsvfs); 1565fa9e4066Sahrens kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1566fa9e4066Sahrens 1567fa9e4066Sahrens atomic_add_32(&zfs_active_fs_count, -1); 1568fa9e4066Sahrens } 1569fa9e4066Sahrens 1570fa9e4066Sahrens /* 1571fa9e4066Sahrens * VFS_INIT() initialization. Note that there is no VFS_FINI(), 1572fa9e4066Sahrens * so we can't safely do any non-idempotent initialization here. 1573fa9e4066Sahrens * Leave that to zfs_init() and zfs_fini(), which are called 1574fa9e4066Sahrens * from the module's _init() and _fini() entry points. 1575fa9e4066Sahrens */ 1576fa9e4066Sahrens /*ARGSUSED*/ 1577fa9e4066Sahrens static int 1578fa9e4066Sahrens zfs_vfsinit(int fstype, char *name) 1579fa9e4066Sahrens { 1580fa9e4066Sahrens int error; 1581fa9e4066Sahrens 1582fa9e4066Sahrens zfsfstype = fstype; 1583fa9e4066Sahrens 1584fa9e4066Sahrens /* 1585fa9e4066Sahrens * Setup vfsops and vnodeops tables. 1586fa9e4066Sahrens */ 1587fa9e4066Sahrens error = vfs_setfsops(fstype, zfs_vfsops_template, &zfs_vfsops); 1588fa9e4066Sahrens if (error != 0) { 1589fa9e4066Sahrens cmn_err(CE_WARN, "zfs: bad vfs ops template"); 1590fa9e4066Sahrens } 1591fa9e4066Sahrens 1592fa9e4066Sahrens error = zfs_create_op_tables(); 1593fa9e4066Sahrens if (error) { 1594fa9e4066Sahrens zfs_remove_op_tables(); 1595fa9e4066Sahrens cmn_err(CE_WARN, "zfs: bad vnode ops template"); 1596fa9e4066Sahrens (void) vfs_freevfsops_by_type(zfsfstype); 1597fa9e4066Sahrens return (error); 1598fa9e4066Sahrens } 1599fa9e4066Sahrens 1600fa9e4066Sahrens mutex_init(&zfs_dev_mtx, NULL, MUTEX_DEFAULT, NULL); 1601fa9e4066Sahrens 1602fa9e4066Sahrens /* 1603a0965f35Sbonwick * Unique major number for all zfs mounts. 1604a0965f35Sbonwick * If we run out of 32-bit minors, we'll getudev() another major. 1605fa9e4066Sahrens */ 1606a0965f35Sbonwick zfs_major = ddi_name_to_major(ZFS_DRIVER); 1607a0965f35Sbonwick zfs_minor = ZFS_MIN_MINOR; 1608fa9e4066Sahrens 1609fa9e4066Sahrens return (0); 1610fa9e4066Sahrens } 1611fa9e4066Sahrens 1612fa9e4066Sahrens void 1613fa9e4066Sahrens zfs_init(void) 1614fa9e4066Sahrens { 1615fa9e4066Sahrens /* 1616fa9e4066Sahrens * Initialize .zfs directory structures 1617fa9e4066Sahrens */ 1618fa9e4066Sahrens zfsctl_init(); 1619fa9e4066Sahrens 1620fa9e4066Sahrens /* 1621fa9e4066Sahrens * Initialize znode cache, vnode ops, etc... 1622fa9e4066Sahrens */ 1623fa9e4066Sahrens zfs_znode_init(); 1624fa9e4066Sahrens } 1625fa9e4066Sahrens 1626fa9e4066Sahrens void 1627fa9e4066Sahrens zfs_fini(void) 1628fa9e4066Sahrens { 1629fa9e4066Sahrens zfsctl_fini(); 1630fa9e4066Sahrens zfs_znode_fini(); 1631fa9e4066Sahrens } 1632fa9e4066Sahrens 1633fa9e4066Sahrens int 1634fa9e4066Sahrens zfs_busy(void) 1635fa9e4066Sahrens { 1636fa9e4066Sahrens return (zfs_active_fs_count != 0); 1637fa9e4066Sahrens } 1638fa9e4066Sahrens 1639e7437265Sahrens int 1640bd00f61bSrm zfs_get_version(objset_t *os, uint64_t *version) 1641e7437265Sahrens { 1642e7437265Sahrens int error; 1643e7437265Sahrens 1644bd00f61bSrm error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, version); 1645e7437265Sahrens return (error); 1646e7437265Sahrens } 1647e7437265Sahrens 1648e7437265Sahrens int 1649e7437265Sahrens zfs_set_version(const char *name, uint64_t newvers) 1650e7437265Sahrens { 1651e7437265Sahrens int error; 1652e7437265Sahrens objset_t *os; 1653e7437265Sahrens dmu_tx_t *tx; 1654e7437265Sahrens uint64_t curvers; 1655e7437265Sahrens 1656e7437265Sahrens /* 1657e7437265Sahrens * XXX for now, require that the filesystem be unmounted. Would 1658e7437265Sahrens * be nice to find the zfsvfs_t and just update that if 1659e7437265Sahrens * possible. 1660e7437265Sahrens */ 1661e7437265Sahrens 1662e7437265Sahrens if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) 1663e7437265Sahrens return (EINVAL); 1664e7437265Sahrens 1665e7437265Sahrens error = dmu_objset_open(name, DMU_OST_ZFS, DS_MODE_PRIMARY, &os); 1666e7437265Sahrens if (error) 1667e7437265Sahrens return (error); 1668e7437265Sahrens 1669e7437265Sahrens error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 1670e7437265Sahrens 8, 1, &curvers); 1671e7437265Sahrens if (error) 1672e7437265Sahrens goto out; 1673e7437265Sahrens if (newvers < curvers) { 1674e7437265Sahrens error = EINVAL; 1675e7437265Sahrens goto out; 1676e7437265Sahrens } 1677e7437265Sahrens 1678e7437265Sahrens tx = dmu_tx_create(os); 1679e7437265Sahrens dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, 0, ZPL_VERSION_STR); 1680e7437265Sahrens error = dmu_tx_assign(tx, TXG_WAIT); 1681e7437265Sahrens if (error) { 1682e7437265Sahrens dmu_tx_abort(tx); 1683e7437265Sahrens goto out; 1684e7437265Sahrens } 1685e7437265Sahrens error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, 1686e7437265Sahrens &newvers, tx); 1687e7437265Sahrens 1688e7437265Sahrens spa_history_internal_log(LOG_DS_UPGRADE, 1689e7437265Sahrens dmu_objset_spa(os), tx, CRED(), 1690e7437265Sahrens "oldver=%llu newver=%llu dataset = %llu", curvers, newvers, 1691e7437265Sahrens dmu_objset_id(os)); 1692e7437265Sahrens dmu_tx_commit(tx); 1693e7437265Sahrens 1694e7437265Sahrens out: 1695e7437265Sahrens dmu_objset_close(os); 1696e7437265Sahrens return (error); 1697e7437265Sahrens } 1698e7437265Sahrens 1699fa9e4066Sahrens static vfsdef_t vfw = { 1700fa9e4066Sahrens VFSDEF_VERSION, 1701fa9e4066Sahrens MNTTYPE_ZFS, 1702fa9e4066Sahrens zfs_vfsinit, 1703*da6c28aaSamw VSW_HASPROTO|VSW_CANRWRO|VSW_CANREMOUNT|VSW_VOLATILEDEV|VSW_STATS| 1704*da6c28aaSamw VSW_XID, 1705fa9e4066Sahrens &zfs_mntopts 1706fa9e4066Sahrens }; 1707fa9e4066Sahrens 1708fa9e4066Sahrens struct modlfs zfs_modlfs = { 1709e7437265Sahrens &mod_fsops, "ZFS filesystem version " SPA_VERSION_STRING, &vfw 1710fa9e4066Sahrens }; 1711