1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5033f9833Sek * Common Development and Distribution License (the "License"). 6033f9833Sek * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 22dc7cd546SMark Shellenbaum * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23fa9e4066Sahrens */ 24fa9e4066Sahrens 2555da60b9SMark J Musante /* Portions Copyright 2010 Robert Milkowski */ 2655da60b9SMark J Musante 27fa9e4066Sahrens #include <sys/types.h> 28fa9e4066Sahrens #include <sys/param.h> 29fa9e4066Sahrens #include <sys/systm.h> 30fa9e4066Sahrens #include <sys/sysmacros.h> 31fa9e4066Sahrens #include <sys/kmem.h> 32fa9e4066Sahrens #include <sys/pathname.h> 33fa9e4066Sahrens #include <sys/vnode.h> 34fa9e4066Sahrens #include <sys/vfs.h> 35aa59c4cbSrsb #include <sys/vfs_opreg.h> 36fa9e4066Sahrens #include <sys/mntent.h> 37fa9e4066Sahrens #include <sys/mount.h> 38fa9e4066Sahrens #include <sys/cmn_err.h> 39fa9e4066Sahrens #include "fs/fs_subr.h" 40fa9e4066Sahrens #include <sys/zfs_znode.h> 41893a6d32Sahrens #include <sys/zfs_dir.h> 42fa9e4066Sahrens #include <sys/zil.h> 43fa9e4066Sahrens #include <sys/fs/zfs.h> 44fa9e4066Sahrens #include <sys/dmu.h> 45fa9e4066Sahrens #include <sys/dsl_prop.h> 46b1b8ab34Slling #include <sys/dsl_dataset.h> 47ecd6cf80Smarks #include <sys/dsl_deleg.h> 48fa9e4066Sahrens #include <sys/spa.h> 49fa9e4066Sahrens #include <sys/zap.h> 500a586ceaSMark Shellenbaum #include <sys/sa.h> 51fa9e4066Sahrens #include <sys/varargs.h> 52fa9e4066Sahrens #include <sys/policy.h> 53fa9e4066Sahrens #include <sys/atomic.h> 54fa9e4066Sahrens #include <sys/mkdev.h> 55fa9e4066Sahrens #include <sys/modctl.h> 56ecd6cf80Smarks #include <sys/refstr.h> 57fa9e4066Sahrens #include <sys/zfs_ioctl.h> 58fa9e4066Sahrens #include <sys/zfs_ctldir.h> 59da6c28aaSamw #include <sys/zfs_fuid.h> 60ea8dc4b6Seschrock #include <sys/bootconf.h> 61a0965f35Sbonwick #include <sys/sunddi.h> 62033f9833Sek #include <sys/dnlc.h> 63f18faf3fSek #include <sys/dmu_objset.h> 64e7cbe64fSgw #include <sys/spa_boot.h> 650a586ceaSMark Shellenbaum #include <sys/sa.h> 660a586ceaSMark Shellenbaum #include "zfs_comutil.h" 67fa9e4066Sahrens 68fa9e4066Sahrens int zfsfstype; 69fa9e4066Sahrens vfsops_t *zfs_vfsops = NULL; 70a0965f35Sbonwick static major_t zfs_major; 71fa9e4066Sahrens static minor_t zfs_minor; 72fa9e4066Sahrens static kmutex_t zfs_dev_mtx; 73fa9e4066Sahrens 7454d692b7SGeorge Wilson extern int sys_shutdown; 7554d692b7SGeorge Wilson 76fa9e4066Sahrens static int zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr); 77fa9e4066Sahrens static int zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr); 78ea8dc4b6Seschrock static int zfs_mountroot(vfs_t *vfsp, enum whymountroot); 79fa9e4066Sahrens static int zfs_root(vfs_t *vfsp, vnode_t **vpp); 80fa9e4066Sahrens static int zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp); 81fa9e4066Sahrens static int zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp); 82fa9e4066Sahrens static void zfs_freevfs(vfs_t *vfsp); 83fa9e4066Sahrens 84fa9e4066Sahrens static const fs_operation_def_t zfs_vfsops_template[] = { 85aa59c4cbSrsb VFSNAME_MOUNT, { .vfs_mount = zfs_mount }, 86aa59c4cbSrsb VFSNAME_MOUNTROOT, { .vfs_mountroot = zfs_mountroot }, 87aa59c4cbSrsb VFSNAME_UNMOUNT, { .vfs_unmount = zfs_umount }, 88aa59c4cbSrsb VFSNAME_ROOT, { .vfs_root = zfs_root }, 89aa59c4cbSrsb VFSNAME_STATVFS, { .vfs_statvfs = zfs_statvfs }, 90aa59c4cbSrsb VFSNAME_SYNC, { .vfs_sync = zfs_sync }, 91aa59c4cbSrsb VFSNAME_VGET, { .vfs_vget = zfs_vget }, 92aa59c4cbSrsb VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, 93aa59c4cbSrsb NULL, NULL 94fa9e4066Sahrens }; 95fa9e4066Sahrens 96fa9e4066Sahrens static const fs_operation_def_t zfs_vfsops_eio_template[] = { 97aa59c4cbSrsb VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, 98aa59c4cbSrsb NULL, NULL 99fa9e4066Sahrens }; 100fa9e4066Sahrens 101fa9e4066Sahrens /* 102fa9e4066Sahrens * We need to keep a count of active fs's. 103fa9e4066Sahrens * This is necessary to prevent our module 104fa9e4066Sahrens * from being unloaded after a umount -f 105fa9e4066Sahrens */ 106fa9e4066Sahrens static uint32_t zfs_active_fs_count = 0; 107fa9e4066Sahrens 108fa9e4066Sahrens static char *noatime_cancel[] = { MNTOPT_ATIME, NULL }; 109fa9e4066Sahrens static char *atime_cancel[] = { MNTOPT_NOATIME, NULL }; 1107b55fa8eSck static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL }; 1117b55fa8eSck static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL }; 112fa9e4066Sahrens 1137b55fa8eSck /* 114b510d378Slling * MO_DEFAULT is not used since the default value is determined 115b510d378Slling * by the equivalent property. 1167b55fa8eSck */ 117fa9e4066Sahrens static mntopt_t mntopts[] = { 1187b55fa8eSck { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, NULL }, 1197b55fa8eSck { MNTOPT_XATTR, xattr_cancel, NULL, 0, NULL }, 120b510d378Slling { MNTOPT_NOATIME, noatime_cancel, NULL, 0, NULL }, 121fa9e4066Sahrens { MNTOPT_ATIME, atime_cancel, NULL, 0, NULL } 122fa9e4066Sahrens }; 123fa9e4066Sahrens 124fa9e4066Sahrens static mntopts_t zfs_mntopts = { 125fa9e4066Sahrens sizeof (mntopts) / sizeof (mntopt_t), 126fa9e4066Sahrens mntopts 127fa9e4066Sahrens }; 128fa9e4066Sahrens 129fa9e4066Sahrens /*ARGSUSED*/ 130fa9e4066Sahrens int 131fa9e4066Sahrens zfs_sync(vfs_t *vfsp, short flag, cred_t *cr) 132fa9e4066Sahrens { 133fa9e4066Sahrens /* 134fa9e4066Sahrens * Data integrity is job one. We don't want a compromised kernel 135fa9e4066Sahrens * writing to the storage pool, so we never sync during panic. 136fa9e4066Sahrens */ 137fa9e4066Sahrens if (panicstr) 138fa9e4066Sahrens return (0); 139fa9e4066Sahrens 140fa9e4066Sahrens /* 141fa9e4066Sahrens * SYNC_ATTR is used by fsflush() to force old filesystems like UFS 142fa9e4066Sahrens * to sync metadata, which they would otherwise cache indefinitely. 143fa9e4066Sahrens * Semantically, the only requirement is that the sync be initiated. 144fa9e4066Sahrens * The DMU syncs out txgs frequently, so there's nothing to do. 145fa9e4066Sahrens */ 146fa9e4066Sahrens if (flag & SYNC_ATTR) 147fa9e4066Sahrens return (0); 148fa9e4066Sahrens 149fa9e4066Sahrens if (vfsp != NULL) { 150fa9e4066Sahrens /* 151fa9e4066Sahrens * Sync a specific filesystem. 152fa9e4066Sahrens */ 153fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 15454d692b7SGeorge Wilson dsl_pool_t *dp; 155fa9e4066Sahrens 156fa9e4066Sahrens ZFS_ENTER(zfsvfs); 15754d692b7SGeorge Wilson dp = dmu_objset_pool(zfsvfs->z_os); 15854d692b7SGeorge Wilson 15954d692b7SGeorge Wilson /* 16054d692b7SGeorge Wilson * If the system is shutting down, then skip any 16154d692b7SGeorge Wilson * filesystems which may exist on a suspended pool. 16254d692b7SGeorge Wilson */ 16354d692b7SGeorge Wilson if (sys_shutdown && spa_suspended(dp->dp_spa)) { 16454d692b7SGeorge Wilson ZFS_EXIT(zfsvfs); 16554d692b7SGeorge Wilson return (0); 16654d692b7SGeorge Wilson } 16754d692b7SGeorge Wilson 168fa9e4066Sahrens if (zfsvfs->z_log != NULL) 169*5002558fSNeil Perrin zil_commit(zfsvfs->z_log, 0); 17055da60b9SMark J Musante 171fa9e4066Sahrens ZFS_EXIT(zfsvfs); 172fa9e4066Sahrens } else { 173fa9e4066Sahrens /* 174fa9e4066Sahrens * Sync all ZFS filesystems. This is what happens when you 175fa9e4066Sahrens * run sync(1M). Unlike other filesystems, ZFS honors the 176fa9e4066Sahrens * request by waiting for all pools to commit all dirty data. 177fa9e4066Sahrens */ 178fa9e4066Sahrens spa_sync_allpools(); 179fa9e4066Sahrens } 180fa9e4066Sahrens 181fa9e4066Sahrens return (0); 182fa9e4066Sahrens } 183fa9e4066Sahrens 184ea8dc4b6Seschrock static int 185ea8dc4b6Seschrock zfs_create_unique_device(dev_t *dev) 186ea8dc4b6Seschrock { 187ea8dc4b6Seschrock major_t new_major; 188ea8dc4b6Seschrock 189ea8dc4b6Seschrock do { 190ea8dc4b6Seschrock ASSERT3U(zfs_minor, <=, MAXMIN32); 191ea8dc4b6Seschrock minor_t start = zfs_minor; 192ea8dc4b6Seschrock do { 193ea8dc4b6Seschrock mutex_enter(&zfs_dev_mtx); 194ea8dc4b6Seschrock if (zfs_minor >= MAXMIN32) { 195ea8dc4b6Seschrock /* 196ea8dc4b6Seschrock * If we're still using the real major 197ea8dc4b6Seschrock * keep out of /dev/zfs and /dev/zvol minor 198ea8dc4b6Seschrock * number space. If we're using a getudev()'ed 199ea8dc4b6Seschrock * major number, we can use all of its minors. 200ea8dc4b6Seschrock */ 201ea8dc4b6Seschrock if (zfs_major == ddi_name_to_major(ZFS_DRIVER)) 202ea8dc4b6Seschrock zfs_minor = ZFS_MIN_MINOR; 203ea8dc4b6Seschrock else 204ea8dc4b6Seschrock zfs_minor = 0; 205ea8dc4b6Seschrock } else { 206ea8dc4b6Seschrock zfs_minor++; 207ea8dc4b6Seschrock } 208ea8dc4b6Seschrock *dev = makedevice(zfs_major, zfs_minor); 209ea8dc4b6Seschrock mutex_exit(&zfs_dev_mtx); 210ea8dc4b6Seschrock } while (vfs_devismounted(*dev) && zfs_minor != start); 211ea8dc4b6Seschrock if (zfs_minor == start) { 212ea8dc4b6Seschrock /* 213ea8dc4b6Seschrock * We are using all ~262,000 minor numbers for the 214ea8dc4b6Seschrock * current major number. Create a new major number. 215ea8dc4b6Seschrock */ 216ea8dc4b6Seschrock if ((new_major = getudev()) == (major_t)-1) { 217ea8dc4b6Seschrock cmn_err(CE_WARN, 218ea8dc4b6Seschrock "zfs_mount: Can't get unique major " 219ea8dc4b6Seschrock "device number."); 220ea8dc4b6Seschrock return (-1); 221ea8dc4b6Seschrock } 222ea8dc4b6Seschrock mutex_enter(&zfs_dev_mtx); 223ea8dc4b6Seschrock zfs_major = new_major; 224ea8dc4b6Seschrock zfs_minor = 0; 225ea8dc4b6Seschrock 226ea8dc4b6Seschrock mutex_exit(&zfs_dev_mtx); 227ea8dc4b6Seschrock } else { 228ea8dc4b6Seschrock break; 229ea8dc4b6Seschrock } 230ea8dc4b6Seschrock /* CONSTANTCONDITION */ 231ea8dc4b6Seschrock } while (1); 232ea8dc4b6Seschrock 233ea8dc4b6Seschrock return (0); 234ea8dc4b6Seschrock } 235ea8dc4b6Seschrock 236fa9e4066Sahrens static void 237fa9e4066Sahrens atime_changed_cb(void *arg, uint64_t newval) 238fa9e4066Sahrens { 239fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 240fa9e4066Sahrens 241fa9e4066Sahrens if (newval == TRUE) { 242fa9e4066Sahrens zfsvfs->z_atime = TRUE; 243fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 244fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 245fa9e4066Sahrens } else { 246fa9e4066Sahrens zfsvfs->z_atime = FALSE; 247fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 248fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 249fa9e4066Sahrens } 250fa9e4066Sahrens } 251fa9e4066Sahrens 2527b55fa8eSck static void 2537b55fa8eSck xattr_changed_cb(void *arg, uint64_t newval) 2547b55fa8eSck { 2557b55fa8eSck zfsvfs_t *zfsvfs = arg; 2567b55fa8eSck 2577b55fa8eSck if (newval == TRUE) { 2587b55fa8eSck /* XXX locking on vfs_flag? */ 2597b55fa8eSck zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 2607b55fa8eSck vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 2617b55fa8eSck vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 2627b55fa8eSck } else { 2637b55fa8eSck /* XXX locking on vfs_flag? */ 2647b55fa8eSck zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 2657b55fa8eSck vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 2667b55fa8eSck vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 2677b55fa8eSck } 2687b55fa8eSck } 2697b55fa8eSck 270fa9e4066Sahrens static void 271fa9e4066Sahrens blksz_changed_cb(void *arg, uint64_t newval) 272fa9e4066Sahrens { 273fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 274fa9e4066Sahrens 275fa9e4066Sahrens if (newval < SPA_MINBLOCKSIZE || 276fa9e4066Sahrens newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 277fa9e4066Sahrens newval = SPA_MAXBLOCKSIZE; 278fa9e4066Sahrens 279fa9e4066Sahrens zfsvfs->z_max_blksz = newval; 280fa9e4066Sahrens zfsvfs->z_vfs->vfs_bsize = newval; 281fa9e4066Sahrens } 282fa9e4066Sahrens 283fa9e4066Sahrens static void 284fa9e4066Sahrens readonly_changed_cb(void *arg, uint64_t newval) 285fa9e4066Sahrens { 286fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 287fa9e4066Sahrens 288fa9e4066Sahrens if (newval) { 289fa9e4066Sahrens /* XXX locking on vfs_flag? */ 290fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 291fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 292fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 293fa9e4066Sahrens } else { 294fa9e4066Sahrens /* XXX locking on vfs_flag? */ 295fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 296fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 297fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 298fa9e4066Sahrens } 299fa9e4066Sahrens } 300fa9e4066Sahrens 301fa9e4066Sahrens static void 302fa9e4066Sahrens devices_changed_cb(void *arg, uint64_t newval) 303fa9e4066Sahrens { 304fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 305fa9e4066Sahrens 306fa9e4066Sahrens if (newval == FALSE) { 307fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NODEVICES; 308fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES); 309fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES, NULL, 0); 310fa9e4066Sahrens } else { 311fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NODEVICES; 312fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES); 313fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES, NULL, 0); 314fa9e4066Sahrens } 315fa9e4066Sahrens } 316fa9e4066Sahrens 317fa9e4066Sahrens static void 318fa9e4066Sahrens setuid_changed_cb(void *arg, uint64_t newval) 319fa9e4066Sahrens { 320fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 321fa9e4066Sahrens 322fa9e4066Sahrens if (newval == FALSE) { 323fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 324fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 325fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 326fa9e4066Sahrens } else { 327fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 328fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 329fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 330fa9e4066Sahrens } 331fa9e4066Sahrens } 332fa9e4066Sahrens 333fa9e4066Sahrens static void 334fa9e4066Sahrens exec_changed_cb(void *arg, uint64_t newval) 335fa9e4066Sahrens { 336fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 337fa9e4066Sahrens 338fa9e4066Sahrens if (newval == FALSE) { 339fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 340fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 341fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 342fa9e4066Sahrens } else { 343fa9e4066Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 344fa9e4066Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 345fa9e4066Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 346fa9e4066Sahrens } 347fa9e4066Sahrens } 348fa9e4066Sahrens 349da6c28aaSamw /* 350da6c28aaSamw * The nbmand mount option can be changed at mount time. 351da6c28aaSamw * We can't allow it to be toggled on live file systems or incorrect 352da6c28aaSamw * behavior may be seen from cifs clients 353da6c28aaSamw * 354da6c28aaSamw * This property isn't registered via dsl_prop_register(), but this callback 355da6c28aaSamw * will be called when a file system is first mounted 356da6c28aaSamw */ 357da6c28aaSamw static void 358da6c28aaSamw nbmand_changed_cb(void *arg, uint64_t newval) 359da6c28aaSamw { 360da6c28aaSamw zfsvfs_t *zfsvfs = arg; 361da6c28aaSamw if (newval == FALSE) { 362da6c28aaSamw vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND); 363da6c28aaSamw vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0); 364da6c28aaSamw } else { 365da6c28aaSamw vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND); 366da6c28aaSamw vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0); 367da6c28aaSamw } 368da6c28aaSamw } 369da6c28aaSamw 370fa9e4066Sahrens static void 371fa9e4066Sahrens snapdir_changed_cb(void *arg, uint64_t newval) 372fa9e4066Sahrens { 373fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 374fa9e4066Sahrens 375fa9e4066Sahrens zfsvfs->z_show_ctldir = newval; 376fa9e4066Sahrens } 377fa9e4066Sahrens 378da6c28aaSamw static void 379da6c28aaSamw vscan_changed_cb(void *arg, uint64_t newval) 380da6c28aaSamw { 381da6c28aaSamw zfsvfs_t *zfsvfs = arg; 382da6c28aaSamw 383da6c28aaSamw zfsvfs->z_vscan = newval; 384da6c28aaSamw } 385da6c28aaSamw 386fa9e4066Sahrens static void 387fa9e4066Sahrens acl_inherit_changed_cb(void *arg, uint64_t newval) 388fa9e4066Sahrens { 389fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 390fa9e4066Sahrens 391fa9e4066Sahrens zfsvfs->z_acl_inherit = newval; 392fa9e4066Sahrens } 393fa9e4066Sahrens 394ea8dc4b6Seschrock static int 395ea8dc4b6Seschrock zfs_register_callbacks(vfs_t *vfsp) 396ea8dc4b6Seschrock { 397ea8dc4b6Seschrock struct dsl_dataset *ds = NULL; 398ea8dc4b6Seschrock objset_t *os = NULL; 399ea8dc4b6Seschrock zfsvfs_t *zfsvfs = NULL; 400da6c28aaSamw uint64_t nbmand; 401da6c28aaSamw int readonly, do_readonly = B_FALSE; 402da6c28aaSamw int setuid, do_setuid = B_FALSE; 403da6c28aaSamw int exec, do_exec = B_FALSE; 404da6c28aaSamw int devices, do_devices = B_FALSE; 405da6c28aaSamw int xattr, do_xattr = B_FALSE; 406da6c28aaSamw int atime, do_atime = B_FALSE; 407ea8dc4b6Seschrock int error = 0; 408ea8dc4b6Seschrock 409ea8dc4b6Seschrock ASSERT(vfsp); 410ea8dc4b6Seschrock zfsvfs = vfsp->vfs_data; 411ea8dc4b6Seschrock ASSERT(zfsvfs); 412ea8dc4b6Seschrock os = zfsvfs->z_os; 413fa9e4066Sahrens 414fa9e4066Sahrens /* 415ea8dc4b6Seschrock * The act of registering our callbacks will destroy any mount 416ea8dc4b6Seschrock * options we may have. In order to enable temporary overrides 4177b55fa8eSck * of mount options, we stash away the current values and 418ea8dc4b6Seschrock * restore them after we register the callbacks. 419fa9e4066Sahrens */ 420ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 421ea8dc4b6Seschrock readonly = B_TRUE; 422ea8dc4b6Seschrock do_readonly = B_TRUE; 423ea8dc4b6Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 424ea8dc4b6Seschrock readonly = B_FALSE; 425ea8dc4b6Seschrock do_readonly = B_TRUE; 426ea8dc4b6Seschrock } 427ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 428ea8dc4b6Seschrock devices = B_FALSE; 429ea8dc4b6Seschrock setuid = B_FALSE; 430ea8dc4b6Seschrock do_devices = B_TRUE; 431ea8dc4b6Seschrock do_setuid = B_TRUE; 432ea8dc4b6Seschrock } else { 433ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { 434ea8dc4b6Seschrock devices = B_FALSE; 435ea8dc4b6Seschrock do_devices = B_TRUE; 436b1b8ab34Slling } else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) { 437ea8dc4b6Seschrock devices = B_TRUE; 438ea8dc4b6Seschrock do_devices = B_TRUE; 439fa9e4066Sahrens } 440fa9e4066Sahrens 441ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 442ea8dc4b6Seschrock setuid = B_FALSE; 443ea8dc4b6Seschrock do_setuid = B_TRUE; 444ea8dc4b6Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 445ea8dc4b6Seschrock setuid = B_TRUE; 446ea8dc4b6Seschrock do_setuid = B_TRUE; 447fa9e4066Sahrens } 448ea8dc4b6Seschrock } 449ea8dc4b6Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 450ea8dc4b6Seschrock exec = B_FALSE; 451ea8dc4b6Seschrock do_exec = B_TRUE; 452ea8dc4b6Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 453ea8dc4b6Seschrock exec = B_TRUE; 454ea8dc4b6Seschrock do_exec = B_TRUE; 455fa9e4066Sahrens } 4567b55fa8eSck if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 4577b55fa8eSck xattr = B_FALSE; 4587b55fa8eSck do_xattr = B_TRUE; 4597b55fa8eSck } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 4607b55fa8eSck xattr = B_TRUE; 4617b55fa8eSck do_xattr = B_TRUE; 4627b55fa8eSck } 463b510d378Slling if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 464b510d378Slling atime = B_FALSE; 465b510d378Slling do_atime = B_TRUE; 466b510d378Slling } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 467b510d378Slling atime = B_TRUE; 468b510d378Slling do_atime = B_TRUE; 469b510d378Slling } 470fa9e4066Sahrens 471da6c28aaSamw /* 472da6c28aaSamw * nbmand is a special property. It can only be changed at 473da6c28aaSamw * mount time. 474da6c28aaSamw * 475da6c28aaSamw * This is weird, but it is documented to only be changeable 476da6c28aaSamw * at mount time. 477da6c28aaSamw */ 478da6c28aaSamw if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 479da6c28aaSamw nbmand = B_FALSE; 480da6c28aaSamw } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { 481da6c28aaSamw nbmand = B_TRUE; 482da6c28aaSamw } else { 483da6c28aaSamw char osname[MAXNAMELEN]; 484da6c28aaSamw 485da6c28aaSamw dmu_objset_name(os, osname); 486da6c28aaSamw if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand, 487bb0ade09Sahrens NULL)) { 488bb0ade09Sahrens return (error); 489bb0ade09Sahrens } 490da6c28aaSamw } 491da6c28aaSamw 492fa9e4066Sahrens /* 493ea8dc4b6Seschrock * Register property callbacks. 494ea8dc4b6Seschrock * 495ea8dc4b6Seschrock * It would probably be fine to just check for i/o error from 496ea8dc4b6Seschrock * the first prop_register(), but I guess I like to go 497ea8dc4b6Seschrock * overboard... 498fa9e4066Sahrens */ 499ea8dc4b6Seschrock ds = dmu_objset_ds(os); 500ea8dc4b6Seschrock error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 5017b55fa8eSck error = error ? error : dsl_prop_register(ds, 5027b55fa8eSck "xattr", xattr_changed_cb, zfsvfs); 503ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 504ea8dc4b6Seschrock "recordsize", blksz_changed_cb, zfsvfs); 505ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 506ea8dc4b6Seschrock "readonly", readonly_changed_cb, zfsvfs); 507ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 508ea8dc4b6Seschrock "devices", devices_changed_cb, zfsvfs); 509ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 510ea8dc4b6Seschrock "setuid", setuid_changed_cb, zfsvfs); 511ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 512ea8dc4b6Seschrock "exec", exec_changed_cb, zfsvfs); 513ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 514ea8dc4b6Seschrock "snapdir", snapdir_changed_cb, zfsvfs); 515ea8dc4b6Seschrock error = error ? error : dsl_prop_register(ds, 516ea8dc4b6Seschrock "aclinherit", acl_inherit_changed_cb, zfsvfs); 517da6c28aaSamw error = error ? error : dsl_prop_register(ds, 518da6c28aaSamw "vscan", vscan_changed_cb, zfsvfs); 519ea8dc4b6Seschrock if (error) 520ea8dc4b6Seschrock goto unregister; 521fa9e4066Sahrens 522ea8dc4b6Seschrock /* 523ea8dc4b6Seschrock * Invoke our callbacks to restore temporary mount options. 524ea8dc4b6Seschrock */ 525ea8dc4b6Seschrock if (do_readonly) 526ea8dc4b6Seschrock readonly_changed_cb(zfsvfs, readonly); 527ea8dc4b6Seschrock if (do_setuid) 528ea8dc4b6Seschrock setuid_changed_cb(zfsvfs, setuid); 529ea8dc4b6Seschrock if (do_exec) 530ea8dc4b6Seschrock exec_changed_cb(zfsvfs, exec); 531ea8dc4b6Seschrock if (do_devices) 532ea8dc4b6Seschrock devices_changed_cb(zfsvfs, devices); 5337b55fa8eSck if (do_xattr) 5347b55fa8eSck xattr_changed_cb(zfsvfs, xattr); 535b510d378Slling if (do_atime) 536b510d378Slling atime_changed_cb(zfsvfs, atime); 537fa9e4066Sahrens 538da6c28aaSamw nbmand_changed_cb(zfsvfs, nbmand); 539da6c28aaSamw 540ea8dc4b6Seschrock return (0); 541fa9e4066Sahrens 542ea8dc4b6Seschrock unregister: 543fa9e4066Sahrens /* 544ea8dc4b6Seschrock * We may attempt to unregister some callbacks that are not 545ea8dc4b6Seschrock * registered, but this is OK; it will simply return ENOMSG, 546ea8dc4b6Seschrock * which we will ignore. 547fa9e4066Sahrens */ 548ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 5497b55fa8eSck (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 550ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 551ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 552ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "devices", devices_changed_cb, zfsvfs); 553ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 554ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 555ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 556ea8dc4b6Seschrock (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 557ea8dc4b6Seschrock zfsvfs); 558da6c28aaSamw (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs); 559ea8dc4b6Seschrock return (error); 560ea8dc4b6Seschrock 561ea8dc4b6Seschrock } 562ea8dc4b6Seschrock 5639966ca11SMatthew Ahrens static int 5640a586ceaSMark Shellenbaum zfs_space_delta_cb(dmu_object_type_t bonustype, void *data, 5659966ca11SMatthew Ahrens uint64_t *userp, uint64_t *groupp) 56614843421SMatthew Ahrens { 5670a586ceaSMark Shellenbaum znode_phys_t *znp = data; 5680a586ceaSMark Shellenbaum int error = 0; 56914843421SMatthew Ahrens 57006e0070dSMark Shellenbaum /* 57106e0070dSMark Shellenbaum * Is it a valid type of object to track? 57206e0070dSMark Shellenbaum */ 5730a586ceaSMark Shellenbaum if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA) 5749966ca11SMatthew Ahrens return (ENOENT); 57514843421SMatthew Ahrens 57606e0070dSMark Shellenbaum /* 57706e0070dSMark Shellenbaum * If we have a NULL data pointer 57806e0070dSMark Shellenbaum * then assume the id's aren't changing and 57906e0070dSMark Shellenbaum * return EEXIST to the dmu to let it know to 58006e0070dSMark Shellenbaum * use the same ids 58106e0070dSMark Shellenbaum */ 58206e0070dSMark Shellenbaum if (data == NULL) 58306e0070dSMark Shellenbaum return (EEXIST); 58406e0070dSMark Shellenbaum 5850a586ceaSMark Shellenbaum if (bonustype == DMU_OT_ZNODE) { 5860a586ceaSMark Shellenbaum *userp = znp->zp_uid; 5870a586ceaSMark Shellenbaum *groupp = znp->zp_gid; 5880a586ceaSMark Shellenbaum } else { 5890a586ceaSMark Shellenbaum int hdrsize; 5900a586ceaSMark Shellenbaum 5910a586ceaSMark Shellenbaum ASSERT(bonustype == DMU_OT_SA); 5920a586ceaSMark Shellenbaum hdrsize = sa_hdrsize(data); 5930a586ceaSMark Shellenbaum 5940a586ceaSMark Shellenbaum if (hdrsize != 0) { 5950a586ceaSMark Shellenbaum *userp = *((uint64_t *)((uintptr_t)data + hdrsize + 5960a586ceaSMark Shellenbaum SA_UID_OFFSET)); 5970a586ceaSMark Shellenbaum *groupp = *((uint64_t *)((uintptr_t)data + hdrsize + 5980a586ceaSMark Shellenbaum SA_GID_OFFSET)); 5990a586ceaSMark Shellenbaum } else { 60006e0070dSMark Shellenbaum /* 60106e0070dSMark Shellenbaum * This should only happen for newly created 60206e0070dSMark Shellenbaum * files that haven't had the znode data filled 60306e0070dSMark Shellenbaum * in yet. 60406e0070dSMark Shellenbaum */ 60506e0070dSMark Shellenbaum *userp = 0; 60606e0070dSMark Shellenbaum *groupp = 0; 6070a586ceaSMark Shellenbaum } 6080a586ceaSMark Shellenbaum } 6090a586ceaSMark Shellenbaum return (error); 61014843421SMatthew Ahrens } 61114843421SMatthew Ahrens 61214843421SMatthew Ahrens static void 61314843421SMatthew Ahrens fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr, 61414843421SMatthew Ahrens char *domainbuf, int buflen, uid_t *ridp) 61514843421SMatthew Ahrens { 61614843421SMatthew Ahrens uint64_t fuid; 61714843421SMatthew Ahrens const char *domain; 61814843421SMatthew Ahrens 61914843421SMatthew Ahrens fuid = strtonum(fuidstr, NULL); 62014843421SMatthew Ahrens 62114843421SMatthew Ahrens domain = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid)); 62214843421SMatthew Ahrens if (domain) 62314843421SMatthew Ahrens (void) strlcpy(domainbuf, domain, buflen); 62414843421SMatthew Ahrens else 62514843421SMatthew Ahrens domainbuf[0] = '\0'; 62614843421SMatthew Ahrens *ridp = FUID_RID(fuid); 62714843421SMatthew Ahrens } 62814843421SMatthew Ahrens 62914843421SMatthew Ahrens static uint64_t 63014843421SMatthew Ahrens zfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type) 63114843421SMatthew Ahrens { 63214843421SMatthew Ahrens switch (type) { 63314843421SMatthew Ahrens case ZFS_PROP_USERUSED: 63414843421SMatthew Ahrens return (DMU_USERUSED_OBJECT); 63514843421SMatthew Ahrens case ZFS_PROP_GROUPUSED: 63614843421SMatthew Ahrens return (DMU_GROUPUSED_OBJECT); 63714843421SMatthew Ahrens case ZFS_PROP_USERQUOTA: 63814843421SMatthew Ahrens return (zfsvfs->z_userquota_obj); 63914843421SMatthew Ahrens case ZFS_PROP_GROUPQUOTA: 64014843421SMatthew Ahrens return (zfsvfs->z_groupquota_obj); 64114843421SMatthew Ahrens } 64214843421SMatthew Ahrens return (0); 64314843421SMatthew Ahrens } 64414843421SMatthew Ahrens 64514843421SMatthew Ahrens int 64614843421SMatthew Ahrens zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, 64714843421SMatthew Ahrens uint64_t *cookiep, void *vbuf, uint64_t *bufsizep) 64814843421SMatthew Ahrens { 64914843421SMatthew Ahrens int error; 65014843421SMatthew Ahrens zap_cursor_t zc; 65114843421SMatthew Ahrens zap_attribute_t za; 65214843421SMatthew Ahrens zfs_useracct_t *buf = vbuf; 65314843421SMatthew Ahrens uint64_t obj; 65414843421SMatthew Ahrens 65514843421SMatthew Ahrens if (!dmu_objset_userspace_present(zfsvfs->z_os)) 65614843421SMatthew Ahrens return (ENOTSUP); 65714843421SMatthew Ahrens 65814843421SMatthew Ahrens obj = zfs_userquota_prop_to_obj(zfsvfs, type); 65914843421SMatthew Ahrens if (obj == 0) { 66014843421SMatthew Ahrens *bufsizep = 0; 66114843421SMatthew Ahrens return (0); 66214843421SMatthew Ahrens } 66314843421SMatthew Ahrens 66414843421SMatthew Ahrens for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep); 66514843421SMatthew Ahrens (error = zap_cursor_retrieve(&zc, &za)) == 0; 66614843421SMatthew Ahrens zap_cursor_advance(&zc)) { 66714843421SMatthew Ahrens if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) > 66814843421SMatthew Ahrens *bufsizep) 66914843421SMatthew Ahrens break; 67014843421SMatthew Ahrens 67114843421SMatthew Ahrens fuidstr_to_sid(zfsvfs, za.za_name, 67214843421SMatthew Ahrens buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid); 67314843421SMatthew Ahrens 67414843421SMatthew Ahrens buf->zu_space = za.za_first_integer; 67514843421SMatthew Ahrens buf++; 67614843421SMatthew Ahrens } 67714843421SMatthew Ahrens if (error == ENOENT) 67814843421SMatthew Ahrens error = 0; 67914843421SMatthew Ahrens 68014843421SMatthew Ahrens ASSERT3U((uintptr_t)buf - (uintptr_t)vbuf, <=, *bufsizep); 68114843421SMatthew Ahrens *bufsizep = (uintptr_t)buf - (uintptr_t)vbuf; 68214843421SMatthew Ahrens *cookiep = zap_cursor_serialize(&zc); 68314843421SMatthew Ahrens zap_cursor_fini(&zc); 68414843421SMatthew Ahrens return (error); 68514843421SMatthew Ahrens } 68614843421SMatthew Ahrens 68714843421SMatthew Ahrens /* 68814843421SMatthew Ahrens * buf must be big enough (eg, 32 bytes) 68914843421SMatthew Ahrens */ 69014843421SMatthew Ahrens static int 69114843421SMatthew Ahrens id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid, 69214843421SMatthew Ahrens char *buf, boolean_t addok) 69314843421SMatthew Ahrens { 69414843421SMatthew Ahrens uint64_t fuid; 69514843421SMatthew Ahrens int domainid = 0; 69614843421SMatthew Ahrens 69714843421SMatthew Ahrens if (domain && domain[0]) { 69814843421SMatthew Ahrens domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok); 69914843421SMatthew Ahrens if (domainid == -1) 70014843421SMatthew Ahrens return (ENOENT); 70114843421SMatthew Ahrens } 70214843421SMatthew Ahrens fuid = FUID_ENCODE(domainid, rid); 70314843421SMatthew Ahrens (void) sprintf(buf, "%llx", (longlong_t)fuid); 70414843421SMatthew Ahrens return (0); 70514843421SMatthew Ahrens } 70614843421SMatthew Ahrens 70714843421SMatthew Ahrens int 70814843421SMatthew Ahrens zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, 70914843421SMatthew Ahrens const char *domain, uint64_t rid, uint64_t *valp) 71014843421SMatthew Ahrens { 71114843421SMatthew Ahrens char buf[32]; 71214843421SMatthew Ahrens int err; 71314843421SMatthew Ahrens uint64_t obj; 71414843421SMatthew Ahrens 71514843421SMatthew Ahrens *valp = 0; 71614843421SMatthew Ahrens 71714843421SMatthew Ahrens if (!dmu_objset_userspace_present(zfsvfs->z_os)) 71814843421SMatthew Ahrens return (ENOTSUP); 71914843421SMatthew Ahrens 72014843421SMatthew Ahrens obj = zfs_userquota_prop_to_obj(zfsvfs, type); 72114843421SMatthew Ahrens if (obj == 0) 72214843421SMatthew Ahrens return (0); 72314843421SMatthew Ahrens 72414843421SMatthew Ahrens err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_FALSE); 72514843421SMatthew Ahrens if (err) 72614843421SMatthew Ahrens return (err); 72714843421SMatthew Ahrens 72814843421SMatthew Ahrens err = zap_lookup(zfsvfs->z_os, obj, buf, 8, 1, valp); 72914843421SMatthew Ahrens if (err == ENOENT) 73014843421SMatthew Ahrens err = 0; 73114843421SMatthew Ahrens return (err); 73214843421SMatthew Ahrens } 73314843421SMatthew Ahrens 73414843421SMatthew Ahrens int 73514843421SMatthew Ahrens zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, 73614843421SMatthew Ahrens const char *domain, uint64_t rid, uint64_t quota) 73714843421SMatthew Ahrens { 73814843421SMatthew Ahrens char buf[32]; 73914843421SMatthew Ahrens int err; 74014843421SMatthew Ahrens dmu_tx_t *tx; 74114843421SMatthew Ahrens uint64_t *objp; 74214843421SMatthew Ahrens boolean_t fuid_dirtied; 74314843421SMatthew Ahrens 74414843421SMatthew Ahrens if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA) 74514843421SMatthew Ahrens return (EINVAL); 74614843421SMatthew Ahrens 74714843421SMatthew Ahrens if (zfsvfs->z_version < ZPL_VERSION_USERSPACE) 74814843421SMatthew Ahrens return (ENOTSUP); 74914843421SMatthew Ahrens 75014843421SMatthew Ahrens objp = (type == ZFS_PROP_USERQUOTA) ? &zfsvfs->z_userquota_obj : 75114843421SMatthew Ahrens &zfsvfs->z_groupquota_obj; 75214843421SMatthew Ahrens 75314843421SMatthew Ahrens err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_TRUE); 75414843421SMatthew Ahrens if (err) 75514843421SMatthew Ahrens return (err); 75614843421SMatthew Ahrens fuid_dirtied = zfsvfs->z_fuid_dirty; 75714843421SMatthew Ahrens 75814843421SMatthew Ahrens tx = dmu_tx_create(zfsvfs->z_os); 75914843421SMatthew Ahrens dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL); 76014843421SMatthew Ahrens if (*objp == 0) { 76114843421SMatthew Ahrens dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, 76214843421SMatthew Ahrens zfs_userquota_prop_prefixes[type]); 76314843421SMatthew Ahrens } 76414843421SMatthew Ahrens if (fuid_dirtied) 76514843421SMatthew Ahrens zfs_fuid_txhold(zfsvfs, tx); 76614843421SMatthew Ahrens err = dmu_tx_assign(tx, TXG_WAIT); 76714843421SMatthew Ahrens if (err) { 76814843421SMatthew Ahrens dmu_tx_abort(tx); 76914843421SMatthew Ahrens return (err); 77014843421SMatthew Ahrens } 77114843421SMatthew Ahrens 77214843421SMatthew Ahrens mutex_enter(&zfsvfs->z_lock); 77314843421SMatthew Ahrens if (*objp == 0) { 77414843421SMatthew Ahrens *objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA, 77514843421SMatthew Ahrens DMU_OT_NONE, 0, tx); 77614843421SMatthew Ahrens VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, 77714843421SMatthew Ahrens zfs_userquota_prop_prefixes[type], 8, 1, objp, tx)); 77814843421SMatthew Ahrens } 77914843421SMatthew Ahrens mutex_exit(&zfsvfs->z_lock); 78014843421SMatthew Ahrens 78114843421SMatthew Ahrens if (quota == 0) { 78214843421SMatthew Ahrens err = zap_remove(zfsvfs->z_os, *objp, buf, tx); 78314843421SMatthew Ahrens if (err == ENOENT) 78414843421SMatthew Ahrens err = 0; 78514843421SMatthew Ahrens } else { 78614843421SMatthew Ahrens err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, "a, tx); 78714843421SMatthew Ahrens } 78814843421SMatthew Ahrens ASSERT(err == 0); 78914843421SMatthew Ahrens if (fuid_dirtied) 79014843421SMatthew Ahrens zfs_fuid_sync(zfsvfs, tx); 79114843421SMatthew Ahrens dmu_tx_commit(tx); 79214843421SMatthew Ahrens return (err); 79314843421SMatthew Ahrens } 79414843421SMatthew Ahrens 79514843421SMatthew Ahrens boolean_t 7960a586ceaSMark Shellenbaum zfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid) 79714843421SMatthew Ahrens { 79814843421SMatthew Ahrens char buf[32]; 79914843421SMatthew Ahrens uint64_t used, quota, usedobj, quotaobj; 80014843421SMatthew Ahrens int err; 80114843421SMatthew Ahrens 80214843421SMatthew Ahrens usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; 80314843421SMatthew Ahrens quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; 80414843421SMatthew Ahrens 80514843421SMatthew Ahrens if (quotaobj == 0 || zfsvfs->z_replay) 80614843421SMatthew Ahrens return (B_FALSE); 80714843421SMatthew Ahrens 80814843421SMatthew Ahrens (void) sprintf(buf, "%llx", (longlong_t)fuid); 80914843421SMatthew Ahrens err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, "a); 81014843421SMatthew Ahrens if (err != 0) 81114843421SMatthew Ahrens return (B_FALSE); 81214843421SMatthew Ahrens 81314843421SMatthew Ahrens err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used); 81414843421SMatthew Ahrens if (err != 0) 81514843421SMatthew Ahrens return (B_FALSE); 81614843421SMatthew Ahrens return (used >= quota); 81714843421SMatthew Ahrens } 81814843421SMatthew Ahrens 8190a586ceaSMark Shellenbaum boolean_t 8200a586ceaSMark Shellenbaum zfs_owner_overquota(zfsvfs_t *zfsvfs, znode_t *zp, boolean_t isgroup) 8210a586ceaSMark Shellenbaum { 8220a586ceaSMark Shellenbaum uint64_t fuid; 8230a586ceaSMark Shellenbaum uint64_t quotaobj; 8240a586ceaSMark Shellenbaum uid_t id; 8250a586ceaSMark Shellenbaum 8260a586ceaSMark Shellenbaum quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; 8270a586ceaSMark Shellenbaum 8280a586ceaSMark Shellenbaum id = isgroup ? zp->z_gid : zp->z_uid; 8290a586ceaSMark Shellenbaum 8300a586ceaSMark Shellenbaum if (quotaobj == 0 || zfsvfs->z_replay) 8310a586ceaSMark Shellenbaum return (B_FALSE); 8320a586ceaSMark Shellenbaum 8330a586ceaSMark Shellenbaum if (IS_EPHEMERAL(id)) { 8340a586ceaSMark Shellenbaum VERIFY(0 == sa_lookup(zp->z_sa_hdl, 8350a586ceaSMark Shellenbaum isgroup ? SA_ZPL_GID(zfsvfs) : SA_ZPL_UID(zfsvfs), 8360a586ceaSMark Shellenbaum &fuid, sizeof (fuid))); 8370a586ceaSMark Shellenbaum } else { 8380a586ceaSMark Shellenbaum fuid = (uint64_t)id; 8390a586ceaSMark Shellenbaum } 8400a586ceaSMark Shellenbaum 8410a586ceaSMark Shellenbaum return (zfs_fuid_overquota(zfsvfs, isgroup, fuid)); 8420a586ceaSMark Shellenbaum } 8430a586ceaSMark Shellenbaum 84414843421SMatthew Ahrens int 845af4c679fSSean McEnroe zfsvfs_create(const char *osname, zfsvfs_t **zfvp) 84614843421SMatthew Ahrens { 84714843421SMatthew Ahrens objset_t *os; 84814843421SMatthew Ahrens zfsvfs_t *zfsvfs; 84914843421SMatthew Ahrens uint64_t zval; 85014843421SMatthew Ahrens int i, error; 8510a586ceaSMark Shellenbaum uint64_t sa_obj; 85214843421SMatthew Ahrens 853503ad85cSMatthew Ahrens zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 85414843421SMatthew Ahrens 855503ad85cSMatthew Ahrens /* 856503ad85cSMatthew Ahrens * We claim to always be readonly so we can open snapshots; 857503ad85cSMatthew Ahrens * other ZPL code will prevent us from writing to snapshots. 858503ad85cSMatthew Ahrens */ 859503ad85cSMatthew Ahrens error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os); 860503ad85cSMatthew Ahrens if (error) { 861503ad85cSMatthew Ahrens kmem_free(zfsvfs, sizeof (zfsvfs_t)); 86214843421SMatthew Ahrens return (error); 863503ad85cSMatthew Ahrens } 86414843421SMatthew Ahrens 86514843421SMatthew Ahrens /* 86614843421SMatthew Ahrens * Initialize the zfs-specific filesystem structure. 86714843421SMatthew Ahrens * Should probably make this a kmem cache, shuffle fields, 86814843421SMatthew Ahrens * and just bzero up to z_hold_mtx[]. 86914843421SMatthew Ahrens */ 87014843421SMatthew Ahrens zfsvfs->z_vfs = NULL; 87114843421SMatthew Ahrens zfsvfs->z_parent = zfsvfs; 87214843421SMatthew Ahrens zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 87314843421SMatthew Ahrens zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 87414843421SMatthew Ahrens zfsvfs->z_os = os; 87514843421SMatthew Ahrens 87614843421SMatthew Ahrens error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version); 87714843421SMatthew Ahrens if (error) { 87814843421SMatthew Ahrens goto out; 879dc7cd546SMark Shellenbaum } else if (zfsvfs->z_version > 880dc7cd546SMark Shellenbaum zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) { 881dc7cd546SMark Shellenbaum (void) printf("Can't mount a version %lld file system " 882dc7cd546SMark Shellenbaum "on a version %lld pool\n. Pool must be upgraded to mount " 883dc7cd546SMark Shellenbaum "this file system.", (u_longlong_t)zfsvfs->z_version, 884dc7cd546SMark Shellenbaum (u_longlong_t)spa_version(dmu_objset_spa(os))); 88514843421SMatthew Ahrens error = ENOTSUP; 88614843421SMatthew Ahrens goto out; 88714843421SMatthew Ahrens } 88814843421SMatthew Ahrens if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0) 88914843421SMatthew Ahrens goto out; 89014843421SMatthew Ahrens zfsvfs->z_norm = (int)zval; 89114843421SMatthew Ahrens 89214843421SMatthew Ahrens if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0) 89314843421SMatthew Ahrens goto out; 89414843421SMatthew Ahrens zfsvfs->z_utf8 = (zval != 0); 89514843421SMatthew Ahrens 89614843421SMatthew Ahrens if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0) 89714843421SMatthew Ahrens goto out; 89814843421SMatthew Ahrens zfsvfs->z_case = (uint_t)zval; 89914843421SMatthew Ahrens 90014843421SMatthew Ahrens /* 90114843421SMatthew Ahrens * Fold case on file systems that are always or sometimes case 90214843421SMatthew Ahrens * insensitive. 90314843421SMatthew Ahrens */ 90414843421SMatthew Ahrens if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 90514843421SMatthew Ahrens zfsvfs->z_case == ZFS_CASE_MIXED) 90614843421SMatthew Ahrens zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; 90714843421SMatthew Ahrens 90814843421SMatthew Ahrens zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 9090a586ceaSMark Shellenbaum zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); 9100a586ceaSMark Shellenbaum 9110a586ceaSMark Shellenbaum if (zfsvfs->z_use_sa) { 9120a586ceaSMark Shellenbaum /* should either have both of these objects or none */ 9130a586ceaSMark Shellenbaum error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, 9140a586ceaSMark Shellenbaum &sa_obj); 9150a586ceaSMark Shellenbaum if (error) 9160a586ceaSMark Shellenbaum return (error); 9170a586ceaSMark Shellenbaum } else { 9180a586ceaSMark Shellenbaum /* 9190a586ceaSMark Shellenbaum * Pre SA versions file systems should never touch 9200a586ceaSMark Shellenbaum * either the attribute registration or layout objects. 9210a586ceaSMark Shellenbaum */ 9220a586ceaSMark Shellenbaum sa_obj = 0; 9230a586ceaSMark Shellenbaum } 9240a586ceaSMark Shellenbaum 9251d8ccc7bSMark Shellenbaum error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, 9261d8ccc7bSMark Shellenbaum &zfsvfs->z_attr_table); 9271d8ccc7bSMark Shellenbaum if (error) 9281d8ccc7bSMark Shellenbaum goto out; 9290a586ceaSMark Shellenbaum 9300a586ceaSMark Shellenbaum if (zfsvfs->z_version >= ZPL_VERSION_SA) 9310a586ceaSMark Shellenbaum sa_register_update_callback(os, zfs_sa_upgrade); 93214843421SMatthew Ahrens 93314843421SMatthew Ahrens error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, 93414843421SMatthew Ahrens &zfsvfs->z_root); 93514843421SMatthew Ahrens if (error) 93614843421SMatthew Ahrens goto out; 93714843421SMatthew Ahrens ASSERT(zfsvfs->z_root != 0); 93814843421SMatthew Ahrens 93914843421SMatthew Ahrens error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, 94014843421SMatthew Ahrens &zfsvfs->z_unlinkedobj); 94114843421SMatthew Ahrens if (error) 94214843421SMatthew Ahrens goto out; 94314843421SMatthew Ahrens 94414843421SMatthew Ahrens error = zap_lookup(os, MASTER_NODE_OBJ, 94514843421SMatthew Ahrens zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA], 94614843421SMatthew Ahrens 8, 1, &zfsvfs->z_userquota_obj); 94714843421SMatthew Ahrens if (error && error != ENOENT) 94814843421SMatthew Ahrens goto out; 94914843421SMatthew Ahrens 95014843421SMatthew Ahrens error = zap_lookup(os, MASTER_NODE_OBJ, 95114843421SMatthew Ahrens zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA], 95214843421SMatthew Ahrens 8, 1, &zfsvfs->z_groupquota_obj); 95314843421SMatthew Ahrens if (error && error != ENOENT) 95414843421SMatthew Ahrens goto out; 95514843421SMatthew Ahrens 95614843421SMatthew Ahrens error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, 95714843421SMatthew Ahrens &zfsvfs->z_fuid_obj); 95814843421SMatthew Ahrens if (error && error != ENOENT) 95914843421SMatthew Ahrens goto out; 96014843421SMatthew Ahrens 96114843421SMatthew Ahrens error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, 96214843421SMatthew Ahrens &zfsvfs->z_shares_dir); 96314843421SMatthew Ahrens if (error && error != ENOENT) 96414843421SMatthew Ahrens goto out; 96514843421SMatthew Ahrens 96614843421SMatthew Ahrens mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 96714843421SMatthew Ahrens mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL); 96814843421SMatthew Ahrens list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 96914843421SMatthew Ahrens offsetof(znode_t, z_link_node)); 97014843421SMatthew Ahrens rrw_init(&zfsvfs->z_teardown_lock); 97114843421SMatthew Ahrens rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); 97214843421SMatthew Ahrens rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); 97314843421SMatthew Ahrens for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 97414843421SMatthew Ahrens mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); 97514843421SMatthew Ahrens 976af4c679fSSean McEnroe *zfvp = zfsvfs; 97714843421SMatthew Ahrens return (0); 97814843421SMatthew Ahrens 97914843421SMatthew Ahrens out: 980503ad85cSMatthew Ahrens dmu_objset_disown(os, zfsvfs); 981af4c679fSSean McEnroe *zfvp = NULL; 98214843421SMatthew Ahrens kmem_free(zfsvfs, sizeof (zfsvfs_t)); 98314843421SMatthew Ahrens return (error); 98414843421SMatthew Ahrens } 98514843421SMatthew Ahrens 986f18faf3fSek static int 987f18faf3fSek zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) 988f18faf3fSek { 989f18faf3fSek int error; 990f18faf3fSek 991f18faf3fSek error = zfs_register_callbacks(zfsvfs->z_vfs); 992f18faf3fSek if (error) 993f18faf3fSek return (error); 994f18faf3fSek 995f18faf3fSek /* 996f18faf3fSek * Set the objset user_ptr to track its zfsvfs. 997f18faf3fSek */ 998503ad85cSMatthew Ahrens mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); 999f18faf3fSek dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 1000503ad85cSMatthew Ahrens mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); 1001f18faf3fSek 1002377c02aaSNeil Perrin zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 1003377c02aaSNeil Perrin 1004f18faf3fSek /* 1005f18faf3fSek * If we are not mounting (ie: online recv), then we don't 1006f18faf3fSek * have to worry about replaying the log as we blocked all 1007f18faf3fSek * operations out since we closed the ZIL. 1008f18faf3fSek */ 1009f18faf3fSek if (mounting) { 1010a6e57bd4SNeil Perrin boolean_t readonly; 1011a6e57bd4SNeil Perrin 1012f18faf3fSek /* 1013f18faf3fSek * During replay we remove the read only flag to 1014f18faf3fSek * allow replays to succeed. 1015f18faf3fSek */ 1016f18faf3fSek readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; 10171209a471SNeil Perrin if (readonly != 0) 10181209a471SNeil Perrin zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 10191209a471SNeil Perrin else 10201209a471SNeil Perrin zfs_unlinked_drain(zfsvfs); 1021f18faf3fSek 102255da60b9SMark J Musante /* 102355da60b9SMark J Musante * Parse and replay the intent log. 102455da60b9SMark J Musante * 102555da60b9SMark J Musante * Because of ziltest, this must be done after 102655da60b9SMark J Musante * zfs_unlinked_drain(). (Further note: ziltest 102755da60b9SMark J Musante * doesn't use readonly mounts, where 102855da60b9SMark J Musante * zfs_unlinked_drain() isn't called.) This is because 102955da60b9SMark J Musante * ziltest causes spa_sync() to think it's committed, 103055da60b9SMark J Musante * but actually it is not, so the intent log contains 103155da60b9SMark J Musante * many txg's worth of changes. 103255da60b9SMark J Musante * 103355da60b9SMark J Musante * In particular, if object N is in the unlinked set in 103455da60b9SMark J Musante * the last txg to actually sync, then it could be 103555da60b9SMark J Musante * actually freed in a later txg and then reallocated 103655da60b9SMark J Musante * in a yet later txg. This would write a "create 103755da60b9SMark J Musante * object N" record to the intent log. Normally, this 103855da60b9SMark J Musante * would be fine because the spa_sync() would have 103955da60b9SMark J Musante * written out the fact that object N is free, before 104055da60b9SMark J Musante * we could write the "create object N" intent log 104155da60b9SMark J Musante * record. 104255da60b9SMark J Musante * 104355da60b9SMark J Musante * But when we are in ziltest mode, we advance the "open 104455da60b9SMark J Musante * txg" without actually spa_sync()-ing the changes to 104555da60b9SMark J Musante * disk. So we would see that object N is still 104655da60b9SMark J Musante * allocated and in the unlinked set, and there is an 104755da60b9SMark J Musante * intent log record saying to allocate it. 104855da60b9SMark J Musante */ 104955da60b9SMark J Musante if (zil_replay_disable) { 105055da60b9SMark J Musante zil_destroy(zfsvfs->z_log, B_FALSE); 105155da60b9SMark J Musante } else { 10521209a471SNeil Perrin zfsvfs->z_replay = B_TRUE; 10531209a471SNeil Perrin zil_replay(zfsvfs->z_os, zfsvfs, zfs_replay_vector); 10541209a471SNeil Perrin zfsvfs->z_replay = B_FALSE; 10551209a471SNeil Perrin } 1056f18faf3fSek zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */ 1057f18faf3fSek } 1058f18faf3fSek 1059f18faf3fSek return (0); 1060f18faf3fSek } 1061f18faf3fSek 106214843421SMatthew Ahrens void 106314843421SMatthew Ahrens zfsvfs_free(zfsvfs_t *zfsvfs) 106447f263f4Sek { 106514843421SMatthew Ahrens int i; 10664e9583b2STom Erickson extern krwlock_t zfsvfs_lock; /* in zfs_znode.c */ 10674e9583b2STom Erickson 10684e9583b2STom Erickson /* 10694e9583b2STom Erickson * This is a barrier to prevent the filesystem from going away in 10704e9583b2STom Erickson * zfs_znode_move() until we can safely ensure that the filesystem is 10714e9583b2STom Erickson * not unmounted. We consider the filesystem valid before the barrier 10724e9583b2STom Erickson * and invalid after the barrier. 10734e9583b2STom Erickson */ 10744e9583b2STom Erickson rw_enter(&zfsvfs_lock, RW_READER); 10754e9583b2STom Erickson rw_exit(&zfsvfs_lock); 107614843421SMatthew Ahrens 107714843421SMatthew Ahrens zfs_fuid_destroy(zfsvfs); 107814843421SMatthew Ahrens 107947f263f4Sek mutex_destroy(&zfsvfs->z_znodes_lock); 10809e1320c0SMark Shellenbaum mutex_destroy(&zfsvfs->z_lock); 108147f263f4Sek list_destroy(&zfsvfs->z_all_znodes); 108247f263f4Sek rrw_destroy(&zfsvfs->z_teardown_lock); 108347f263f4Sek rw_destroy(&zfsvfs->z_teardown_inactive_lock); 108447f263f4Sek rw_destroy(&zfsvfs->z_fuid_lock); 108514843421SMatthew Ahrens for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 108614843421SMatthew Ahrens mutex_destroy(&zfsvfs->z_hold_mtx[i]); 108747f263f4Sek kmem_free(zfsvfs, sizeof (zfsvfs_t)); 108847f263f4Sek } 108947f263f4Sek 109014843421SMatthew Ahrens static void 109114843421SMatthew Ahrens zfs_set_fuid_feature(zfsvfs_t *zfsvfs) 109214843421SMatthew Ahrens { 109314843421SMatthew Ahrens zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 109414843421SMatthew Ahrens if (zfsvfs->z_use_fuids && zfsvfs->z_vfs) { 109514843421SMatthew Ahrens vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR); 109614843421SMatthew Ahrens vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS); 109714843421SMatthew Ahrens vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS); 109814843421SMatthew Ahrens vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE); 1099e802abbdSTim Haley vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER); 11007a286c47SDai Ngo vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE); 110114843421SMatthew Ahrens } 11020a586ceaSMark Shellenbaum zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); 110314843421SMatthew Ahrens } 110414843421SMatthew Ahrens 1105ea8dc4b6Seschrock static int 1106088f3894Sahrens zfs_domount(vfs_t *vfsp, char *osname) 1107ea8dc4b6Seschrock { 1108ea8dc4b6Seschrock dev_t mount_dev; 110914843421SMatthew Ahrens uint64_t recordsize, fsid_guid; 1110ea8dc4b6Seschrock int error = 0; 1111ea8dc4b6Seschrock zfsvfs_t *zfsvfs; 1112ea8dc4b6Seschrock 1113ea8dc4b6Seschrock ASSERT(vfsp); 1114ea8dc4b6Seschrock ASSERT(osname); 1115fa9e4066Sahrens 1116503ad85cSMatthew Ahrens error = zfsvfs_create(osname, &zfsvfs); 111714843421SMatthew Ahrens if (error) 111814843421SMatthew Ahrens return (error); 1119fa9e4066Sahrens zfsvfs->z_vfs = vfsp; 1120fa9e4066Sahrens 1121ea8dc4b6Seschrock /* Initialize the generic filesystem structure. */ 1122fa9e4066Sahrens vfsp->vfs_bcount = 0; 1123fa9e4066Sahrens vfsp->vfs_data = NULL; 1124fa9e4066Sahrens 1125ea8dc4b6Seschrock if (zfs_create_unique_device(&mount_dev) == -1) { 1126ea8dc4b6Seschrock error = ENODEV; 1127ea8dc4b6Seschrock goto out; 1128ea8dc4b6Seschrock } 1129fa9e4066Sahrens ASSERT(vfs_devismounted(mount_dev) == 0); 1130fa9e4066Sahrens 1131ea8dc4b6Seschrock if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 1132ea8dc4b6Seschrock NULL)) 1133ea8dc4b6Seschrock goto out; 1134fa9e4066Sahrens 1135fa9e4066Sahrens vfsp->vfs_dev = mount_dev; 1136fa9e4066Sahrens vfsp->vfs_fstype = zfsfstype; 1137fa9e4066Sahrens vfsp->vfs_bsize = recordsize; 1138fa9e4066Sahrens vfsp->vfs_flag |= VFS_NOTRUNC; 1139fa9e4066Sahrens vfsp->vfs_data = zfsvfs; 1140fa9e4066Sahrens 114114843421SMatthew Ahrens /* 114214843421SMatthew Ahrens * The fsid is 64 bits, composed of an 8-bit fs type, which 114314843421SMatthew Ahrens * separates our fsid from any other filesystem types, and a 114414843421SMatthew Ahrens * 56-bit objset unique ID. The objset unique ID is unique to 114514843421SMatthew Ahrens * all objsets open on this system, provided by unique_create(). 114614843421SMatthew Ahrens * The 8-bit fs type must be put in the low bits of fsid[1] 114714843421SMatthew Ahrens * because that's where other Solaris filesystems put it. 114814843421SMatthew Ahrens */ 114914843421SMatthew Ahrens fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os); 115014843421SMatthew Ahrens ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0); 115114843421SMatthew Ahrens vfsp->vfs_fsid.val[0] = fsid_guid; 115214843421SMatthew Ahrens vfsp->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) | 115314843421SMatthew Ahrens zfsfstype & 0xFF; 1154ea8dc4b6Seschrock 1155da6c28aaSamw /* 1156da6c28aaSamw * Set features for file system. 1157da6c28aaSamw */ 115814843421SMatthew Ahrens zfs_set_fuid_feature(zfsvfs); 1159de8267e0Stimh if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { 1160de8267e0Stimh vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 1161de8267e0Stimh vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 1162de8267e0Stimh vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE); 1163de8267e0Stimh } else if (zfsvfs->z_case == ZFS_CASE_MIXED) { 1164de8267e0Stimh vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 1165de8267e0Stimh vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 1166de8267e0Stimh } 1167c242f9a0Schunli zhang - Sun Microsystems - Irvine United States vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED); 1168da6c28aaSamw 1169ea8dc4b6Seschrock if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 1170da6c28aaSamw uint64_t pval; 11717b55fa8eSck 1172fa9e4066Sahrens atime_changed_cb(zfsvfs, B_FALSE); 1173fa9e4066Sahrens readonly_changed_cb(zfsvfs, B_TRUE); 1174da6c28aaSamw if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL)) 11757b55fa8eSck goto out; 1176da6c28aaSamw xattr_changed_cb(zfsvfs, pval); 1177fa9e4066Sahrens zfsvfs->z_issnap = B_TRUE; 1178b9deb9cbSMark J Musante zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED; 1179777badbaSMatthew Ahrens 1180503ad85cSMatthew Ahrens mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); 1181777badbaSMatthew Ahrens dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 1182503ad85cSMatthew Ahrens mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); 1183fa9e4066Sahrens } else { 1184f18faf3fSek error = zfsvfs_setup(zfsvfs, B_TRUE); 1185ea8dc4b6Seschrock } 1186fa9e4066Sahrens 1187ea8dc4b6Seschrock if (!zfsvfs->z_issnap) 1188ea8dc4b6Seschrock zfsctl_create(zfsvfs); 1189ea8dc4b6Seschrock out: 1190ea8dc4b6Seschrock if (error) { 1191503ad85cSMatthew Ahrens dmu_objset_disown(zfsvfs->z_os, zfsvfs); 119214843421SMatthew Ahrens zfsvfs_free(zfsvfs); 1193ea8dc4b6Seschrock } else { 1194ea8dc4b6Seschrock atomic_add_32(&zfs_active_fs_count, 1); 1195ea8dc4b6Seschrock } 1196fa9e4066Sahrens 1197ea8dc4b6Seschrock return (error); 1198ea8dc4b6Seschrock } 1199ea8dc4b6Seschrock 1200ea8dc4b6Seschrock void 1201ea8dc4b6Seschrock zfs_unregister_callbacks(zfsvfs_t *zfsvfs) 1202ea8dc4b6Seschrock { 1203ea8dc4b6Seschrock objset_t *os = zfsvfs->z_os; 1204ea8dc4b6Seschrock struct dsl_dataset *ds; 1205ea8dc4b6Seschrock 1206ea8dc4b6Seschrock /* 1207ea8dc4b6Seschrock * Unregister properties. 1208ea8dc4b6Seschrock */ 1209ea8dc4b6Seschrock if (!dmu_objset_is_snapshot(os)) { 1210fa9e4066Sahrens ds = dmu_objset_ds(os); 1211ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 1212fa9e4066Sahrens zfsvfs) == 0); 1213fa9e4066Sahrens 12147b55fa8eSck VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 12157b55fa8eSck zfsvfs) == 0); 12167b55fa8eSck 1217ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 1218fa9e4066Sahrens zfsvfs) == 0); 1219fa9e4066Sahrens 1220ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 1221fa9e4066Sahrens zfsvfs) == 0); 1222fa9e4066Sahrens 1223ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb, 1224fa9e4066Sahrens zfsvfs) == 0); 1225fa9e4066Sahrens 1226ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 1227fa9e4066Sahrens zfsvfs) == 0); 1228fa9e4066Sahrens 1229ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 1230fa9e4066Sahrens zfsvfs) == 0); 1231fa9e4066Sahrens 1232ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 1233fa9e4066Sahrens zfsvfs) == 0); 1234fa9e4066Sahrens 1235ea8dc4b6Seschrock VERIFY(dsl_prop_unregister(ds, "aclinherit", 1236fa9e4066Sahrens acl_inherit_changed_cb, zfsvfs) == 0); 1237da6c28aaSamw 1238da6c28aaSamw VERIFY(dsl_prop_unregister(ds, "vscan", 1239da6c28aaSamw vscan_changed_cb, zfsvfs) == 0); 1240ea8dc4b6Seschrock } 1241ea8dc4b6Seschrock } 1242fa9e4066Sahrens 1243b1b8ab34Slling /* 1244b1b8ab34Slling * Convert a decimal digit string to a uint64_t integer. 1245b1b8ab34Slling */ 1246b1b8ab34Slling static int 1247b1b8ab34Slling str_to_uint64(char *str, uint64_t *objnum) 1248b1b8ab34Slling { 1249b1b8ab34Slling uint64_t num = 0; 1250b1b8ab34Slling 1251b1b8ab34Slling while (*str) { 1252b1b8ab34Slling if (*str < '0' || *str > '9') 1253b1b8ab34Slling return (EINVAL); 1254b1b8ab34Slling 1255b1b8ab34Slling num = num*10 + *str++ - '0'; 1256b1b8ab34Slling } 1257b1b8ab34Slling 1258b1b8ab34Slling *objnum = num; 1259b1b8ab34Slling return (0); 1260b1b8ab34Slling } 1261b1b8ab34Slling 1262b1b8ab34Slling /* 1263b1b8ab34Slling * The boot path passed from the boot loader is in the form of 1264b1b8ab34Slling * "rootpool-name/root-filesystem-object-number'. Convert this 1265b1b8ab34Slling * string to a dataset name: "rootpool-name/root-filesystem-name". 1266b1b8ab34Slling */ 1267b1b8ab34Slling static int 1268e7cbe64fSgw zfs_parse_bootfs(char *bpath, char *outpath) 1269b1b8ab34Slling { 1270b1b8ab34Slling char *slashp; 1271b1b8ab34Slling uint64_t objnum; 1272b1b8ab34Slling int error; 1273b1b8ab34Slling 1274b1b8ab34Slling if (*bpath == 0 || *bpath == '/') 1275b1b8ab34Slling return (EINVAL); 1276b1b8ab34Slling 127719397407SSherry Moore (void) strcpy(outpath, bpath); 127819397407SSherry Moore 1279b1b8ab34Slling slashp = strchr(bpath, '/'); 1280b1b8ab34Slling 1281b1b8ab34Slling /* if no '/', just return the pool name */ 1282b1b8ab34Slling if (slashp == NULL) { 1283b1b8ab34Slling return (0); 1284b1b8ab34Slling } 1285b1b8ab34Slling 128619397407SSherry Moore /* if not a number, just return the root dataset name */ 128719397407SSherry Moore if (str_to_uint64(slashp+1, &objnum)) { 128819397407SSherry Moore return (0); 128919397407SSherry Moore } 1290b1b8ab34Slling 1291b1b8ab34Slling *slashp = '\0'; 1292b1b8ab34Slling error = dsl_dsobj_to_dsname(bpath, objnum, outpath); 1293b1b8ab34Slling *slashp = '/'; 1294b1b8ab34Slling 1295b1b8ab34Slling return (error); 1296b1b8ab34Slling } 1297b1b8ab34Slling 12984201a95eSRic Aleshire /* 12994201a95eSRic Aleshire * zfs_check_global_label: 13004201a95eSRic Aleshire * Check that the hex label string is appropriate for the dataset 13014201a95eSRic Aleshire * being mounted into the global_zone proper. 13024201a95eSRic Aleshire * 13034201a95eSRic Aleshire * Return an error if the hex label string is not default or 13044201a95eSRic Aleshire * admin_low/admin_high. For admin_low labels, the corresponding 13054201a95eSRic Aleshire * dataset must be readonly. 13064201a95eSRic Aleshire */ 13074201a95eSRic Aleshire int 13084201a95eSRic Aleshire zfs_check_global_label(const char *dsname, const char *hexsl) 13094201a95eSRic Aleshire { 13104201a95eSRic Aleshire if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0) 13114201a95eSRic Aleshire return (0); 13124201a95eSRic Aleshire if (strcasecmp(hexsl, ADMIN_HIGH) == 0) 13134201a95eSRic Aleshire return (0); 13144201a95eSRic Aleshire if (strcasecmp(hexsl, ADMIN_LOW) == 0) { 13154201a95eSRic Aleshire /* must be readonly */ 13164201a95eSRic Aleshire uint64_t rdonly; 13174201a95eSRic Aleshire 13184201a95eSRic Aleshire if (dsl_prop_get_integer(dsname, 13194201a95eSRic Aleshire zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL)) 13204201a95eSRic Aleshire return (EACCES); 13214201a95eSRic Aleshire return (rdonly ? 0 : EACCES); 13224201a95eSRic Aleshire } 13234201a95eSRic Aleshire return (EACCES); 13244201a95eSRic Aleshire } 13254201a95eSRic Aleshire 13264201a95eSRic Aleshire /* 13274201a95eSRic Aleshire * zfs_mount_label_policy: 13284201a95eSRic Aleshire * Determine whether the mount is allowed according to MAC check. 13294201a95eSRic Aleshire * by comparing (where appropriate) label of the dataset against 13304201a95eSRic Aleshire * the label of the zone being mounted into. If the dataset has 13314201a95eSRic Aleshire * no label, create one. 13324201a95eSRic Aleshire * 13334201a95eSRic Aleshire * Returns: 13344201a95eSRic Aleshire * 0 : access allowed 13354201a95eSRic Aleshire * >0 : error code, such as EACCES 13364201a95eSRic Aleshire */ 13374201a95eSRic Aleshire static int 13384201a95eSRic Aleshire zfs_mount_label_policy(vfs_t *vfsp, char *osname) 13394201a95eSRic Aleshire { 13404201a95eSRic Aleshire int error, retv; 13414201a95eSRic Aleshire zone_t *mntzone = NULL; 13424201a95eSRic Aleshire ts_label_t *mnt_tsl; 13434201a95eSRic Aleshire bslabel_t *mnt_sl; 13444201a95eSRic Aleshire bslabel_t ds_sl; 13454201a95eSRic Aleshire char ds_hexsl[MAXNAMELEN]; 13464201a95eSRic Aleshire 13474201a95eSRic Aleshire retv = EACCES; /* assume the worst */ 13484201a95eSRic Aleshire 13494201a95eSRic Aleshire /* 13504201a95eSRic Aleshire * Start by getting the dataset label if it exists. 13514201a95eSRic Aleshire */ 13524201a95eSRic Aleshire error = dsl_prop_get(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL), 13534201a95eSRic Aleshire 1, sizeof (ds_hexsl), &ds_hexsl, NULL); 13544201a95eSRic Aleshire if (error) 13554201a95eSRic Aleshire return (EACCES); 13564201a95eSRic Aleshire 13574201a95eSRic Aleshire /* 13584201a95eSRic Aleshire * If labeling is NOT enabled, then disallow the mount of datasets 13594201a95eSRic Aleshire * which have a non-default label already. No other label checks 13604201a95eSRic Aleshire * are needed. 13614201a95eSRic Aleshire */ 13624201a95eSRic Aleshire if (!is_system_labeled()) { 13634201a95eSRic Aleshire if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) 13644201a95eSRic Aleshire return (0); 13654201a95eSRic Aleshire return (EACCES); 13664201a95eSRic Aleshire } 13674201a95eSRic Aleshire 13684201a95eSRic Aleshire /* 13694201a95eSRic Aleshire * Get the label of the mountpoint. If mounting into the global 13704201a95eSRic Aleshire * zone (i.e. mountpoint is not within an active zone and the 13714201a95eSRic Aleshire * zoned property is off), the label must be default or 13724201a95eSRic Aleshire * admin_low/admin_high only; no other checks are needed. 13734201a95eSRic Aleshire */ 13744201a95eSRic Aleshire mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE); 13754201a95eSRic Aleshire if (mntzone->zone_id == GLOBAL_ZONEID) { 13764201a95eSRic Aleshire uint64_t zoned; 13774201a95eSRic Aleshire 13784201a95eSRic Aleshire zone_rele(mntzone); 13794201a95eSRic Aleshire 13804201a95eSRic Aleshire if (dsl_prop_get_integer(osname, 13814201a95eSRic Aleshire zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL)) 13824201a95eSRic Aleshire return (EACCES); 13834201a95eSRic Aleshire if (!zoned) 13844201a95eSRic Aleshire return (zfs_check_global_label(osname, ds_hexsl)); 13854201a95eSRic Aleshire else 13864201a95eSRic Aleshire /* 13874201a95eSRic Aleshire * This is the case of a zone dataset being mounted 13884201a95eSRic Aleshire * initially, before the zone has been fully created; 13894201a95eSRic Aleshire * allow this mount into global zone. 13904201a95eSRic Aleshire */ 13914201a95eSRic Aleshire return (0); 13924201a95eSRic Aleshire } 13934201a95eSRic Aleshire 13944201a95eSRic Aleshire mnt_tsl = mntzone->zone_slabel; 13954201a95eSRic Aleshire ASSERT(mnt_tsl != NULL); 13964201a95eSRic Aleshire label_hold(mnt_tsl); 13974201a95eSRic Aleshire mnt_sl = label2bslabel(mnt_tsl); 13984201a95eSRic Aleshire 13994201a95eSRic Aleshire if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) { 14004201a95eSRic Aleshire /* 14014201a95eSRic Aleshire * The dataset doesn't have a real label, so fabricate one. 14024201a95eSRic Aleshire */ 14034201a95eSRic Aleshire char *str = NULL; 14044201a95eSRic Aleshire 14054201a95eSRic Aleshire if (l_to_str_internal(mnt_sl, &str) == 0 && 14064201a95eSRic Aleshire dsl_prop_set(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL), 140792241e0bSTom Erickson ZPROP_SRC_LOCAL, 1, strlen(str) + 1, str) == 0) 14084201a95eSRic Aleshire retv = 0; 14094201a95eSRic Aleshire if (str != NULL) 14104201a95eSRic Aleshire kmem_free(str, strlen(str) + 1); 14114201a95eSRic Aleshire } else if (hexstr_to_label(ds_hexsl, &ds_sl) == 0) { 14124201a95eSRic Aleshire /* 14134201a95eSRic Aleshire * Now compare labels to complete the MAC check. If the 14144201a95eSRic Aleshire * labels are equal then allow access. If the mountpoint 14154201a95eSRic Aleshire * label dominates the dataset label, allow readonly access. 14164201a95eSRic Aleshire * Otherwise, access is denied. 14174201a95eSRic Aleshire */ 14184201a95eSRic Aleshire if (blequal(mnt_sl, &ds_sl)) 14194201a95eSRic Aleshire retv = 0; 14204201a95eSRic Aleshire else if (bldominates(mnt_sl, &ds_sl)) { 14214201a95eSRic Aleshire vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 14224201a95eSRic Aleshire retv = 0; 14234201a95eSRic Aleshire } 14244201a95eSRic Aleshire } 14254201a95eSRic Aleshire 14264201a95eSRic Aleshire label_rele(mnt_tsl); 14274201a95eSRic Aleshire zone_rele(mntzone); 14284201a95eSRic Aleshire return (retv); 14294201a95eSRic Aleshire } 14304201a95eSRic Aleshire 1431ea8dc4b6Seschrock static int 1432ea8dc4b6Seschrock zfs_mountroot(vfs_t *vfsp, enum whymountroot why) 1433ea8dc4b6Seschrock { 1434ea8dc4b6Seschrock int error = 0; 1435ea8dc4b6Seschrock static int zfsrootdone = 0; 1436ea8dc4b6Seschrock zfsvfs_t *zfsvfs = NULL; 1437ea8dc4b6Seschrock znode_t *zp = NULL; 1438ea8dc4b6Seschrock vnode_t *vp = NULL; 1439e7cbe64fSgw char *zfs_bootfs; 1440051aabe6Staylor char *zfs_devid; 1441ea8dc4b6Seschrock 1442ea8dc4b6Seschrock ASSERT(vfsp); 1443ea8dc4b6Seschrock 1444ea8dc4b6Seschrock /* 1445b1b8ab34Slling * The filesystem that we mount as root is defined in the 1446e7cbe64fSgw * boot property "zfs-bootfs" with a format of 1447e7cbe64fSgw * "poolname/root-dataset-objnum". 1448ea8dc4b6Seschrock */ 1449ea8dc4b6Seschrock if (why == ROOT_INIT) { 1450ea8dc4b6Seschrock if (zfsrootdone++) 1451ea8dc4b6Seschrock return (EBUSY); 1452e7cbe64fSgw /* 1453e7cbe64fSgw * the process of doing a spa_load will require the 1454e7cbe64fSgw * clock to be set before we could (for example) do 1455e7cbe64fSgw * something better by looking at the timestamp on 1456e7cbe64fSgw * an uberblock, so just set it to -1. 1457e7cbe64fSgw */ 1458e7cbe64fSgw clkset(-1); 1459fa9e4066Sahrens 1460051aabe6Staylor if ((zfs_bootfs = spa_get_bootprop("zfs-bootfs")) == NULL) { 1461051aabe6Staylor cmn_err(CE_NOTE, "spa_get_bootfs: can not get " 1462051aabe6Staylor "bootfs name"); 1463e7cbe64fSgw return (EINVAL); 1464986fd29aSsetje } 1465051aabe6Staylor zfs_devid = spa_get_bootprop("diskdevid"); 1466051aabe6Staylor error = spa_import_rootpool(rootfs.bo_name, zfs_devid); 1467051aabe6Staylor if (zfs_devid) 1468051aabe6Staylor spa_free_bootprop(zfs_devid); 1469051aabe6Staylor if (error) { 1470051aabe6Staylor spa_free_bootprop(zfs_bootfs); 1471051aabe6Staylor cmn_err(CE_NOTE, "spa_import_rootpool: error %d", 1472e7cbe64fSgw error); 1473e7cbe64fSgw return (error); 1474e7cbe64fSgw } 1475e7cbe64fSgw if (error = zfs_parse_bootfs(zfs_bootfs, rootfs.bo_name)) { 1476051aabe6Staylor spa_free_bootprop(zfs_bootfs); 1477051aabe6Staylor cmn_err(CE_NOTE, "zfs_parse_bootfs: error %d", 1478e7cbe64fSgw error); 1479b1b8ab34Slling return (error); 1480e7cbe64fSgw } 1481e7cbe64fSgw 1482051aabe6Staylor spa_free_bootprop(zfs_bootfs); 1483fa9e4066Sahrens 1484ea8dc4b6Seschrock if (error = vfs_lock(vfsp)) 1485ea8dc4b6Seschrock return (error); 1486fa9e4066Sahrens 1487088f3894Sahrens if (error = zfs_domount(vfsp, rootfs.bo_name)) { 1488051aabe6Staylor cmn_err(CE_NOTE, "zfs_domount: error %d", error); 1489ea8dc4b6Seschrock goto out; 1490e7cbe64fSgw } 1491ea8dc4b6Seschrock 1492ea8dc4b6Seschrock zfsvfs = (zfsvfs_t *)vfsp->vfs_data; 1493ea8dc4b6Seschrock ASSERT(zfsvfs); 1494e7cbe64fSgw if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) { 1495051aabe6Staylor cmn_err(CE_NOTE, "zfs_zget: error %d", error); 1496ea8dc4b6Seschrock goto out; 1497e7cbe64fSgw } 1498ea8dc4b6Seschrock 1499ea8dc4b6Seschrock vp = ZTOV(zp); 1500ea8dc4b6Seschrock mutex_enter(&vp->v_lock); 1501ea8dc4b6Seschrock vp->v_flag |= VROOT; 1502ea8dc4b6Seschrock mutex_exit(&vp->v_lock); 1503ea8dc4b6Seschrock rootvp = vp; 1504ea8dc4b6Seschrock 1505ea8dc4b6Seschrock /* 150640d3dfe1Smarks * Leave rootvp held. The root file system is never unmounted. 1507ea8dc4b6Seschrock */ 1508ea8dc4b6Seschrock 1509ea8dc4b6Seschrock vfs_add((struct vnode *)0, vfsp, 1510ea8dc4b6Seschrock (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0); 1511ea8dc4b6Seschrock out: 1512ea8dc4b6Seschrock vfs_unlock(vfsp); 1513e7cbe64fSgw return (error); 1514ea8dc4b6Seschrock } else if (why == ROOT_REMOUNT) { 1515ea8dc4b6Seschrock readonly_changed_cb(vfsp->vfs_data, B_FALSE); 1516ea8dc4b6Seschrock vfsp->vfs_flag |= VFS_REMOUNT; 1517b510d378Slling 1518b510d378Slling /* refresh mount options */ 1519b510d378Slling zfs_unregister_callbacks(vfsp->vfs_data); 1520b510d378Slling return (zfs_register_callbacks(vfsp)); 1521b510d378Slling 1522ea8dc4b6Seschrock } else if (why == ROOT_UNMOUNT) { 1523ea8dc4b6Seschrock zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data); 1524ea8dc4b6Seschrock (void) zfs_sync(vfsp, 0, 0); 1525ea8dc4b6Seschrock return (0); 1526ea8dc4b6Seschrock } 1527ea8dc4b6Seschrock 1528ea8dc4b6Seschrock /* 1529ea8dc4b6Seschrock * if "why" is equal to anything else other than ROOT_INIT, 1530ea8dc4b6Seschrock * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it. 1531ea8dc4b6Seschrock */ 1532ea8dc4b6Seschrock return (ENOTSUP); 1533ea8dc4b6Seschrock } 1534ea8dc4b6Seschrock 1535ea8dc4b6Seschrock /*ARGSUSED*/ 1536ea8dc4b6Seschrock static int 1537ea8dc4b6Seschrock zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 1538ea8dc4b6Seschrock { 1539ea8dc4b6Seschrock char *osname; 1540ea8dc4b6Seschrock pathname_t spn; 1541ea8dc4b6Seschrock int error = 0; 1542ea8dc4b6Seschrock uio_seg_t fromspace = (uap->flags & MS_SYSSPACE) ? 1543b1b8ab34Slling UIO_SYSSPACE : UIO_USERSPACE; 1544ea8dc4b6Seschrock int canwrite; 1545ea8dc4b6Seschrock 1546ea8dc4b6Seschrock if (mvp->v_type != VDIR) 1547ea8dc4b6Seschrock return (ENOTDIR); 1548ea8dc4b6Seschrock 1549ea8dc4b6Seschrock mutex_enter(&mvp->v_lock); 1550ea8dc4b6Seschrock if ((uap->flags & MS_REMOUNT) == 0 && 1551ea8dc4b6Seschrock (uap->flags & MS_OVERLAY) == 0 && 1552ea8dc4b6Seschrock (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 1553ea8dc4b6Seschrock mutex_exit(&mvp->v_lock); 1554ea8dc4b6Seschrock return (EBUSY); 1555ea8dc4b6Seschrock } 1556ea8dc4b6Seschrock mutex_exit(&mvp->v_lock); 1557ea8dc4b6Seschrock 1558ea8dc4b6Seschrock /* 1559ea8dc4b6Seschrock * ZFS does not support passing unparsed data in via MS_DATA. 1560ea8dc4b6Seschrock * Users should use the MS_OPTIONSTR interface; this means 1561ea8dc4b6Seschrock * that all option parsing is already done and the options struct 1562ea8dc4b6Seschrock * can be interrogated. 1563ea8dc4b6Seschrock */ 1564ea8dc4b6Seschrock if ((uap->flags & MS_DATA) && uap->datalen > 0) 1565ea8dc4b6Seschrock return (EINVAL); 1566ea8dc4b6Seschrock 1567ea8dc4b6Seschrock /* 1568ea8dc4b6Seschrock * Get the objset name (the "special" mount argument). 1569ea8dc4b6Seschrock */ 1570ea8dc4b6Seschrock if (error = pn_get(uap->spec, fromspace, &spn)) 1571ea8dc4b6Seschrock return (error); 1572ea8dc4b6Seschrock 1573ea8dc4b6Seschrock osname = spn.pn_path; 1574ea8dc4b6Seschrock 1575ecd6cf80Smarks /* 1576ecd6cf80Smarks * Check for mount privilege? 1577ecd6cf80Smarks * 1578ecd6cf80Smarks * If we don't have privilege then see if 1579ecd6cf80Smarks * we have local permission to allow it 1580ecd6cf80Smarks */ 1581ecd6cf80Smarks error = secpolicy_fs_mount(cr, mvp, vfsp); 1582ecd6cf80Smarks if (error) { 158398679b56SMark Shellenbaum if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) == 0) { 1584ecd6cf80Smarks vattr_t vattr; 1585ecd6cf80Smarks 1586ecd6cf80Smarks /* 1587ecd6cf80Smarks * Make sure user is the owner of the mount point 1588ecd6cf80Smarks * or has sufficient privileges. 1589ecd6cf80Smarks */ 1590ecd6cf80Smarks 1591ecd6cf80Smarks vattr.va_mask = AT_UID; 1592ecd6cf80Smarks 159398679b56SMark Shellenbaum if (VOP_GETATTR(mvp, &vattr, 0, cr, NULL)) { 1594ecd6cf80Smarks goto out; 1595ecd6cf80Smarks } 1596ecd6cf80Smarks 15972459a9eaSmarks if (secpolicy_vnode_owner(cr, vattr.va_uid) != 0 && 15982459a9eaSmarks VOP_ACCESS(mvp, VWRITE, 0, cr, NULL) != 0) { 1599ecd6cf80Smarks goto out; 1600ecd6cf80Smarks } 1601ecd6cf80Smarks secpolicy_fs_mount_clearopts(cr, vfsp); 1602ecd6cf80Smarks } else { 1603ecd6cf80Smarks goto out; 1604ecd6cf80Smarks } 1605ecd6cf80Smarks } 1606ea8dc4b6Seschrock 1607ea8dc4b6Seschrock /* 1608ea8dc4b6Seschrock * Refuse to mount a filesystem if we are in a local zone and the 1609ea8dc4b6Seschrock * dataset is not visible. 1610ea8dc4b6Seschrock */ 1611ea8dc4b6Seschrock if (!INGLOBALZONE(curproc) && 1612ea8dc4b6Seschrock (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 1613ea8dc4b6Seschrock error = EPERM; 1614ea8dc4b6Seschrock goto out; 1615ea8dc4b6Seschrock } 1616ea8dc4b6Seschrock 16174201a95eSRic Aleshire error = zfs_mount_label_policy(vfsp, osname); 16184201a95eSRic Aleshire if (error) 16194201a95eSRic Aleshire goto out; 16204201a95eSRic Aleshire 1621b510d378Slling /* 1622b510d378Slling * When doing a remount, we simply refresh our temporary properties 1623b510d378Slling * according to those options set in the current VFS options. 1624b510d378Slling */ 1625b510d378Slling if (uap->flags & MS_REMOUNT) { 1626b510d378Slling /* refresh mount options */ 1627b510d378Slling zfs_unregister_callbacks(vfsp->vfs_data); 1628b510d378Slling error = zfs_register_callbacks(vfsp); 1629b510d378Slling goto out; 1630b510d378Slling } 1631b510d378Slling 1632088f3894Sahrens error = zfs_domount(vfsp, osname); 1633ea8dc4b6Seschrock 1634142ae85dSChris Kirby /* 1635142ae85dSChris Kirby * Add an extra VFS_HOLD on our parent vfs so that it can't 1636142ae85dSChris Kirby * disappear due to a forced unmount. 1637142ae85dSChris Kirby */ 1638984a131bSChris Kirby if (error == 0 && ((zfsvfs_t *)vfsp->vfs_data)->z_issnap) 1639142ae85dSChris Kirby VFS_HOLD(mvp->v_vfsp); 1640142ae85dSChris Kirby 1641ea8dc4b6Seschrock out: 1642fa9e4066Sahrens pn_free(&spn); 1643fa9e4066Sahrens return (error); 1644fa9e4066Sahrens } 1645fa9e4066Sahrens 1646fa9e4066Sahrens static int 1647fa9e4066Sahrens zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp) 1648fa9e4066Sahrens { 1649fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1650fa9e4066Sahrens dev32_t d32; 1651a2eea2e1Sahrens uint64_t refdbytes, availbytes, usedobjs, availobjs; 1652fa9e4066Sahrens 1653fa9e4066Sahrens ZFS_ENTER(zfsvfs); 1654fa9e4066Sahrens 1655a2eea2e1Sahrens dmu_objset_space(zfsvfs->z_os, 1656a2eea2e1Sahrens &refdbytes, &availbytes, &usedobjs, &availobjs); 1657fa9e4066Sahrens 1658fa9e4066Sahrens /* 1659fa9e4066Sahrens * The underlying storage pool actually uses multiple block sizes. 1660fa9e4066Sahrens * We report the fragsize as the smallest block size we support, 1661fa9e4066Sahrens * and we report our blocksize as the filesystem's maximum blocksize. 1662fa9e4066Sahrens */ 1663fa9e4066Sahrens statp->f_frsize = 1UL << SPA_MINBLOCKSHIFT; 1664fa9e4066Sahrens statp->f_bsize = zfsvfs->z_max_blksz; 1665fa9e4066Sahrens 1666fa9e4066Sahrens /* 1667fa9e4066Sahrens * The following report "total" blocks of various kinds in the 1668fa9e4066Sahrens * file system, but reported in terms of f_frsize - the 1669fa9e4066Sahrens * "fragment" size. 1670fa9e4066Sahrens */ 1671fa9e4066Sahrens 1672a2eea2e1Sahrens statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; 1673a2eea2e1Sahrens statp->f_bfree = availbytes >> SPA_MINBLOCKSHIFT; 1674fa9e4066Sahrens statp->f_bavail = statp->f_bfree; /* no root reservation */ 1675fa9e4066Sahrens 1676fa9e4066Sahrens /* 1677fa9e4066Sahrens * statvfs() should really be called statufs(), because it assumes 1678fa9e4066Sahrens * static metadata. ZFS doesn't preallocate files, so the best 1679fa9e4066Sahrens * we can do is report the max that could possibly fit in f_files, 1680fa9e4066Sahrens * and that minus the number actually used in f_ffree. 1681fa9e4066Sahrens * For f_ffree, report the smaller of the number of object available 1682fa9e4066Sahrens * and the number of blocks (each object will take at least a block). 1683fa9e4066Sahrens */ 1684a2eea2e1Sahrens statp->f_ffree = MIN(availobjs, statp->f_bfree); 1685fa9e4066Sahrens statp->f_favail = statp->f_ffree; /* no "root reservation" */ 1686a2eea2e1Sahrens statp->f_files = statp->f_ffree + usedobjs; 1687fa9e4066Sahrens 1688fa9e4066Sahrens (void) cmpldev(&d32, vfsp->vfs_dev); 1689fa9e4066Sahrens statp->f_fsid = d32; 1690fa9e4066Sahrens 1691fa9e4066Sahrens /* 1692fa9e4066Sahrens * We're a zfs filesystem. 1693fa9e4066Sahrens */ 1694fa9e4066Sahrens (void) strcpy(statp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name); 1695fa9e4066Sahrens 1696a5be7ebbSmarks statp->f_flag = vf_to_stf(vfsp->vfs_flag); 1697fa9e4066Sahrens 1698fa9e4066Sahrens statp->f_namemax = ZFS_MAXNAMELEN; 1699fa9e4066Sahrens 1700fa9e4066Sahrens /* 1701fa9e4066Sahrens * We have all of 32 characters to stuff a string here. 1702fa9e4066Sahrens * Is there anything useful we could/should provide? 1703fa9e4066Sahrens */ 1704fa9e4066Sahrens bzero(statp->f_fstr, sizeof (statp->f_fstr)); 1705fa9e4066Sahrens 1706fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1707fa9e4066Sahrens return (0); 1708fa9e4066Sahrens } 1709fa9e4066Sahrens 1710fa9e4066Sahrens static int 1711fa9e4066Sahrens zfs_root(vfs_t *vfsp, vnode_t **vpp) 1712fa9e4066Sahrens { 1713fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1714fa9e4066Sahrens znode_t *rootzp; 1715fa9e4066Sahrens int error; 1716fa9e4066Sahrens 1717fa9e4066Sahrens ZFS_ENTER(zfsvfs); 1718fa9e4066Sahrens 1719fa9e4066Sahrens error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 1720fa9e4066Sahrens if (error == 0) 1721fa9e4066Sahrens *vpp = ZTOV(rootzp); 1722fa9e4066Sahrens 1723fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1724fa9e4066Sahrens return (error); 1725fa9e4066Sahrens } 1726fa9e4066Sahrens 1727f18faf3fSek /* 1728f18faf3fSek * Teardown the zfsvfs::z_os. 1729f18faf3fSek * 1730f18faf3fSek * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock' 1731f18faf3fSek * and 'z_teardown_inactive_lock' held. 1732f18faf3fSek */ 1733f18faf3fSek static int 1734f18faf3fSek zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) 1735f18faf3fSek { 1736874395d5Smaybee znode_t *zp; 1737f18faf3fSek 1738f18faf3fSek rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); 1739f18faf3fSek 1740f18faf3fSek if (!unmounting) { 1741f18faf3fSek /* 1742f18faf3fSek * We purge the parent filesystem's vfsp as the parent 1743f18faf3fSek * filesystem and all of its snapshots have their vnode's 1744f18faf3fSek * v_vfsp set to the parent's filesystem's vfsp. Note, 1745f18faf3fSek * 'z_parent' is self referential for non-snapshots. 1746f18faf3fSek */ 1747f18faf3fSek (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 1748f18faf3fSek } 1749f18faf3fSek 1750f18faf3fSek /* 1751f18faf3fSek * Close the zil. NB: Can't close the zil while zfs_inactive 1752f18faf3fSek * threads are blocked as zil_close can call zfs_inactive. 1753f18faf3fSek */ 1754f18faf3fSek if (zfsvfs->z_log) { 1755f18faf3fSek zil_close(zfsvfs->z_log); 1756f18faf3fSek zfsvfs->z_log = NULL; 1757f18faf3fSek } 1758f18faf3fSek 1759f18faf3fSek rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER); 1760f18faf3fSek 1761f18faf3fSek /* 1762f18faf3fSek * If we are not unmounting (ie: online recv) and someone already 1763f18faf3fSek * unmounted this file system while we were doing the switcheroo, 1764f18faf3fSek * or a reopen of z_os failed then just bail out now. 1765f18faf3fSek */ 1766f18faf3fSek if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { 1767f18faf3fSek rw_exit(&zfsvfs->z_teardown_inactive_lock); 1768f18faf3fSek rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1769f18faf3fSek return (EIO); 1770f18faf3fSek } 1771f18faf3fSek 1772f18faf3fSek /* 1773f18faf3fSek * At this point there are no vops active, and any new vops will 1774f18faf3fSek * fail with EIO since we have z_teardown_lock for writer (only 1775f18faf3fSek * relavent for forced unmount). 1776f18faf3fSek * 1777f18faf3fSek * Release all holds on dbufs. 1778f18faf3fSek */ 1779f18faf3fSek mutex_enter(&zfsvfs->z_znodes_lock); 1780874395d5Smaybee for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL; 1781874395d5Smaybee zp = list_next(&zfsvfs->z_all_znodes, zp)) 17820a586ceaSMark Shellenbaum if (zp->z_sa_hdl) { 1783874395d5Smaybee ASSERT(ZTOV(zp)->v_count > 0); 1784874395d5Smaybee zfs_znode_dmu_fini(zp); 1785f18faf3fSek } 1786f18faf3fSek mutex_exit(&zfsvfs->z_znodes_lock); 1787f18faf3fSek 1788f18faf3fSek /* 1789f18faf3fSek * If we are unmounting, set the unmounted flag and let new vops 1790f18faf3fSek * unblock. zfs_inactive will have the unmounted behavior, and all 1791f18faf3fSek * other vops will fail with EIO. 1792f18faf3fSek */ 1793f18faf3fSek if (unmounting) { 1794f18faf3fSek zfsvfs->z_unmounted = B_TRUE; 1795f18faf3fSek rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1796f18faf3fSek rw_exit(&zfsvfs->z_teardown_inactive_lock); 1797f18faf3fSek } 1798f18faf3fSek 1799f18faf3fSek /* 1800f18faf3fSek * z_os will be NULL if there was an error in attempting to reopen 1801f18faf3fSek * zfsvfs, so just return as the properties had already been 1802f18faf3fSek * unregistered and cached data had been evicted before. 1803f18faf3fSek */ 1804f18faf3fSek if (zfsvfs->z_os == NULL) 1805f18faf3fSek return (0); 1806f18faf3fSek 1807f18faf3fSek /* 1808f18faf3fSek * Unregister properties. 1809f18faf3fSek */ 1810f18faf3fSek zfs_unregister_callbacks(zfsvfs); 1811f18faf3fSek 1812f18faf3fSek /* 1813f18faf3fSek * Evict cached data 1814f18faf3fSek */ 181547f263f4Sek if (dmu_objset_evict_dbufs(zfsvfs->z_os)) { 1816d3248e8bSmaybee txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 181747f263f4Sek (void) dmu_objset_evict_dbufs(zfsvfs->z_os); 1818d3248e8bSmaybee } 1819f18faf3fSek 1820f18faf3fSek return (0); 1821f18faf3fSek } 1822f18faf3fSek 1823fa9e4066Sahrens /*ARGSUSED*/ 1824fa9e4066Sahrens static int 1825fa9e4066Sahrens zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr) 1826fa9e4066Sahrens { 1827fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1828f18faf3fSek objset_t *os; 1829fa9e4066Sahrens int ret; 1830fa9e4066Sahrens 1831ecd6cf80Smarks ret = secpolicy_fs_unmount(cr, vfsp); 1832ecd6cf80Smarks if (ret) { 183398679b56SMark Shellenbaum if (dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), 183498679b56SMark Shellenbaum ZFS_DELEG_PERM_MOUNT, cr)) 1835ecd6cf80Smarks return (ret); 1836ecd6cf80Smarks } 1837033f9833Sek 1838ed097989Sek /* 1839ed097989Sek * We purge the parent filesystem's vfsp as the parent filesystem 1840ed097989Sek * and all of its snapshots have their vnode's v_vfsp set to the 1841ed097989Sek * parent's filesystem's vfsp. Note, 'z_parent' is self 1842ed097989Sek * referential for non-snapshots. 1843ed097989Sek */ 1844ed097989Sek (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 1845033f9833Sek 1846fa9e4066Sahrens /* 1847fa9e4066Sahrens * Unmount any snapshots mounted under .zfs before unmounting the 1848fa9e4066Sahrens * dataset itself. 1849fa9e4066Sahrens */ 1850fa9e4066Sahrens if (zfsvfs->z_ctldir != NULL && 1851ecd6cf80Smarks (ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) { 1852fa9e4066Sahrens return (ret); 1853ecd6cf80Smarks } 1854fa9e4066Sahrens 185591ebeef5Sahrens if (!(fflag & MS_FORCE)) { 1856fa9e4066Sahrens /* 185791ebeef5Sahrens * Check the number of active vnodes in the file system. 185891ebeef5Sahrens * Our count is maintained in the vfs structure, but the 185991ebeef5Sahrens * number is off by 1 to indicate a hold on the vfs 186091ebeef5Sahrens * structure itself. 186191ebeef5Sahrens * 186291ebeef5Sahrens * The '.zfs' directory maintains a reference of its 186391ebeef5Sahrens * own, and any active references underneath are 186491ebeef5Sahrens * reflected in the vnode count. 1865fa9e4066Sahrens */ 186691ebeef5Sahrens if (zfsvfs->z_ctldir == NULL) { 186791ebeef5Sahrens if (vfsp->vfs_count > 1) 186891ebeef5Sahrens return (EBUSY); 186991ebeef5Sahrens } else { 187091ebeef5Sahrens if (vfsp->vfs_count > 2 || 1871f18faf3fSek zfsvfs->z_ctldir->v_count > 1) 187291ebeef5Sahrens return (EBUSY); 1873fa9e4066Sahrens } 187491ebeef5Sahrens } 1875fa9e4066Sahrens 187691ebeef5Sahrens vfsp->vfs_flag |= VFS_UNMOUNTED; 187791ebeef5Sahrens 1878f18faf3fSek VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0); 1879f18faf3fSek os = zfsvfs->z_os; 188091ebeef5Sahrens 188191ebeef5Sahrens /* 1882f18faf3fSek * z_os will be NULL if there was an error in 1883f18faf3fSek * attempting to reopen zfsvfs. 188491ebeef5Sahrens */ 1885f18faf3fSek if (os != NULL) { 1886f18faf3fSek /* 1887f18faf3fSek * Unset the objset user_ptr. 1888f18faf3fSek */ 1889503ad85cSMatthew Ahrens mutex_enter(&os->os_user_ptr_lock); 1890f18faf3fSek dmu_objset_set_user(os, NULL); 1891503ad85cSMatthew Ahrens mutex_exit(&os->os_user_ptr_lock); 189291ebeef5Sahrens 1893f18faf3fSek /* 1894745cd3c5Smaybee * Finally release the objset 1895f18faf3fSek */ 1896503ad85cSMatthew Ahrens dmu_objset_disown(os, zfsvfs); 189791ebeef5Sahrens } 189891ebeef5Sahrens 189991ebeef5Sahrens /* 190091ebeef5Sahrens * We can now safely destroy the '.zfs' directory node. 190191ebeef5Sahrens */ 190291ebeef5Sahrens if (zfsvfs->z_ctldir != NULL) 190391ebeef5Sahrens zfsctl_destroy(zfsvfs); 1904fa9e4066Sahrens 1905fa9e4066Sahrens return (0); 1906fa9e4066Sahrens } 1907fa9e4066Sahrens 1908fa9e4066Sahrens static int 1909fa9e4066Sahrens zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 1910fa9e4066Sahrens { 1911fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1912fa9e4066Sahrens znode_t *zp; 1913fa9e4066Sahrens uint64_t object = 0; 1914fa9e4066Sahrens uint64_t fid_gen = 0; 1915fa9e4066Sahrens uint64_t gen_mask; 1916fa9e4066Sahrens uint64_t zp_gen; 1917fa9e4066Sahrens int i, err; 1918fa9e4066Sahrens 1919fa9e4066Sahrens *vpp = NULL; 1920fa9e4066Sahrens 1921fa9e4066Sahrens ZFS_ENTER(zfsvfs); 1922fa9e4066Sahrens 1923fa9e4066Sahrens if (fidp->fid_len == LONG_FID_LEN) { 1924fa9e4066Sahrens zfid_long_t *zlfid = (zfid_long_t *)fidp; 1925fa9e4066Sahrens uint64_t objsetid = 0; 1926fa9e4066Sahrens uint64_t setgen = 0; 1927fa9e4066Sahrens 1928fa9e4066Sahrens for (i = 0; i < sizeof (zlfid->zf_setid); i++) 1929fa9e4066Sahrens objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 1930fa9e4066Sahrens 1931fa9e4066Sahrens for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 1932fa9e4066Sahrens setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 1933fa9e4066Sahrens 1934fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1935fa9e4066Sahrens 1936fa9e4066Sahrens err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 1937fa9e4066Sahrens if (err) 1938fa9e4066Sahrens return (EINVAL); 1939fa9e4066Sahrens ZFS_ENTER(zfsvfs); 1940fa9e4066Sahrens } 1941fa9e4066Sahrens 1942fa9e4066Sahrens if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 1943fa9e4066Sahrens zfid_short_t *zfid = (zfid_short_t *)fidp; 1944fa9e4066Sahrens 1945fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_object); i++) 1946fa9e4066Sahrens object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 1947fa9e4066Sahrens 1948fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_gen); i++) 1949fa9e4066Sahrens fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 1950fa9e4066Sahrens } else { 1951fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1952fa9e4066Sahrens return (EINVAL); 1953fa9e4066Sahrens } 1954fa9e4066Sahrens 1955fa9e4066Sahrens /* A zero fid_gen means we are in the .zfs control directories */ 1956fa9e4066Sahrens if (fid_gen == 0 && 1957fa9e4066Sahrens (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { 1958fa9e4066Sahrens *vpp = zfsvfs->z_ctldir; 1959fa9e4066Sahrens ASSERT(*vpp != NULL); 1960fa9e4066Sahrens if (object == ZFSCTL_INO_SNAPDIR) { 1961fa9e4066Sahrens VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, 1962da6c28aaSamw 0, NULL, NULL, NULL, NULL, NULL) == 0); 1963fa9e4066Sahrens } else { 1964fa9e4066Sahrens VN_HOLD(*vpp); 1965fa9e4066Sahrens } 1966fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1967fa9e4066Sahrens return (0); 1968fa9e4066Sahrens } 1969fa9e4066Sahrens 1970fa9e4066Sahrens gen_mask = -1ULL >> (64 - 8 * i); 1971fa9e4066Sahrens 1972fa9e4066Sahrens dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 1973fa9e4066Sahrens if (err = zfs_zget(zfsvfs, object, &zp)) { 1974fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1975fa9e4066Sahrens return (err); 1976fa9e4066Sahrens } 19770a586ceaSMark Shellenbaum (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen, 19780a586ceaSMark Shellenbaum sizeof (uint64_t)); 19790a586ceaSMark Shellenbaum zp_gen = zp_gen & gen_mask; 1980fa9e4066Sahrens if (zp_gen == 0) 1981fa9e4066Sahrens zp_gen = 1; 1982893a6d32Sahrens if (zp->z_unlinked || zp_gen != fid_gen) { 1983fa9e4066Sahrens dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 1984fa9e4066Sahrens VN_RELE(ZTOV(zp)); 1985fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1986fa9e4066Sahrens return (EINVAL); 1987fa9e4066Sahrens } 1988fa9e4066Sahrens 1989fa9e4066Sahrens *vpp = ZTOV(zp); 1990fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1991fa9e4066Sahrens return (0); 1992fa9e4066Sahrens } 1993fa9e4066Sahrens 1994f18faf3fSek /* 1995f18faf3fSek * Block out VOPs and close zfsvfs_t::z_os 1996f18faf3fSek * 1997f18faf3fSek * Note, if successful, then we return with the 'z_teardown_lock' and 1998f18faf3fSek * 'z_teardown_inactive_lock' write held. 1999f18faf3fSek */ 2000f18faf3fSek int 2001503ad85cSMatthew Ahrens zfs_suspend_fs(zfsvfs_t *zfsvfs) 2002f18faf3fSek { 2003f18faf3fSek int error; 2004f18faf3fSek 2005f18faf3fSek if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) 2006f18faf3fSek return (error); 2007503ad85cSMatthew Ahrens dmu_objset_disown(zfsvfs->z_os, zfsvfs); 2008f18faf3fSek 2009f18faf3fSek return (0); 2010f18faf3fSek } 2011f18faf3fSek 2012f18faf3fSek /* 2013f18faf3fSek * Reopen zfsvfs_t::z_os and release VOPs. 2014f18faf3fSek */ 2015f18faf3fSek int 2016503ad85cSMatthew Ahrens zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname) 2017f18faf3fSek { 20180a586ceaSMark Shellenbaum int err, err2; 2019f18faf3fSek 2020f18faf3fSek ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock)); 2021f18faf3fSek ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); 2022f18faf3fSek 2023503ad85cSMatthew Ahrens err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zfsvfs, 2024503ad85cSMatthew Ahrens &zfsvfs->z_os); 2025f18faf3fSek if (err) { 2026f18faf3fSek zfsvfs->z_os = NULL; 2027f18faf3fSek } else { 2028f18faf3fSek znode_t *zp; 20290a586ceaSMark Shellenbaum uint64_t sa_obj = 0; 20300a586ceaSMark Shellenbaum 20310a586ceaSMark Shellenbaum err2 = zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ, 20320a586ceaSMark Shellenbaum ZFS_SA_ATTRS, 8, 1, &sa_obj); 20330a586ceaSMark Shellenbaum 20340a586ceaSMark Shellenbaum if ((err || err2) && zfsvfs->z_version >= ZPL_VERSION_SA) 20350a586ceaSMark Shellenbaum goto bail; 20360a586ceaSMark Shellenbaum 20370a586ceaSMark Shellenbaum 20381d8ccc7bSMark Shellenbaum if ((err = sa_setup(zfsvfs->z_os, sa_obj, 20391d8ccc7bSMark Shellenbaum zfs_attr_table, ZPL_END, &zfsvfs->z_attr_table)) != 0) 20401d8ccc7bSMark Shellenbaum goto bail; 2041f18faf3fSek 2042f18faf3fSek VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); 2043f18faf3fSek 2044f18faf3fSek /* 2045f18faf3fSek * Attempt to re-establish all the active znodes with 2046f18faf3fSek * their dbufs. If a zfs_rezget() fails, then we'll let 2047f18faf3fSek * any potential callers discover that via ZFS_ENTER_VERIFY_VP 2048f18faf3fSek * when they try to use their znode. 2049f18faf3fSek */ 2050f18faf3fSek mutex_enter(&zfsvfs->z_znodes_lock); 2051f18faf3fSek for (zp = list_head(&zfsvfs->z_all_znodes); zp; 2052f18faf3fSek zp = list_next(&zfsvfs->z_all_znodes, zp)) { 2053f18faf3fSek (void) zfs_rezget(zp); 2054f18faf3fSek } 2055f18faf3fSek mutex_exit(&zfsvfs->z_znodes_lock); 2056f18faf3fSek 2057f18faf3fSek } 2058f18faf3fSek 20590a586ceaSMark Shellenbaum bail: 2060f18faf3fSek /* release the VOPs */ 2061f18faf3fSek rw_exit(&zfsvfs->z_teardown_inactive_lock); 2062f18faf3fSek rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 2063f18faf3fSek 2064f18faf3fSek if (err) { 2065f18faf3fSek /* 2066f18faf3fSek * Since we couldn't reopen zfsvfs::z_os, force 2067f18faf3fSek * unmount this file system. 2068f18faf3fSek */ 2069f18faf3fSek if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) 2070f18faf3fSek (void) dounmount(zfsvfs->z_vfs, MS_FORCE, CRED()); 2071f18faf3fSek } 2072f18faf3fSek return (err); 2073f18faf3fSek } 2074f18faf3fSek 2075fa9e4066Sahrens static void 2076fa9e4066Sahrens zfs_freevfs(vfs_t *vfsp) 2077fa9e4066Sahrens { 2078fa9e4066Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 2079142ae85dSChris Kirby 2080142ae85dSChris Kirby /* 2081142ae85dSChris Kirby * If this is a snapshot, we have an extra VFS_HOLD on our parent 2082f80ce222SChris Kirby * from zfs_mount(). Release it here. If we came through 2083f80ce222SChris Kirby * zfs_mountroot() instead, we didn't grab an extra hold, so 2084f80ce222SChris Kirby * skip the VFS_RELE for rootvfs. 2085142ae85dSChris Kirby */ 2086f80ce222SChris Kirby if (zfsvfs->z_issnap && (vfsp != rootvfs)) 2087142ae85dSChris Kirby VFS_RELE(zfsvfs->z_parent->z_vfs); 2088142ae85dSChris Kirby 208914843421SMatthew Ahrens zfsvfs_free(zfsvfs); 2090fa9e4066Sahrens 2091fa9e4066Sahrens atomic_add_32(&zfs_active_fs_count, -1); 2092fa9e4066Sahrens } 2093fa9e4066Sahrens 2094fa9e4066Sahrens /* 2095fa9e4066Sahrens * VFS_INIT() initialization. Note that there is no VFS_FINI(), 2096fa9e4066Sahrens * so we can't safely do any non-idempotent initialization here. 2097fa9e4066Sahrens * Leave that to zfs_init() and zfs_fini(), which are called 2098fa9e4066Sahrens * from the module's _init() and _fini() entry points. 2099fa9e4066Sahrens */ 2100fa9e4066Sahrens /*ARGSUSED*/ 2101fa9e4066Sahrens static int 2102fa9e4066Sahrens zfs_vfsinit(int fstype, char *name) 2103fa9e4066Sahrens { 2104fa9e4066Sahrens int error; 2105fa9e4066Sahrens 2106fa9e4066Sahrens zfsfstype = fstype; 2107fa9e4066Sahrens 2108fa9e4066Sahrens /* 2109fa9e4066Sahrens * Setup vfsops and vnodeops tables. 2110fa9e4066Sahrens */ 2111fa9e4066Sahrens error = vfs_setfsops(fstype, zfs_vfsops_template, &zfs_vfsops); 2112fa9e4066Sahrens if (error != 0) { 2113fa9e4066Sahrens cmn_err(CE_WARN, "zfs: bad vfs ops template"); 2114fa9e4066Sahrens } 2115fa9e4066Sahrens 2116fa9e4066Sahrens error = zfs_create_op_tables(); 2117fa9e4066Sahrens if (error) { 2118fa9e4066Sahrens zfs_remove_op_tables(); 2119fa9e4066Sahrens cmn_err(CE_WARN, "zfs: bad vnode ops template"); 2120fa9e4066Sahrens (void) vfs_freevfsops_by_type(zfsfstype); 2121fa9e4066Sahrens return (error); 2122fa9e4066Sahrens } 2123fa9e4066Sahrens 2124fa9e4066Sahrens mutex_init(&zfs_dev_mtx, NULL, MUTEX_DEFAULT, NULL); 2125fa9e4066Sahrens 2126fa9e4066Sahrens /* 2127a0965f35Sbonwick * Unique major number for all zfs mounts. 2128a0965f35Sbonwick * If we run out of 32-bit minors, we'll getudev() another major. 2129fa9e4066Sahrens */ 2130a0965f35Sbonwick zfs_major = ddi_name_to_major(ZFS_DRIVER); 2131a0965f35Sbonwick zfs_minor = ZFS_MIN_MINOR; 2132fa9e4066Sahrens 2133fa9e4066Sahrens return (0); 2134fa9e4066Sahrens } 2135fa9e4066Sahrens 2136fa9e4066Sahrens void 2137fa9e4066Sahrens zfs_init(void) 2138fa9e4066Sahrens { 2139fa9e4066Sahrens /* 2140fa9e4066Sahrens * Initialize .zfs directory structures 2141fa9e4066Sahrens */ 2142fa9e4066Sahrens zfsctl_init(); 2143fa9e4066Sahrens 2144fa9e4066Sahrens /* 2145fa9e4066Sahrens * Initialize znode cache, vnode ops, etc... 2146fa9e4066Sahrens */ 2147fa9e4066Sahrens zfs_znode_init(); 214814843421SMatthew Ahrens 214914843421SMatthew Ahrens dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb); 2150fa9e4066Sahrens } 2151fa9e4066Sahrens 2152fa9e4066Sahrens void 2153fa9e4066Sahrens zfs_fini(void) 2154fa9e4066Sahrens { 2155fa9e4066Sahrens zfsctl_fini(); 2156fa9e4066Sahrens zfs_znode_fini(); 2157fa9e4066Sahrens } 2158fa9e4066Sahrens 2159fa9e4066Sahrens int 2160fa9e4066Sahrens zfs_busy(void) 2161fa9e4066Sahrens { 2162fa9e4066Sahrens return (zfs_active_fs_count != 0); 2163fa9e4066Sahrens } 2164fa9e4066Sahrens 2165e7437265Sahrens int 216614843421SMatthew Ahrens zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) 2167e7437265Sahrens { 2168e7437265Sahrens int error; 216914843421SMatthew Ahrens objset_t *os = zfsvfs->z_os; 2170e7437265Sahrens dmu_tx_t *tx; 2171e7437265Sahrens 2172e7437265Sahrens if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) 2173e7437265Sahrens return (EINVAL); 2174e7437265Sahrens 217514843421SMatthew Ahrens if (newvers < zfsvfs->z_version) 217614843421SMatthew Ahrens return (EINVAL); 2177e7437265Sahrens 21780a586ceaSMark Shellenbaum if (zfs_spa_version_map(newvers) > 21790a586ceaSMark Shellenbaum spa_version(dmu_objset_spa(zfsvfs->z_os))) 21800a586ceaSMark Shellenbaum return (ENOTSUP); 21810a586ceaSMark Shellenbaum 2182e7437265Sahrens tx = dmu_tx_create(os); 218314843421SMatthew Ahrens dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR); 21840a586ceaSMark Shellenbaum if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { 21850a586ceaSMark Shellenbaum dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, 21860a586ceaSMark Shellenbaum ZFS_SA_ATTRS); 21870a586ceaSMark Shellenbaum dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 21880a586ceaSMark Shellenbaum } 2189e7437265Sahrens error = dmu_tx_assign(tx, TXG_WAIT); 2190e7437265Sahrens if (error) { 2191e7437265Sahrens dmu_tx_abort(tx); 219214843421SMatthew Ahrens return (error); 219314843421SMatthew Ahrens } 21940a586ceaSMark Shellenbaum 219514843421SMatthew Ahrens error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 219614843421SMatthew Ahrens 8, 1, &newvers, tx); 219714843421SMatthew Ahrens 219814843421SMatthew Ahrens if (error) { 219914843421SMatthew Ahrens dmu_tx_commit(tx); 220014843421SMatthew Ahrens return (error); 2201e7437265Sahrens } 2202e7437265Sahrens 22030a586ceaSMark Shellenbaum if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { 22040a586ceaSMark Shellenbaum uint64_t sa_obj; 22050a586ceaSMark Shellenbaum 22060a586ceaSMark Shellenbaum ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=, 22070a586ceaSMark Shellenbaum SPA_VERSION_SA); 22080a586ceaSMark Shellenbaum sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, 22090a586ceaSMark Shellenbaum DMU_OT_NONE, 0, tx); 22100a586ceaSMark Shellenbaum 22110a586ceaSMark Shellenbaum error = zap_add(os, MASTER_NODE_OBJ, 22120a586ceaSMark Shellenbaum ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); 22130a586ceaSMark Shellenbaum ASSERT3U(error, ==, 0); 22140a586ceaSMark Shellenbaum 22150a586ceaSMark Shellenbaum VERIFY(0 == sa_set_sa_object(os, sa_obj)); 22160a586ceaSMark Shellenbaum sa_register_update_callback(os, zfs_sa_upgrade); 22170a586ceaSMark Shellenbaum } 22180a586ceaSMark Shellenbaum 22193f9d6ad7SLin Ling spa_history_log_internal(LOG_DS_UPGRADE, 22203f9d6ad7SLin Ling dmu_objset_spa(os), tx, "oldver=%llu newver=%llu dataset = %llu", 222114843421SMatthew Ahrens zfsvfs->z_version, newvers, dmu_objset_id(os)); 222214843421SMatthew Ahrens 2223e7437265Sahrens dmu_tx_commit(tx); 2224e7437265Sahrens 222514843421SMatthew Ahrens zfsvfs->z_version = newvers; 222614843421SMatthew Ahrens 222714843421SMatthew Ahrens if (zfsvfs->z_version >= ZPL_VERSION_FUID) 222814843421SMatthew Ahrens zfs_set_fuid_feature(zfsvfs); 222914843421SMatthew Ahrens 223014843421SMatthew Ahrens return (0); 2231e7437265Sahrens } 2232e7437265Sahrens 2233de8267e0Stimh /* 2234de8267e0Stimh * Read a property stored within the master node. 2235de8267e0Stimh */ 2236de8267e0Stimh int 2237de8267e0Stimh zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) 2238de8267e0Stimh { 2239de8267e0Stimh const char *pname; 22400a48a24eStimh int error = ENOENT; 2241de8267e0Stimh 2242de8267e0Stimh /* 2243de8267e0Stimh * Look up the file system's value for the property. For the 2244de8267e0Stimh * version property, we look up a slightly different string. 2245de8267e0Stimh */ 2246de8267e0Stimh if (prop == ZFS_PROP_VERSION) 2247de8267e0Stimh pname = ZPL_VERSION_STR; 2248de8267e0Stimh else 2249de8267e0Stimh pname = zfs_prop_to_name(prop); 2250de8267e0Stimh 22510a48a24eStimh if (os != NULL) 22520a48a24eStimh error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value); 2253de8267e0Stimh 2254aa60ed0eSmaybee if (error == ENOENT) { 2255de8267e0Stimh /* No value set, use the default value */ 2256de8267e0Stimh switch (prop) { 2257aa60ed0eSmaybee case ZFS_PROP_VERSION: 2258aa60ed0eSmaybee *value = ZPL_VERSION; 2259aa60ed0eSmaybee break; 2260de8267e0Stimh case ZFS_PROP_NORMALIZE: 2261de8267e0Stimh case ZFS_PROP_UTF8ONLY: 2262de8267e0Stimh *value = 0; 2263de8267e0Stimh break; 2264de8267e0Stimh case ZFS_PROP_CASE: 2265de8267e0Stimh *value = ZFS_CASE_SENSITIVE; 2266de8267e0Stimh break; 2267de8267e0Stimh default: 2268aa60ed0eSmaybee return (error); 2269de8267e0Stimh } 2270aa60ed0eSmaybee error = 0; 2271de8267e0Stimh } 2272aa60ed0eSmaybee return (error); 2273de8267e0Stimh } 2274de8267e0Stimh 2275fa9e4066Sahrens static vfsdef_t vfw = { 2276fa9e4066Sahrens VFSDEF_VERSION, 2277fa9e4066Sahrens MNTTYPE_ZFS, 2278fa9e4066Sahrens zfs_vfsinit, 2279da6c28aaSamw VSW_HASPROTO|VSW_CANRWRO|VSW_CANREMOUNT|VSW_VOLATILEDEV|VSW_STATS| 22800fbb751dSJohn Levon VSW_XID|VSW_ZMOUNT, 2281fa9e4066Sahrens &zfs_mntopts 2282fa9e4066Sahrens }; 2283fa9e4066Sahrens 2284fa9e4066Sahrens struct modlfs zfs_modlfs = { 2285e7437265Sahrens &mod_fsops, "ZFS filesystem version " SPA_VERSION_STRING, &vfw 2286fa9e4066Sahrens }; 2287