1b9238976Sth /* 2b9238976Sth * CDDL HEADER START 3b9238976Sth * 4b9238976Sth * The contents of this file are subject to the terms of the 5b9238976Sth * Common Development and Distribution License (the "License"). 6b9238976Sth * You may not use this file except in compliance with the License. 7b9238976Sth * 8b9238976Sth * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9b9238976Sth * or http://www.opensolaris.org/os/licensing. 10b9238976Sth * See the License for the specific language governing permissions 11b9238976Sth * and limitations under the License. 12b9238976Sth * 13b9238976Sth * When distributing Covered Code, include this CDDL HEADER in each 14b9238976Sth * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15b9238976Sth * If applicable, add the following below this CDDL HEADER, with the 16b9238976Sth * fields enclosed by brackets "[]" replaced with your own identifying 17b9238976Sth * information: Portions Copyright [yyyy] [name of copyright owner] 18b9238976Sth * 19b9238976Sth * CDDL HEADER END 20b9238976Sth */ 21b9238976Sth 22b9238976Sth /* 23546a3997SThomas Haynes * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24b9238976Sth * Use is subject to license terms. 25b9238976Sth */ 26b9238976Sth 27b9238976Sth /* 28b9238976Sth * Support for ephemeral mounts, e.g. mirror-mounts. These mounts are 29b9238976Sth * triggered from a "stub" rnode via a special set of vnodeops. 
30b9238976Sth */ 31b9238976Sth 32b9238976Sth #include <sys/param.h> 33b9238976Sth #include <sys/types.h> 34b9238976Sth #include <sys/systm.h> 35b9238976Sth #include <sys/cred.h> 36b9238976Sth #include <sys/time.h> 37b9238976Sth #include <sys/vnode.h> 38b9238976Sth #include <sys/vfs.h> 39b9238976Sth #include <sys/vfs_opreg.h> 40b9238976Sth #include <sys/file.h> 41b9238976Sth #include <sys/filio.h> 42b9238976Sth #include <sys/uio.h> 43b9238976Sth #include <sys/buf.h> 44b9238976Sth #include <sys/mman.h> 45b9238976Sth #include <sys/pathname.h> 46b9238976Sth #include <sys/dirent.h> 47b9238976Sth #include <sys/debug.h> 48b9238976Sth #include <sys/vmsystm.h> 49b9238976Sth #include <sys/fcntl.h> 50b9238976Sth #include <sys/flock.h> 51b9238976Sth #include <sys/swap.h> 52b9238976Sth #include <sys/errno.h> 53b9238976Sth #include <sys/strsubr.h> 54b9238976Sth #include <sys/sysmacros.h> 55b9238976Sth #include <sys/kmem.h> 56b9238976Sth #include <sys/mount.h> 57b9238976Sth #include <sys/cmn_err.h> 58b9238976Sth #include <sys/pathconf.h> 59b9238976Sth #include <sys/utsname.h> 60b9238976Sth #include <sys/dnlc.h> 61b9238976Sth #include <sys/acl.h> 62b9238976Sth #include <sys/systeminfo.h> 63b9238976Sth #include <sys/policy.h> 64b9238976Sth #include <sys/sdt.h> 65b9238976Sth #include <sys/list.h> 66b9238976Sth #include <sys/stat.h> 67b9238976Sth #include <sys/mntent.h> 68b9238976Sth 69b9238976Sth #include <rpc/types.h> 70b9238976Sth #include <rpc/auth.h> 71b9238976Sth #include <rpc/clnt.h> 72b9238976Sth 73b9238976Sth #include <nfs/nfs.h> 74b9238976Sth #include <nfs/nfs_clnt.h> 75b9238976Sth #include <nfs/nfs_acl.h> 76b9238976Sth #include <nfs/lm.h> 77b9238976Sth #include <nfs/nfs4.h> 78b9238976Sth #include <nfs/nfs4_kprot.h> 79b9238976Sth #include <nfs/rnode4.h> 80b9238976Sth #include <nfs/nfs4_clnt.h> 81b9238976Sth 82b9238976Sth #include <vm/hat.h> 83b9238976Sth #include <vm/as.h> 84b9238976Sth #include <vm/page.h> 85b9238976Sth #include <vm/pvn.h> 86b9238976Sth #include <vm/seg.h> 
#include <vm/seg_map.h>
#include <vm/seg_kpm.h>
#include <vm/seg_vn.h>

#include <fs/fs_subr.h>

#include <sys/ddi.h>
#include <sys/int_fmtio.h>

#include <sys/sunddi.h>

#include <sys/priv_names.h>

/*
 * The automatic unmounter thread stuff!
 *
 * How often the harvester thread wakes up to look for idle ephemeral
 * mounts to unmount.
 */
static int nfs4_trigger_thread_timer = 20;	/* in seconds */

/*
 * Just a default....
 *
 * Default idle time (in seconds) before an ephemeral mount becomes a
 * candidate for automatic unmount; copied into ntg_mount_to per zone.
 */
static uint_t nfs4_trigger_mount_to = 240;

/*
 * Per-zone state for the ephemeral-mount machinery: the list ("forest")
 * of ephemeral trees in this zone, the idle timeout, and whether the
 * harvester thread has been started yet.
 */
typedef struct nfs4_trigger_globals {
	kmutex_t		ntg_forest_lock;	/* protects ntg_forest */
	uint_t			ntg_mount_to;		/* idle unmount timeout */
	int			ntg_thread_started;	/* harvester running? */
	nfs4_ephemeral_tree_t	*ntg_forest;		/* list of trees */
} nfs4_trigger_globals_t;

kmutex_t	nfs4_ephemeral_thread_lock;

zone_key_t	nfs4_ephemeral_key = ZONE_KEY_UNINITIALIZED;

static void	nfs4_ephemeral_start_harvester(nfs4_trigger_globals_t *);

/*
 * Used for ephemeral mounts; contains data either duplicated from
 * servinfo4_t, or hand-crafted, depending on type of ephemeral mount.
 *
 * It's intended that this structure is used solely for ephemeral
 * mount-type specific data, for passing this data to
 * nfs4_trigger_nargs_create().
 */
typedef struct ephemeral_servinfo {
	char		*esi_hostname;
	char		*esi_netname;
	char		*esi_path;
	int		esi_path_len;
	int		esi_mount_flags;
	struct netbuf	*esi_addr;
	struct netbuf	*esi_syncaddr;
	struct knetconfig *esi_knconf;
} ephemeral_servinfo_t;

/*
 * Collect together the mount-type specific and generic data args.
 */
typedef struct domount_args {
	ephemeral_servinfo_t	*dma_esi;
	char			*dma_hostlist;	/* comma-sep. for RO failover */
	struct nfs_args		*dma_nargs;
} domount_args_t;


/*
 * The vnode ops functions for a trigger stub vnode
 */
static int nfs4_trigger_open(vnode_t **, int, cred_t *, caller_context_t *);
static int nfs4_trigger_getattr(vnode_t *, struct vattr *, int, cred_t *,
    caller_context_t *);
static int nfs4_trigger_setattr(vnode_t *, struct vattr *, int, cred_t *,
    caller_context_t *);
static int nfs4_trigger_access(vnode_t *, int, int, cred_t *,
    caller_context_t *);
static int nfs4_trigger_readlink(vnode_t *, struct uio *, cred_t *,
    caller_context_t *);
static int nfs4_trigger_lookup(vnode_t *, char *, vnode_t **,
    struct pathname *, int, vnode_t *, cred_t *, caller_context_t *,
    int *, pathname_t *);
static int nfs4_trigger_create(vnode_t *, char *, struct vattr *,
    enum vcexcl, int, vnode_t **, cred_t *, int, caller_context_t *,
    vsecattr_t *);
static int nfs4_trigger_remove(vnode_t *, char *, cred_t *, caller_context_t *,
    int);
static int nfs4_trigger_link(vnode_t *, vnode_t *, char *, cred_t *,
    caller_context_t *, int);
static int nfs4_trigger_rename(vnode_t *, char *, vnode_t *, char *,
    cred_t *, caller_context_t *, int);
static int nfs4_trigger_mkdir(vnode_t *, char *, struct vattr *,
    vnode_t **, cred_t *, caller_context_t *, int, vsecattr_t *vsecp);
static int nfs4_trigger_rmdir(vnode_t *, char *, vnode_t *, cred_t *,
    caller_context_t *, int);
static int nfs4_trigger_symlink(vnode_t *, char *, struct vattr *, char *,
    cred_t *, caller_context_t *, int);
static int nfs4_trigger_cmp(vnode_t *, vnode_t *, caller_context_t *);

/*
 * Regular NFSv4 vnodeops that we need to reference directly
 */
extern int	nfs4_getattr(vnode_t *, struct vattr *, int, cred_t *,
    caller_context_t *);
extern void	nfs4_inactive(vnode_t *, cred_t *, caller_context_t *);
extern int	nfs4_rwlock(vnode_t *, int, caller_context_t *);
extern void	nfs4_rwunlock(vnode_t *, int, caller_context_t *);
extern int	nfs4_lookup(vnode_t *, char *, vnode_t **,
    struct pathname *, int, vnode_t *, cred_t *,
    caller_context_t *, int *, pathname_t *);
extern int	nfs4_pathconf(vnode_t *, int, ulong_t *, cred_t *,
    caller_context_t *);
extern int	nfs4_getsecattr(vnode_t *, vsecattr_t *, int, cred_t *,
    caller_context_t *);
extern int	nfs4_fid(vnode_t *, fid_t *, caller_context_t *);
extern int	nfs4_realvp(vnode_t *, vnode_t **, caller_context_t *);

static int	nfs4_trigger_mount(vnode_t *, cred_t *, vnode_t **);
static int	nfs4_trigger_domount(vnode_t *, domount_args_t *, vfs_t **,
    cred_t *, vnode_t **);
static domount_args_t	*nfs4_trigger_domount_args_create(vnode_t *);
static void	nfs4_trigger_domount_args_destroy(domount_args_t *dma,
    vnode_t *vp);
static ephemeral_servinfo_t *nfs4_trigger_esi_create(vnode_t *, servinfo4_t *);
static void	nfs4_trigger_esi_destroy(ephemeral_servinfo_t *, vnode_t *);
static ephemeral_servinfo_t *nfs4_trigger_esi_create_mirrormount(vnode_t *,
    servinfo4_t *);
static struct nfs_args	*nfs4_trigger_nargs_create(mntinfo4_t *, servinfo4_t *,
    ephemeral_servinfo_t *);
static void	nfs4_trigger_nargs_destroy(struct nfs_args *);
static char	*nfs4_trigger_create_mntopts(vfs_t *);
static void	nfs4_trigger_destroy_mntopts(char *);
static int	nfs4_trigger_add_mntopt(char *, char *, vfs_t *);
static enum clnt_stat	nfs4_trigger_ping_server(servinfo4_t *, int);

extern int	umount2_engine(vfs_t *, int, cred_t *, int);


vnodeops_t *nfs4_trigger_vnodeops;

/*
 * These are the vnodeops that we must define for stub vnodes.
 *
 *
 * Many of the VOPs defined for NFSv4 do not need to be defined here,
 * for various reasons. This will result in the VFS default function being
 * used:
 *
 * - These VOPs require a previous VOP_OPEN to have occurred. That will have
 *   lost the reference to the stub vnode, meaning these should not be called:
 *       close, read, write, ioctl, readdir, seek.
 *
 * - These VOPs are meaningless for vnodes without data pages. Since the
 *   stub vnode is of type VDIR, these should not be called:
 *       space, getpage, putpage, map, addmap, delmap, pageio, fsync.
 *
 * - These VOPs are otherwise not applicable, and should not be called:
 *       dump, setsecattr.
 *
 *
 * These VOPs we do not want to define, but nor do we want the VFS default
 * action. Instead, we specify the VFS error function, with fs_error(), but
 * note that fs_error() is not actually called. Instead it results in the
 * use of the error function defined for the particular VOP, in vn_ops_table[]:
 *
 * - frlock, dispose, shrlock.
 *
 *
 * These VOPs we define to use the corresponding regular NFSv4 vnodeop.
 * NOTE: if any of these ops involve an OTW call with the stub FH, then
 * that call must be wrapped with save_mnt_secinfo()/check_mnt_secinfo()
 * to protect the security data in the servinfo4_t for the "parent"
 * filesystem that contains the stub.
 *
 * - These VOPs should not trigger a mount, so that "ls -l" does not:
 *       pathconf, getsecattr.
 *
 * - These VOPs would not make sense to trigger:
 *       inactive, rwlock, rwunlock, fid, realvp.
 */
const fs_operation_def_t nfs4_trigger_vnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = nfs4_trigger_open },
	VOPNAME_GETATTR,	{ .vop_getattr = nfs4_trigger_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = nfs4_trigger_setattr },
	VOPNAME_ACCESS,		{ .vop_access = nfs4_trigger_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = nfs4_trigger_lookup },
	VOPNAME_CREATE,		{ .vop_create = nfs4_trigger_create },
	VOPNAME_REMOVE,		{ .vop_remove = nfs4_trigger_remove },
	VOPNAME_LINK,		{ .vop_link = nfs4_trigger_link },
	VOPNAME_RENAME,		{ .vop_rename = nfs4_trigger_rename },
	VOPNAME_MKDIR,		{ .vop_mkdir = nfs4_trigger_mkdir },
	VOPNAME_RMDIR,		{ .vop_rmdir = nfs4_trigger_rmdir },
	VOPNAME_SYMLINK,	{ .vop_symlink = nfs4_trigger_symlink },
	VOPNAME_READLINK,	{ .vop_readlink = nfs4_trigger_readlink },
	VOPNAME_INACTIVE,	{ .vop_inactive = nfs4_inactive },
	VOPNAME_FID,		{ .vop_fid = nfs4_fid },
	VOPNAME_RWLOCK,		{ .vop_rwlock = nfs4_rwlock },
	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = nfs4_rwunlock },
	VOPNAME_REALVP,		{ .vop_realvp = nfs4_realvp },
	VOPNAME_GETSECATTR,	{ .vop_getsecattr = nfs4_getsecattr },
	VOPNAME_PATHCONF,	{ .vop_pathconf = nfs4_pathconf },
	VOPNAME_FRLOCK,		{ .error = fs_error },
	VOPNAME_DISPOSE,	{ .error = fs_error },
	VOPNAME_SHRLOCK,	{ .error = fs_error },
	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
	NULL, NULL
};

/*
 * Bump the reference count on an ephemeral tree.  The caller must
 * already hold net_cnt_lock; use nfs4_ephemeral_tree_hold() for the
 * self-locking variant.  The post-increment ASSERT catches wrap-around.
 */
static void
nfs4_ephemeral_tree_incr(nfs4_ephemeral_tree_t *net)
{
	ASSERT(mutex_owned(&net->net_cnt_lock));

	net->net_refcnt++;
	ASSERT(net->net_refcnt != 0);
}

/*
 * Take a reference on the tree, acquiring and dropping net_cnt_lock
 * around the increment.
 */
static void
nfs4_ephemeral_tree_hold(nfs4_ephemeral_tree_t *net)
{
	mutex_enter(&net->net_cnt_lock);
	nfs4_ephemeral_tree_incr(net);
	mutex_exit(&net->net_cnt_lock);
}

/*
 * We need a safe way to decrement the refcnt whilst the
 * lock is being held.  The ASSERT catches a decrement below zero.
 */
static void
nfs4_ephemeral_tree_decr(nfs4_ephemeral_tree_t *net)
{
	ASSERT(mutex_owned(&net->net_cnt_lock));
	ASSERT(net->net_refcnt != 0);

	net->net_refcnt--;
}

/*
 * Drop a reference on the tree, acquiring and dropping net_cnt_lock
 * around the decrement.
 */
static void
nfs4_ephemeral_tree_rele(nfs4_ephemeral_tree_t *net)
{
	mutex_enter(&net->net_cnt_lock);
	nfs4_ephemeral_tree_decr(net);
	mutex_exit(&net->net_cnt_lock);
}

/*
 * Trigger ops for stub vnodes; for mirror mounts, etc.
 *
 * The general idea is that a "triggering" op will first call
 * nfs4_trigger_mount(), which will find out whether a mount has already
 * been triggered.
 *
 * If it has, then nfs4_trigger_mount() sets newvp to the root vnode
 * of the covering vfs.
 *
 * If a mount has not yet been triggered, nfs4_trigger_mount() will do so,
 * and again set newvp, as above.
 *
 * The triggering op may then re-issue the VOP by calling it on newvp.
 *
 * Note that some ops may perform custom action, and may or may not need
 * to trigger a mount.
 *
 * Some ops need to call the regular NFSv4 vnodeop for a stub vnode. We
 * obviously can't do this with VOP_<whatever>, since it's a stub vnode
 * and that would just recurse. Instead, we call the v4 op directly,
 * by name. This is OK, since we know that the vnode is for NFSv4,
 * otherwise it couldn't be a stub.
 *
 */

static int
nfs4_trigger_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
{
	int error;
	vnode_t *newvp;

	error = nfs4_trigger_mount(*vpp, cr, &newvp);
	if (error)
		return (error);

	/* Release the stub vnode, as we're losing the reference to it */
	VN_RELE(*vpp);

	/* Give the caller the root vnode of the newly-mounted fs */
	*vpp = newvp;

	/* return with VN_HELD(newvp) */
	return (VOP_OPEN(vpp, flag, cr, ct));
}

/*
 * For the majority of cases, nfs4_trigger_getattr() will not trigger
 * a mount. However, if ATTR_TRIGGER is set, we are being informed
 * that we need to force the mount before we attempt to determine
 * the attributes. The intent is an atomic operation for security
 * testing.
 */
static int
nfs4_trigger_getattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	int error;

	if (flags & ATTR_TRIGGER) {
		vnode_t	*newvp;

		/* Force the mount, then get attributes of the new root. */
		error = nfs4_trigger_mount(vp, cr, &newvp);
		if (error)
			return (error);

		error = VOP_GETATTR(newvp, vap, flags, cr, ct);
		VN_RELE(newvp);
	} else {
		/* No trigger: call the regular v4 op on the stub itself. */
		error = nfs4_getattr(vp, vap, flags, cr, ct);
	}

	return (error);
}

static int
nfs4_trigger_setattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	int error;
	vnode_t *newvp;

	error = nfs4_trigger_mount(vp, cr, &newvp);
	if (error)
		return (error);

	error = VOP_SETATTR(newvp, vap, flags, cr, ct);
	VN_RELE(newvp);

	return (error);
}

static int
nfs4_trigger_access(vnode_t *vp, int mode, int flags, cred_t *cr,
    caller_context_t *ct)
{
	int error;
	vnode_t *newvp;

	error = nfs4_trigger_mount(vp, cr, &newvp);
	if (error)
		return (error);

	error = VOP_ACCESS(newvp, mode, flags, cr, ct);
	VN_RELE(newvp);

	return (error);
}

static int
nfs4_trigger_lookup(vnode_t *dvp, char *nm, vnode_t **vpp,
    struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr,
    caller_context_t *ct, int *deflags, pathname_t *rpnp)
{
	int error;
	vnode_t *newdvp;
	rnode4_t *drp = VTOR4(dvp);

	ASSERT(RP_ISSTUB(drp));

	/* for now, we only support mirror-mounts */
	ASSERT(RP_ISSTUB_MIRRORMOUNT(drp));

	/*
	 * It's not legal to lookup ".." for an fs root, so we mustn't pass
	 * that up. Instead, pass onto the regular op, regardless of whether
	 * we've triggered a mount.
	 */
	if (strcmp(nm, "..") == 0)
		return (nfs4_lookup(dvp, nm, vpp, pnp, flags, rdir, cr,
		    ct, deflags, rpnp));

	error = nfs4_trigger_mount(dvp, cr, &newdvp);
	if (error)
		return (error);

	error = VOP_LOOKUP(newdvp, nm, vpp, pnp, flags, rdir, cr, ct,
	    deflags, rpnp);
	VN_RELE(newdvp);

	return (error);
}

static int
nfs4_trigger_create(vnode_t *dvp, char *nm, struct vattr *va,
    enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr,
    int flags, caller_context_t *ct, vsecattr_t *vsecp)
{
	int error;
	vnode_t *newdvp;

	error = nfs4_trigger_mount(dvp, cr, &newdvp);
	if (error)
		return (error);

	error = VOP_CREATE(newdvp, nm, va, exclusive, mode, vpp, cr,
	    flags, ct, vsecp);
	VN_RELE(newdvp);

	return (error);
}

static int
nfs4_trigger_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
    int flags)
{
	int error;
	vnode_t *newdvp;

	error = nfs4_trigger_mount(dvp, cr, &newdvp);
	if (error)
		return (error);

	error = VOP_REMOVE(newdvp, nm, cr, ct, flags);
	VN_RELE(newdvp);

	return (error);
}

static int
nfs4_trigger_link(vnode_t *tdvp, vnode_t *svp, char *tnm, cred_t *cr,
    caller_context_t *ct, int flags)
{
	int error;
	vnode_t *newtdvp;

	error = nfs4_trigger_mount(tdvp, cr, &newtdvp);
	if (error)
		return (error);

	/*
	 * We don't check whether svp is a stub. Let the NFSv4 code
	 * detect that error, and return accordingly.
	 */
	error = VOP_LINK(newtdvp, svp, tnm, cr, ct, flags);
	VN_RELE(newtdvp);

	return (error);
}

static int
nfs4_trigger_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
    cred_t *cr, caller_context_t *ct, int flags)
{
	int error;
	vnode_t *newsdvp;
	rnode4_t *tdrp = VTOR4(tdvp);

	/*
	 * We know that sdvp is a stub, otherwise we would not be here.
	 *
	 * If tdvp is also be a stub, there are two possibilities: it
	 * is either the same stub as sdvp [i.e. VN_CMP(sdvp, tdvp)]
	 * or it is a different stub [!VN_CMP(sdvp, tdvp)].
	 *
	 * In the former case, just trigger sdvp, and treat tdvp as
	 * though it were not a stub.
	 *
	 * In the latter case, it might be a different stub for the
	 * same server fs as sdvp, or for a different server fs.
	 * Regardless, from the client perspective this would still
	 * be a cross-filesystem rename, and should not be allowed,
	 * so return EXDEV, without triggering either mount.
	 */
	if (RP_ISSTUB(tdrp) && !VN_CMP(sdvp, tdvp))
		return (EXDEV);

	error = nfs4_trigger_mount(sdvp, cr, &newsdvp);
	if (error)
		return (error);

	error = VOP_RENAME(newsdvp, snm, tdvp, tnm, cr, ct, flags);

	VN_RELE(newsdvp);

	return (error);
}

/* ARGSUSED */
static int
nfs4_trigger_mkdir(vnode_t *dvp, char *nm, struct vattr *va, vnode_t **vpp,
    cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
{
	int error;
	vnode_t *newdvp;

	error = nfs4_trigger_mount(dvp, cr, &newdvp);
	if (error)
		return (error);

	error = VOP_MKDIR(newdvp, nm, va, vpp, cr, ct, flags, vsecp);
	VN_RELE(newdvp);

	return (error);
}

static int
nfs4_trigger_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
    caller_context_t *ct, int flags)
{
	int error;
	vnode_t *newdvp;

	error = nfs4_trigger_mount(dvp, cr, &newdvp);
	if (error)
		return (error);

	error = VOP_RMDIR(newdvp, nm, cdir, cr, ct, flags);
	VN_RELE(newdvp);

	return (error);
}

static int
nfs4_trigger_symlink(vnode_t *dvp, char *lnm, struct vattr *tva, char *tnm,
    cred_t *cr, caller_context_t *ct, int flags)
{
	int error;
	vnode_t *newdvp;

	error = nfs4_trigger_mount(dvp, cr, &newdvp);
	if (error)
		return (error);

	error = VOP_SYMLINK(newdvp, lnm, tva, tnm, cr, ct, flags);
	VN_RELE(newdvp);

	return (error);
}

static int
nfs4_trigger_readlink(vnode_t *vp, struct uio *uiop, cred_t *cr,
    caller_context_t *ct)
{
	int error;
	vnode_t *newvp;

	error = nfs4_trigger_mount(vp, cr, &newvp);
	if (error)
		return (error);

	error = VOP_READLINK(newvp, uiop, cr, ct);
	VN_RELE(newvp);

	return (error);
}

/* end of trigger vnode ops */

/*
 * See if the mount has already been done by another caller.
640*6962f5b8SThomas Haynes */ 641*6962f5b8SThomas Haynes static int 642*6962f5b8SThomas Haynes nfs4_trigger_mounted_already(vnode_t *vp, vnode_t **newvpp, 643*6962f5b8SThomas Haynes bool_t *was_mounted, vfs_t **vfsp) 644*6962f5b8SThomas Haynes { 645*6962f5b8SThomas Haynes int error; 646*6962f5b8SThomas Haynes mntinfo4_t *mi = VTOMI4(vp); 647*6962f5b8SThomas Haynes 648*6962f5b8SThomas Haynes *was_mounted = FALSE; 649*6962f5b8SThomas Haynes 650*6962f5b8SThomas Haynes error = vn_vfsrlock_wait(vp); 651*6962f5b8SThomas Haynes if (error) 652*6962f5b8SThomas Haynes return (error); 653*6962f5b8SThomas Haynes 654*6962f5b8SThomas Haynes *vfsp = vn_mountedvfs(vp); 655*6962f5b8SThomas Haynes if (*vfsp != NULL) { 656*6962f5b8SThomas Haynes /* the mount has already occurred */ 657*6962f5b8SThomas Haynes error = VFS_ROOT(*vfsp, newvpp); 658*6962f5b8SThomas Haynes if (!error) { 659*6962f5b8SThomas Haynes /* need to update the reference time */ 660*6962f5b8SThomas Haynes mutex_enter(&mi->mi_lock); 661*6962f5b8SThomas Haynes if (mi->mi_ephemeral) 662*6962f5b8SThomas Haynes mi->mi_ephemeral->ne_ref_time = 663*6962f5b8SThomas Haynes gethrestime_sec(); 664*6962f5b8SThomas Haynes mutex_exit(&mi->mi_lock); 665*6962f5b8SThomas Haynes 666*6962f5b8SThomas Haynes *was_mounted = TRUE; 667*6962f5b8SThomas Haynes } 668*6962f5b8SThomas Haynes } 669*6962f5b8SThomas Haynes 670*6962f5b8SThomas Haynes vn_vfsunlock(vp); 671*6962f5b8SThomas Haynes return (0); 672*6962f5b8SThomas Haynes } 673*6962f5b8SThomas Haynes 674b9238976Sth /* 675b9238976Sth * Mount upon a trigger vnode; for mirror-mounts, etc. 676b9238976Sth * 677b9238976Sth * The mount may have already occurred, via another thread. If not, 678b9238976Sth * assemble the location information - which may require fetching - and 679b9238976Sth * perform the mount. 680b9238976Sth * 681b9238976Sth * Sets newvp to be the root of the fs that is now covering vp. Note 682b9238976Sth * that we return with VN_HELD(*newvp). 
 *
 * The caller is responsible for passing the VOP onto the covering fs.
 */
static int
nfs4_trigger_mount(vnode_t *vp, cred_t *cr, vnode_t **newvpp)
{
	int			 error;
	vfs_t			*vfsp;
	rnode4_t		*rp = VTOR4(vp);
	mntinfo4_t		*mi = VTOMI4(vp);
	domount_args_t		*dma;

	nfs4_ephemeral_tree_t	*net;

	bool_t	must_unlock = FALSE;	/* tree locked + MOUNTING set? */
	bool_t	is_building = FALSE;	/* we created the tree ourselves? */
	bool_t	was_mounted = FALSE;

	cred_t	*mcred = NULL;

	nfs4_trigger_globals_t	*ntg;

	zone_t	*zone = curproc->p_zone;

	ASSERT(RP_ISSTUB(rp));

	/* for now, we only support mirror-mounts */
	ASSERT(RP_ISSTUB_MIRRORMOUNT(rp));

	*newvpp = NULL;

	/*
	 * Has the mount already occurred?  If so, *newvpp is set and we
	 * are done (was_mounted == TRUE); must_unlock is still FALSE so
	 * the cleanup at "done" is a no-op.
	 */
	error = nfs4_trigger_mounted_already(vp, newvpp,
	    &was_mounted, &vfsp);
	if (error || was_mounted)
		goto done;

	ntg = zone_getspecific(nfs4_ephemeral_key, zone);
	ASSERT(ntg != NULL);

	mutex_enter(&mi->mi_lock);

	/*
	 * We need to lock down the ephemeral tree.
	 */
	if (mi->mi_ephemeral_tree == NULL) {
		/*
		 * First ephemeral mount under this fs: create the tree
		 * with one reference (ours) and mark it BUILDING.
		 */
		net = kmem_zalloc(sizeof (*net), KM_SLEEP);
		mutex_init(&net->net_tree_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&net->net_cnt_lock, NULL, MUTEX_DEFAULT, NULL);
		net->net_refcnt = 1;
		net->net_status = NFS4_EPHEMERAL_TREE_BUILDING;
		is_building = TRUE;

		/*
		 * We need to add it to the zone specific list for
		 * automatic unmounting and harvesting of deadwood.
		 */
		mutex_enter(&ntg->ntg_forest_lock);
		if (ntg->ntg_forest != NULL)
			net->net_next = ntg->ntg_forest;
		ntg->ntg_forest = net;
		mutex_exit(&ntg->ntg_forest_lock);

		/*
		 * No lock order confusion with mi_lock because no
		 * other node could have grabbed net_tree_lock.
		 */
		mutex_enter(&net->net_tree_lock);
		mi->mi_ephemeral_tree = net;
		net->net_mount = mi;
		mutex_exit(&mi->mi_lock);
	} else {
		/*
		 * Tree already exists: take a hold, then drop mi_lock
		 * before blocking on net_tree_lock to preserve lock order.
		 */
		net = mi->mi_ephemeral_tree;
		nfs4_ephemeral_tree_hold(net);

		mutex_exit(&mi->mi_lock);

		mutex_enter(&net->net_tree_lock);

		/*
		 * We can only procede if the tree is neither locked
		 * nor being torn down.
		 */
		mutex_enter(&net->net_cnt_lock);
		if (net->net_status & NFS4_EPHEMERAL_TREE_PROCESSING) {
			/* drop our hold and bail out */
			nfs4_ephemeral_tree_decr(net);
			mutex_exit(&net->net_cnt_lock);
			mutex_exit(&net->net_tree_lock);

			return (EIO);
		}
		mutex_exit(&net->net_cnt_lock);
	}

	mutex_enter(&net->net_cnt_lock);
	net->net_status |= NFS4_EPHEMERAL_TREE_MOUNTING;
	mutex_exit(&net->net_cnt_lock);

	must_unlock = TRUE;

	dma = nfs4_trigger_domount_args_create(vp);
	if (dma == NULL) {
		error = EINVAL;
		goto done;
	}

	/*
	 * Note that since we define mirror mounts to work
	 * for any user, we simply extend the privileges of
	 * the user's credentials to allow the mount to
	 * proceed.
	 */
	mcred = crdup(cr);
	if (mcred == NULL) {
		error = EINVAL;
		goto done;
	}

	crset_zone_privall(mcred);

	error = nfs4_trigger_domount(vp, dma, &vfsp, mcred, newvpp);
	nfs4_trigger_domount_args_destroy(dma, vp);

	crfree(mcred);

done:

	if (must_unlock) {
		/*
		 * Clear MOUNTING (and BUILDING, if we created the tree),
		 * drop our reference, and release the tree lock.
		 */
		mutex_enter(&net->net_cnt_lock);
		net->net_status &= ~NFS4_EPHEMERAL_TREE_MOUNTING;
		if (is_building)
			net->net_status &= ~NFS4_EPHEMERAL_TREE_BUILDING;
		nfs4_ephemeral_tree_decr(net);
		mutex_exit(&net->net_cnt_lock);

		mutex_exit(&net->net_tree_lock);
	}

	/* no error but no covering root vnode either: report ENOSYS */
	if (!error && (newvpp == NULL || *newvpp == NULL))
		error = ENOSYS;

	return (error);
}

/*
 * Collect together both the generic & mount-type specific args.
 */
/*
 * Build a domount_args_t for the ephemeral mount: an ephemeral_servinfo_t
 * for the server we will actually contact, a comma-separated host list,
 * and a linked list of nfs_args (one per responsive server, for NFS RO
 * failover).  Loops until at least one server answers; returns NULL only
 * if esi creation fails for the current server.  Caller frees the result
 * with nfs4_trigger_domount_args_destroy().
 */
static domount_args_t *
nfs4_trigger_domount_args_create(vnode_t *vp)
{
	int nointr;
	char *hostlist;
	servinfo4_t *svp;
	struct nfs_args *nargs, *nargs_head;
	enum clnt_stat status;
	ephemeral_servinfo_t *esi, *esi_first;
	domount_args_t *dma;
	mntinfo4_t *mi = VTOMI4(vp);

	/* honor the parent mount's intr/nointr setting while pinging */
	nointr = !(mi->mi_flags & MI4_INT);
	hostlist = kmem_zalloc(MAXPATHLEN, KM_SLEEP);

	svp = mi->mi_curr_serv;
	/* check if the current server is responding */
	status = nfs4_trigger_ping_server(svp, nointr);
	if (status == RPC_SUCCESS) {
		esi_first = nfs4_trigger_esi_create(vp, svp);
		if (esi_first == NULL) {
			kmem_free(hostlist, MAXPATHLEN);
			return (NULL);
		}

		(void) strlcpy(hostlist, esi_first->esi_hostname, MAXPATHLEN);

		nargs_head = nfs4_trigger_nargs_create(mi, svp, esi_first);
	} else {
		/* current server did not respond */
		esi_first = NULL;
		nargs_head = NULL;
	}
	nargs = nargs_head;

	/*
	 * NFS RO failover.
	 *
	 * If we have multiple servinfo4 structures, linked via sv_next,
	 * we must create one nfs_args for each, linking the nfs_args via
	 * nfs_ext_u.nfs_extB.next.
	 *
	 * We need to build a corresponding esi for each, too, but that is
	 * used solely for building nfs_args, and may be immediately
	 * discarded, as domount() requires the info from just one esi,
	 * but all the nfs_args.
	 *
	 * Currently, the NFS mount code will hang if not all servers
	 * requested are available. To avoid that, we need to ping each
	 * server, here, and remove it from the list if it is not
	 * responding. This has the side-effect of that server then
	 * being permanently unavailable for this failover mount, even if
	 * it recovers. That's unfortunate, but the best we can do until
	 * the mount code path is fixed.
	 */

	/*
	 * If the current server was down, loop indefinitely until we find
	 * at least one responsive server.
	 */
	do {
		/* no locking needed for sv_next; it is only set at fs mount */
		for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) {
			struct nfs_args *next;

			/*
			 * nargs_head: the head of the nfs_args list
			 * nargs: the current tail of the list
			 * next: the newly-created element to be added
			 */

			/*
			 * We've already tried the current server, above;
			 * if it was responding, we have already included it
			 * and it may now be ignored.
			 *
			 * Otherwise, try it again, since it may now have
			 * recovered.
			 */
			if (svp == mi->mi_curr_serv && esi_first != NULL)
				continue;

			(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
			if (svp->sv_flags & SV4_NOTINUSE) {
				nfs_rw_exit(&svp->sv_lock);
				continue;
			}
			nfs_rw_exit(&svp->sv_lock);

			/* check if the server is responding */
			status = nfs4_trigger_ping_server(svp, nointr);
			/* if the server did not respond, ignore it */
			if (status != RPC_SUCCESS)
				continue;

			esi = nfs4_trigger_esi_create(vp, svp);
			if (esi == NULL)
				continue;

			/*
			 * If the original current server (mi_curr_serv)
			 * was down when we first tried it,
			 * (i.e. esi_first == NULL),
			 * we select this new server (svp) to be the server
			 * that we will actually contact (esi_first).
			 *
			 * Note that it's possible that mi_curr_serv == svp,
			 * if that mi_curr_serv was down but has now recovered.
			 */
			next = nfs4_trigger_nargs_create(mi, svp, esi);
			if (esi_first == NULL) {
				ASSERT(nargs == NULL);
				ASSERT(nargs_head == NULL);
				nargs_head = next;
				esi_first = esi;
				(void) strlcpy(hostlist,
				    esi_first->esi_hostname, MAXPATHLEN);
			} else {
				ASSERT(nargs_head != NULL);
				nargs->nfs_ext_u.nfs_extB.next = next;
				(void) strlcat(hostlist, ",", MAXPATHLEN);
				(void) strlcat(hostlist, esi->esi_hostname,
				    MAXPATHLEN);
				/* esi was only needed for hostname & nargs */
				nfs4_trigger_esi_destroy(esi, vp);
			}

			nargs = next;
		}

		/* if we've had no response at all, wait a second */
		if (esi_first == NULL)
			delay(drv_usectohz(1000000));

	} while (esi_first == NULL);
	ASSERT(nargs_head != NULL);

	dma = kmem_zalloc(sizeof (domount_args_t), KM_SLEEP);
	dma->dma_esi = esi_first;
	dma->dma_hostlist = hostlist;
	dma->dma_nargs = nargs_head;

	return (dma);
}

/*
 * Tear down a domount_args_t built above: the selected esi, the host
 * list buffer, and the whole chain of nfs_args.  NULL-safe.
 */
static void
nfs4_trigger_domount_args_destroy(domount_args_t *dma, vnode_t *vp)
{
	if (dma != NULL) {
		if (dma->dma_esi != NULL && vp != NULL)
			nfs4_trigger_esi_destroy(dma->dma_esi, vp);

		if (dma->dma_hostlist != NULL)
			kmem_free(dma->dma_hostlist, MAXPATHLEN);

		if (dma->dma_nargs != NULL) {
			struct nfs_args *nargs = dma->dma_nargs;

			/* walk the failover chain, freeing each element */
			do {
				struct nfs_args *next =
				    nargs->nfs_ext_u.nfs_extB.next;

				nfs4_trigger_nargs_destroy(nargs);
				nargs = next;
			} while (nargs != NULL);
		}

		kmem_free(dma, sizeof (domount_args_t));
	}
}

/*
 * The ephemeral_servinfo_t struct contains basic information we will need to
 * perform the mount. Whilst the structure is generic across different
 * types of ephemeral mount, the way we gather its contents differs.
 */
static ephemeral_servinfo_t *
nfs4_trigger_esi_create(vnode_t *vp, servinfo4_t *svp)
{
	ephemeral_servinfo_t *esi;
	rnode4_t *rp = VTOR4(vp);

	ASSERT(RP_ISSTUB(rp));

	/* Call the ephemeral type-specific routine */
	if (RP_ISSTUB_MIRRORMOUNT(rp))
		esi = nfs4_trigger_esi_create_mirrormount(vp, svp);
	else
		esi = NULL;

	/* for now, we only support mirror-mounts */
	ASSERT(esi != NULL);

	return (esi);
}

/*
 * Free an ephemeral_servinfo_t.  Only the container is freed here;
 * see the comment below about who owns the members.
 */
static void
nfs4_trigger_esi_destroy(ephemeral_servinfo_t *esi, vnode_t *vp)
{
	rnode4_t *rp = VTOR4(vp);

	ASSERT(RP_ISSTUB(rp));

	/* for now, we only support mirror-mounts */
	ASSERT(RP_ISSTUB_MIRRORMOUNT(rp));

	/* Currently, no need for an ephemeral type-specific routine */

	/*
	 * The contents of ephemeral_servinfo_t goes into nfs_args,
	 * and will be handled by nfs4_trigger_nargs_destroy().
	 * We need only free the structure itself.
	 */
	if (esi != NULL)
		kmem_free(esi, sizeof (ephemeral_servinfo_t));
}

/*
 * Some of this may turn out to be common with other ephemeral types,
 * in which case it should be moved to nfs4_trigger_esi_create(), or a
 * common function called.
 */
/*
 * Build the mirror-mount flavor of ephemeral_servinfo_t by deep-copying
 * the relevant fields of the stub's servinfo4_t (hostname, address,
 * knetconfig, optional AUTH_DH data) and composing the server-side path
 * from sv_path plus the stub's own path.
 */
static ephemeral_servinfo_t *
nfs4_trigger_esi_create_mirrormount(vnode_t *vp, servinfo4_t *svp)
{
	char *stubpath;
	struct knetconfig *sikncp, *svkncp;
	struct netbuf *bufp;
	ephemeral_servinfo_t *esi;

	esi = kmem_zalloc(sizeof (ephemeral_servinfo_t), KM_SLEEP);

	/* initially set to be our type of ephemeral mount; may be added to */
	esi->esi_mount_flags = NFSMNT_MIRRORMOUNT;

	/*
	 * We're copying info from the stub rnode's servinfo4, but
	 * we must create new copies, not pointers, since this information
	 * is to be associated with the new mount, which will be
	 * unmounted (and its structures freed) separately
	 */

	/*
	 * Sizes passed to kmem_[z]alloc here must match those freed
	 * in nfs4_free_args()
	 */

	/*
	 * We hold sv_lock across kmem_zalloc() calls that may sleep, but this
	 * is difficult to avoid: as we need to read svp to calculate the
	 * sizes to be allocated.
	 */
	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);

	/* strcat into zeroed buffers acts as a bounded copy here */
	esi->esi_hostname = kmem_zalloc(strlen(svp->sv_hostname) + 1, KM_SLEEP);
	(void) strcat(esi->esi_hostname, svp->sv_hostname);

	esi->esi_addr = kmem_zalloc(sizeof (struct netbuf), KM_SLEEP);
	bufp = esi->esi_addr;
	bufp->len = svp->sv_addr.len;
	bufp->maxlen = svp->sv_addr.maxlen;
	bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP);
	bcopy(svp->sv_addr.buf, bufp->buf, bufp->len);

	esi->esi_knconf = kmem_zalloc(sizeof (*esi->esi_knconf), KM_SLEEP);
	sikncp = esi->esi_knconf;
	svkncp = svp->sv_knconf;
	sikncp->knc_semantics = svkncp->knc_semantics;
	sikncp->knc_protofmly = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
	(void) strcat((char *)sikncp->knc_protofmly,
	    (char *)svkncp->knc_protofmly);
	sikncp->knc_proto = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
	(void) strcat((char *)sikncp->knc_proto, (char *)svkncp->knc_proto);
	sikncp->knc_rdev = svkncp->knc_rdev;

	/*
	 * Used when AUTH_DH is negotiated.
	 *
	 * This is ephemeral mount-type specific, since it contains the
	 * server's time-sync syncaddr.
	 */
	if (svp->sv_dhsec) {
		struct netbuf *bufp;
		sec_data_t *sdata;
		dh_k4_clntdata_t *data;

		sdata = svp->sv_dhsec;
		data = (dh_k4_clntdata_t *)sdata->data;
		ASSERT(sdata->rpcflavor == AUTH_DH);

		bufp = kmem_zalloc(sizeof (struct netbuf), KM_SLEEP);
		bufp->len = data->syncaddr.len;
		bufp->maxlen = data->syncaddr.maxlen;
		bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP);
		bcopy(data->syncaddr.buf, bufp->buf, bufp->len);
		esi->esi_syncaddr = bufp;

		if (data->netname != NULL) {
			int nmlen = data->netnamelen;

			/*
			 * We need to copy from a dh_k4_clntdata_t
			 * netname/netnamelen pair to a NUL-terminated
			 * netname string suitable for putting in nfs_args,
			 * where the latter has no netnamelen field.
			 */
			esi->esi_netname = kmem_zalloc(nmlen + 1, KM_SLEEP);
			bcopy(data->netname, esi->esi_netname, nmlen);
		}
	} else {
		esi->esi_syncaddr = NULL;
		esi->esi_netname = NULL;
	}

	stubpath = fn_path(VTOSV(vp)->sv_name);
	/* step over initial '.', to avoid e.g. sv_path: "/tank./ws" */
	ASSERT(*stubpath == '.');
	stubpath += 1;

	/* for nfs_args->fh */
	esi->esi_path_len = strlen(svp->sv_path) + strlen(stubpath) + 1;
	esi->esi_path = kmem_zalloc(esi->esi_path_len, KM_SLEEP);
	(void) strcat(esi->esi_path, svp->sv_path);
	(void) strcat(esi->esi_path, stubpath);

	/* restore original pointer before freeing what fn_path() returned */
	stubpath -= 1;
	/* stubpath allocated by fn_path() */
	kmem_free(stubpath, strlen(stubpath) + 1);

	nfs_rw_exit(&svp->sv_lock);

	return (esi);
}

/*
 * Assemble the args, and call the generic VFS mount function to
 * finally perform the ephemeral mount.
 */
/*
 * Returns 0 with *newvpp set to the root vnode of the new (or racing,
 * already-present) mount, else an errno.  Constructs the mount point
 * path from the stub's covering fs mntpt plus the stub path, and the
 * "spec" (host:path) from the domount args.
 */
static int
nfs4_trigger_domount(vnode_t *stubvp, domount_args_t *dma, vfs_t **vfsp,
    cred_t *cr, vnode_t **newvpp)
{
	struct mounta *uap;
	char *mntpt, *orig_path, *path;
	const char *orig_mntpt;
	int retval;
	int mntpt_len;
	int spec_len;
	zone_t *zone = curproc->p_zone;
	bool_t has_leading_slash;
	int i;

	vfs_t *stubvfsp = stubvp->v_vfsp;
	ephemeral_servinfo_t *esi = dma->dma_esi;
	struct nfs_args *nargs = dma->dma_nargs;

	/* first, construct the mount point for the ephemeral mount */
	orig_path = path = fn_path(VTOSV(stubvp)->sv_name);
	orig_mntpt = (char *)refstr_value(stubvfsp->vfs_mntpt);

	if (*orig_path == '.')
		orig_path++;

	/*
	 * Get rid of zone's root path
	 */
	if (zone != global_zone) {
		/*
		 * -1 for trailing '/' and -1 for EOS.
		 */
		if (strncmp(zone->zone_rootpath, orig_mntpt,
		    zone->zone_rootpathlen - 1) == 0) {
			orig_mntpt += (zone->zone_rootpathlen - 2);
		}
	}

	mntpt_len = strlen(orig_mntpt) + strlen(orig_path);
	mntpt = kmem_zalloc(mntpt_len + 1, KM_SLEEP);
	(void) strcat(mntpt, orig_mntpt);
	(void) strcat(mntpt, orig_path);

	/* done with the fn_path() copy; switch to the esi path */
	kmem_free(path, strlen(path) + 1);
	path = esi->esi_path;
	if (*path == '.')
		path++;
	/* collapse a doubled leading slash */
	if (path[0] == '/' && path[1] == '/')
		path++;
	has_leading_slash = (*path == '/');

	spec_len = strlen(dma->dma_hostlist);
	spec_len += strlen(path);

	/* We are going to have to add this in */
	if (!has_leading_slash)
		spec_len++;

	/* We need to get the ':' for dma_hostlist:esi_path */
	spec_len++;

	uap = kmem_zalloc(sizeof (struct mounta), KM_SLEEP);
	uap->spec = kmem_zalloc(spec_len + 1, KM_SLEEP);
	(void) snprintf(uap->spec, spec_len + 1, "%s:%s%s", dma->dma_hostlist,
	    has_leading_slash ? "" : "/", path);

	uap->dir = mntpt;

	uap->flags = MS_SYSSPACE | MS_DATA;
	/* fstype-independent mount options not covered elsewhere */
	/* copy parent's mount(1M) "-m" flag */
	if (stubvfsp->vfs_flag & VFS_NOMNTTAB)
		uap->flags |= MS_NOMNTTAB;

	uap->fstype = MNTTYPE_NFS4;
	uap->dataptr = (char *)nargs;
	/* not needed for MS_SYSSPACE */
	uap->datalen = 0;

	/* use optptr to pass in extra mount options */
	uap->flags |= MS_OPTIONSTR;
	uap->optptr = nfs4_trigger_create_mntopts(stubvfsp);
	if (uap->optptr == NULL) {
		retval = EINVAL;
		goto done;
	}

	/* domount() expects us to count the trailing NUL */
	uap->optlen = strlen(uap->optptr) + 1;

	/*
	 * If we get EBUSY, we try again once to see if we can perform
	 * the mount. We do this because of a spurious race condition.
	 */
	for (i = 0; i < 2; i++) {
		int error;
		bool_t was_mounted;

		retval = domount(NULL, uap, stubvp, cr, vfsp);
		if (retval == 0) {
			/* swap the domount() vfs hold for a root vnode hold */
			retval = VFS_ROOT(*vfsp, newvpp);
			VFS_RELE(*vfsp);
			break;
		} else if (retval != EBUSY) {
			break;
		}

		/*
		 * We might find it mounted by the other racer...
		 */
		error = nfs4_trigger_mounted_already(stubvp,
		    newvpp, &was_mounted, vfsp);
		if (error) {
			/*
			 * NOTE(review): 'error' is discarded here; we fall
			 * through to 'done' and return 'retval', which is
			 * still EBUSY from the failed domount() -- confirm
			 * that masking the lookup error is intentional.
			 */
			goto done;
		} else if (was_mounted) {
			retval = 0;
			break;
		}
	}

done:
	if (uap->optptr)
		nfs4_trigger_destroy_mntopts(uap->optptr);

	kmem_free(uap->spec, spec_len + 1);
	kmem_free(uap, sizeof (struct mounta));
	kmem_free(mntpt, mntpt_len + 1);

	return (retval);
}

/*
 * Build an nfs_args structure for passing to domount().
 *
 * Ephemeral mount-type specific data comes from the ephemeral_servinfo_t;
 * generic data - common to all ephemeral mount types - is read directly
 * from the parent mount's servinfo4_t and mntinfo4_t, via the stub vnode.
 */
static struct nfs_args *
nfs4_trigger_nargs_create(mntinfo4_t *mi, servinfo4_t *svp,
    ephemeral_servinfo_t *esi)
{
	sec_data_t *secdata;
	struct nfs_args *nargs;

	/* setup the nfs args */
	nargs = kmem_zalloc(sizeof (struct nfs_args), KM_SLEEP);

	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);

	/* nargs takes over the esi members; esi itself is freed by caller */
	nargs->addr = esi->esi_addr;

	/* for AUTH_DH by negotiation */
	if (esi->esi_syncaddr || esi->esi_netname) {
		nargs->flags |= NFSMNT_SECURE;
		nargs->syncaddr = esi->esi_syncaddr;
		nargs->netname = esi->esi_netname;
	}

	nargs->flags |= NFSMNT_KNCONF;
	nargs->knconf = esi->esi_knconf;
	nargs->flags |= NFSMNT_HOSTNAME;
	nargs->hostname = esi->esi_hostname;
	nargs->fh = esi->esi_path;

	/* general mount settings, all copied from parent mount */
	mutex_enter(&mi->mi_lock);

	if (!(mi->mi_flags & MI4_HARD))
		nargs->flags |= NFSMNT_SOFT;

	nargs->flags |= NFSMNT_WSIZE | NFSMNT_RSIZE | NFSMNT_TIMEO |
	    NFSMNT_RETRANS;
	nargs->wsize = mi->mi_stsize;
	nargs->rsize = mi->mi_tsize;
	nargs->timeo = mi->mi_timeo;
	nargs->retrans = mi->mi_retrans;

	if (mi->mi_flags & MI4_INT)
		nargs->flags |= NFSMNT_INT;
	if (mi->mi_flags & MI4_NOAC)
		nargs->flags |= NFSMNT_NOAC;

	nargs->flags |= NFSMNT_ACREGMIN | NFSMNT_ACREGMAX | NFSMNT_ACDIRMIN |
	    NFSMNT_ACDIRMAX;
	/* attribute-cache times are stored in hrtime; nfs_args wants secs */
	nargs->acregmin = HR2SEC(mi->mi_acregmin);
	nargs->acregmax = HR2SEC(mi->mi_acregmax);
	nargs->acdirmin = HR2SEC(mi->mi_acdirmin);
	nargs->acdirmax = HR2SEC(mi->mi_acdirmax);

	if (mi->mi_flags & MI4_NOCTO)
		nargs->flags |= NFSMNT_NOCTO;
	if (mi->mi_flags & MI4_GRPID)
		nargs->flags |= NFSMNT_GRPID;
	if (mi->mi_flags & MI4_LLOCK)
		nargs->flags |= NFSMNT_LLOCK;
	if (mi->mi_flags & MI4_NOPRINT)
		nargs->flags |= NFSMNT_NOPRINT;
	if (mi->mi_flags & MI4_DIRECTIO)
		nargs->flags |= NFSMNT_DIRECTIO;
	if (mi->mi_flags & MI4_PUBLIC)
		nargs->flags |= NFSMNT_PUBLIC;

	mutex_exit(&mi->mi_lock);

	/* add any specific flags for this type of ephemeral mount */
	nargs->flags |= esi->esi_mount_flags;

	/*
	 * Security data & negotiation policy.
	 *
	 * We need to preserve the parent mount's preference for security
	 * negotiation, translating SV4_TRYSECDEFAULT -> NFSMNT_SECDEFAULT.
	 *
	 * If SV4_TRYSECDEFAULT is not set, that indicates that a specific
	 * security flavour was requested, with data in sv_secdata, and that
	 * no negotiation should occur. If this specified flavour fails, that's
	 * it. We will copy sv_secdata, and not set NFSMNT_SECDEFAULT.
	 *
	 * If SV4_TRYSECDEFAULT is set, then we start with a passed-in
	 * default flavour, in sv_secdata, but then negotiate a new flavour.
	 * Possible flavours are recorded in an array in sv_secinfo, with
	 * currently in-use flavour pointed to by sv_currsec.
	 *
	 * If sv_currsec is set, i.e. if negotiation has already occurred,
	 * we will copy sv_currsec. Otherwise, copy sv_secdata. Regardless,
	 * we will set NFSMNT_SECDEFAULT, to enable negotiation.
	 */
	if (svp->sv_flags & SV4_TRYSECDEFAULT) {
		/* enable negotiation for ephemeral mount */
		nargs->flags |= NFSMNT_SECDEFAULT;

		/*
		 * As a starting point for negotiation, copy parent
		 * mount's negotiated flavour (sv_currsec) if available,
		 * or its passed-in flavour (sv_secdata) if not.
		 */
		if (svp->sv_currsec != NULL)
			secdata = copy_sec_data(svp->sv_currsec);
		else if (svp->sv_secdata != NULL)
			secdata = copy_sec_data(svp->sv_secdata);
		else
			secdata = NULL;
	} else {
		/* do not enable negotiation; copy parent's passed-in flavour */
		if (svp->sv_secdata != NULL)
			secdata = copy_sec_data(svp->sv_secdata);
		else
			secdata = NULL;
	}

	nfs_rw_exit(&svp->sv_lock);

	nargs->flags |= NFSMNT_NEWARGS;
	nargs->nfs_args_ext = NFS_ARGS_EXTB;
	nargs->nfs_ext_u.nfs_extB.secdata = secdata;

	/* for NFS RO failover; caller will set if necessary */
	nargs->nfs_ext_u.nfs_extB.next = NULL;

	return (nargs);
}

/*
 * Free one nfs_args element (and, via nfs4_free_args(), whatever
 * members it still owns).
 */
static void
nfs4_trigger_nargs_destroy(struct nfs_args *nargs)
{
	/*
	 * Either the mount failed, in which case the data is not needed, or
	 * nfs4_mount() has either taken copies of what it needs or,
	 * where it has merely copied the ptr, it has set *our* ptr to NULL,
	 * whereby nfs4_free_args() will ignore it.
	 */
	nfs4_free_args(nargs);
	kmem_free(nargs, sizeof (struct nfs_args));
}

/*
 * When we finally get into the mounting, we need to add this
 * node to the ephemeral tree.
 *
 * This is called from nfs4_mount().
 */
/*
 * Returns 0 on success, or EBUSY if a racing harvester/umount has
 * invalidated the tree (or the parent's ephemeral linkage) and the
 * caller should decide whether to retry.
 */
int
nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp)
{
	mntinfo4_t *mi_parent;
	nfs4_ephemeral_t *eph;
	nfs4_ephemeral_tree_t *net;

	nfs4_ephemeral_t *prior;
	nfs4_ephemeral_t *child;

	nfs4_ephemeral_t *peer;

	nfs4_trigger_globals_t *ntg;
	zone_t *zone = curproc->p_zone;

	int rc = 0;

	mi_parent = VTOMI4(mvp);

	/*
	 * Get this before grabbing anything else!
	 */
	ntg = zone_getspecific(nfs4_ephemeral_key, zone);
	/* lazily kick off the per-zone deadwood harvester thread */
	if (!ntg->ntg_thread_started) {
		nfs4_ephemeral_start_harvester(ntg);
	}

	/* lock order: parent's mi_lock before child's */
	mutex_enter(&mi_parent->mi_lock);
	mutex_enter(&mi->mi_lock);

	/* the new mount shares its parent's ephemeral tree */
	net = mi->mi_ephemeral_tree =
	    mi_parent->mi_ephemeral_tree;

	/*
	 * If the mi_ephemeral_tree is NULL, then it
	 * means that either the harvester or a manual
	 * umount has cleared the tree out right before
	 * we got here.
	 *
	 * There is nothing we can do here, so return
	 * to the caller and let them decide whether they
	 * try again.
	 */
	if (net == NULL) {
		mutex_exit(&mi->mi_lock);
		mutex_exit(&mi_parent->mi_lock);

		return (EBUSY);
	}

	nfs4_ephemeral_tree_hold(net);

	/*
	 * We need to tack together the ephemeral mount
	 * with this new mntinfo.
	 */
	eph = kmem_zalloc(sizeof (*eph), KM_SLEEP);
	eph->ne_mount = mi;
	eph->ne_ref_time = gethrestime_sec();

	/*
	 * We need to tell the ephemeral mount when
	 * to time out.
	 */
	eph->ne_mount_to = ntg->ntg_mount_to;

	mi->mi_flags |= MI4_EPHEMERAL;
	mi->mi_ephemeral = eph;

	/*
	 * If the enclosing mntinfo4 is also ephemeral,
	 * then we need to point to its enclosing parent.
	 * Else the enclosing mntinfo4 is the enclosing parent.
	 *
	 * We also need to weave this ephemeral node
	 * into the tree.
	 */
	if (mi_parent->mi_flags & MI4_EPHEMERAL) {
		/*
		 * We need to decide if we are
		 * the root node of this branch
		 * or if we are a sibling of this
		 * branch.
		 */
		prior = mi_parent->mi_ephemeral;
		if (prior == NULL) {
			/*
			 * Race condition, clean up, and
			 * let caller handle mntinfo.
			 */
			mi->mi_flags &= ~MI4_EPHEMERAL;
			mi->mi_ephemeral = NULL;
			kmem_free(eph, sizeof (*eph));
			rc = EBUSY;
		} else {
			if (prior->ne_child == NULL) {
				prior->ne_child = eph;
			} else {
				/* push eph at the head of the sibling list */
				child = prior->ne_child;

				prior->ne_child = eph;
				eph->ne_peer = child;

				child->ne_prior = eph;
			}

			eph->ne_prior = prior;
		}
	} else {
		/*
		 * The parent mntinfo4 is the non-ephemeral
		 * root of the ephemeral tree. We
		 * need to decide if we are the root
		 * node of that tree or if we are a
		 * sibling of the root node.
		 *
		 * We are the root if there is no
		 * other node.
		 */
		if (net->net_root == NULL) {
			net->net_root = eph;
		} else {
			/* push eph at the head of the root's peer list */
			eph->ne_peer = peer = net->net_root;
			ASSERT(peer != NULL);
			net->net_root = eph;

			peer->ne_prior = eph;
		}

		eph->ne_prior = NULL;
	}

	nfs4_ephemeral_tree_rele(net);

	mutex_exit(&mi->mi_lock);
	mutex_exit(&mi_parent->mi_lock);

	return (rc);
}

/*
 * Commit the changes to the ephemeral tree for removing this node.
 */
static void
nfs4_ephemeral_umount_cleanup(nfs4_ephemeral_t *eph)
{
	nfs4_ephemeral_t *e = eph;
	nfs4_ephemeral_t *peer;
	nfs4_ephemeral_t *prior;

	peer = eph->ne_peer;
	prior = e->ne_prior;

	/*
	 * If this branch root was not the
	 * tree root, then we need to fix back pointers.
1608b9238976Sth */ 1609b9238976Sth if (prior) { 1610b9238976Sth if (prior->ne_child == e) { 1611b9238976Sth prior->ne_child = peer; 1612b9238976Sth } else { 1613b9238976Sth prior->ne_peer = peer; 1614b9238976Sth } 1615b9238976Sth 1616b9238976Sth if (peer) 1617b9238976Sth peer->ne_prior = prior; 1618b9238976Sth } else if (peer) { 1619b9238976Sth peer->ne_mount->mi_ephemeral_tree->net_root = peer; 1620b9238976Sth peer->ne_prior = NULL; 1621b9238976Sth } else { 1622b9238976Sth e->ne_mount->mi_ephemeral_tree->net_root = NULL; 1623b9238976Sth } 1624b9238976Sth } 1625b9238976Sth 1626b9238976Sth /* 1627b9238976Sth * We want to avoid recursion at all costs. So we need to 1628b9238976Sth * unroll the tree. We do this by a depth first traversal to 1629b9238976Sth * leaf nodes. We blast away the leaf and work our way back 1630b9238976Sth * up and down the tree. 1631b9238976Sth */ 1632b9238976Sth static int 1633b9238976Sth nfs4_ephemeral_unmount_engine(nfs4_ephemeral_t *eph, 1634b9238976Sth int isTreeRoot, int flag, cred_t *cr) 1635b9238976Sth { 1636b9238976Sth nfs4_ephemeral_t *e = eph; 1637b9238976Sth nfs4_ephemeral_t *prior; 1638b9238976Sth mntinfo4_t *mi; 1639b9238976Sth vfs_t *vfsp; 1640b9238976Sth int error; 1641b9238976Sth 1642b9238976Sth /* 1643b9238976Sth * We use the loop while unrolling the ephemeral tree. 1644b9238976Sth */ 1645b9238976Sth for (;;) { 1646b9238976Sth /* 1647b9238976Sth * First we walk down the child. 1648b9238976Sth */ 1649b9238976Sth if (e->ne_child) { 1650b9238976Sth prior = e; 1651b9238976Sth e = e->ne_child; 1652b9238976Sth continue; 1653b9238976Sth } 1654b9238976Sth 1655b9238976Sth /* 1656b9238976Sth * If we are the root of the branch we are removing, 1657b9238976Sth * we end it here. But if the branch is the root of 1658b9238976Sth * the tree, we have to forge on. 
We do not consider 1659b9238976Sth * the peer list for the root because while it may 1660b9238976Sth * be okay to remove, it is both extra work and a 1661b9238976Sth * potential for a false-positive error to stall the 1662b9238976Sth * unmount attempt. 1663b9238976Sth */ 1664b9238976Sth if (e == eph && isTreeRoot == FALSE) 1665b9238976Sth return (0); 1666b9238976Sth 1667b9238976Sth /* 1668b9238976Sth * Next we walk down the peer list. 1669b9238976Sth */ 1670b9238976Sth if (e->ne_peer) { 1671b9238976Sth prior = e; 1672b9238976Sth e = e->ne_peer; 1673b9238976Sth continue; 1674b9238976Sth } 1675b9238976Sth 1676b9238976Sth /* 1677b9238976Sth * We can only remove the node passed in by the 1678b9238976Sth * caller if it is the root of the ephemeral tree. 1679b9238976Sth * Otherwise, the caller will remove it. 1680b9238976Sth */ 1681b9238976Sth if (e == eph && isTreeRoot == FALSE) 1682b9238976Sth return (0); 1683b9238976Sth 1684b9238976Sth /* 1685b9238976Sth * Okay, we have a leaf node, time 1686b9238976Sth * to prune it! 1687b9238976Sth * 1688b9238976Sth * Note that prior can only be NULL if 1689b9238976Sth * and only if it is the root of the 1690b9238976Sth * ephemeral tree. 1691b9238976Sth */ 1692b9238976Sth prior = e->ne_prior; 1693b9238976Sth 1694b9238976Sth mi = e->ne_mount; 1695b9238976Sth mutex_enter(&mi->mi_lock); 1696b9238976Sth vfsp = mi->mi_vfsp; 1697b9238976Sth 1698b9238976Sth /* 1699b9238976Sth * Cleared by umount2_engine. 1700b9238976Sth */ 1701b9238976Sth VFS_HOLD(vfsp); 1702b9238976Sth 1703b9238976Sth /* 1704b9238976Sth * Inform nfs4_unmount to not recursively 1705b9238976Sth * descend into this node's children when it 1706b9238976Sth * gets processed. 
1707b9238976Sth */ 1708b9238976Sth mi->mi_flags |= MI4_EPHEMERAL_RECURSED; 1709b9238976Sth mutex_exit(&mi->mi_lock); 1710b9238976Sth 1711b9238976Sth error = umount2_engine(vfsp, flag, cr, FALSE); 1712b9238976Sth if (error) { 1713b9238976Sth /* 1714b9238976Sth * We need to reenable nfs4_unmount's ability 1715b9238976Sth * to recursively descend on this node. 1716b9238976Sth */ 1717b9238976Sth mutex_enter(&mi->mi_lock); 1718b9238976Sth mi->mi_flags &= ~MI4_EPHEMERAL_RECURSED; 1719b9238976Sth mutex_exit(&mi->mi_lock); 1720b9238976Sth 1721b9238976Sth return (error); 1722b9238976Sth } 1723b9238976Sth 1724b9238976Sth /* 1725b9238976Sth * If we are the current node, we do not want to 1726b9238976Sth * touch anything else. At this point, the only 1727b9238976Sth * way the current node can have survived to here 1728b9238976Sth * is if it is the root of the ephemeral tree and 1729b9238976Sth * we are unmounting the enclosing mntinfo4. 1730b9238976Sth */ 1731b9238976Sth if (e == eph) { 1732b9238976Sth ASSERT(prior == NULL); 1733b9238976Sth return (0); 1734b9238976Sth } 1735b9238976Sth 1736b9238976Sth /* 1737b9238976Sth * Stitch up the prior node. Note that since 1738b9238976Sth * we have handled the root of the tree, prior 1739b9238976Sth * must be non-NULL. 
1740b9238976Sth */ 1741b9238976Sth ASSERT(prior != NULL); 1742b9238976Sth if (prior->ne_child == e) { 1743b9238976Sth prior->ne_child = NULL; 1744b9238976Sth } else { 1745b9238976Sth ASSERT(prior->ne_peer == e); 1746b9238976Sth 1747b9238976Sth prior->ne_peer = NULL; 1748b9238976Sth } 1749b9238976Sth 1750b9238976Sth e = prior; 1751b9238976Sth } 1752b9238976Sth 1753b9238976Sth /* NOTREACHED */ 1754b9238976Sth } 1755b9238976Sth 1756b9238976Sth /* 1757b9238976Sth * Common code to safely release net_cnt_lock and net_tree_lock 1758b9238976Sth */ 1759b9238976Sth void 1760b9238976Sth nfs4_ephemeral_umount_unlock(bool_t *pmust_unlock, 1761d708af74SThomas Haynes bool_t *pmust_rele, nfs4_ephemeral_tree_t **pnet) 1762b9238976Sth { 1763b9238976Sth nfs4_ephemeral_tree_t *net = *pnet; 1764b9238976Sth 1765b9238976Sth if (*pmust_unlock) { 1766b9238976Sth mutex_enter(&net->net_cnt_lock); 1767b9238976Sth net->net_status &= ~NFS4_EPHEMERAL_TREE_UMOUNTING; 1768d708af74SThomas Haynes if (*pmust_rele) 1769d708af74SThomas Haynes nfs4_ephemeral_tree_decr(net); 1770b9238976Sth mutex_exit(&net->net_cnt_lock); 1771b9238976Sth 1772b9238976Sth mutex_exit(&net->net_tree_lock); 1773b9238976Sth 1774b9238976Sth *pmust_unlock = FALSE; 1775b9238976Sth } 1776b9238976Sth } 1777b9238976Sth 1778b9238976Sth /* 1779b9238976Sth * While we may have removed any child or sibling nodes of this 1780b9238976Sth * ephemeral node, we can not nuke it until we know that there 1781b9238976Sth * were no actived vnodes on it. This will do that final 1782b9238976Sth * work once we know it is not busy. 1783b9238976Sth */ 1784b9238976Sth void 1785b9238976Sth nfs4_ephemeral_umount_activate(mntinfo4_t *mi, bool_t *pmust_unlock, 1786d708af74SThomas Haynes bool_t *pmust_rele, nfs4_ephemeral_tree_t **pnet) 1787b9238976Sth { 1788b9238976Sth /* 1789b9238976Sth * Now we need to get rid of the ephemeral data if it exists. 
 */
	mutex_enter(&mi->mi_lock);
	if (mi->mi_ephemeral) {
		/*
		 * If we are the root node of an ephemeral branch
		 * which is being removed, then we need to fixup
		 * pointers into and out of the node.
		 */
		if (!(mi->mi_flags & MI4_EPHEMERAL_RECURSED))
			nfs4_ephemeral_umount_cleanup(mi->mi_ephemeral);

		ASSERT(mi->mi_ephemeral != NULL);

		kmem_free(mi->mi_ephemeral, sizeof (*mi->mi_ephemeral));
		mi->mi_ephemeral = NULL;
	}
	mutex_exit(&mi->mi_lock);

	nfs4_ephemeral_umount_unlock(pmust_unlock, pmust_rele, pnet);
}

/*
 * Unmount an ephemeral node.
 *
 * mi          - the mntinfo4 being unmounted; may be an ephemeral
 *               node or the enclosing (non-ephemeral) root.
 * flag        - umount flags; MS_SYSSPACE identifies the harvester.
 * cr          - credentials for the child unmounts.
 * pmust_unlock, pmust_rele, pnet
 *             - out-parameters telling the caller whether the tree
 *               lock and a tree reference are still held on return,
 *               and which tree they apply to. The caller releases
 *               them via nfs4_ephemeral_umount_unlock().
 *
 * Returns 0 on success, EBUSY if another umount is in flight, or
 * the error from unmounting a child.
 */
int
nfs4_ephemeral_umount(mntinfo4_t *mi, int flag, cred_t *cr,
    bool_t *pmust_unlock, bool_t *pmust_rele, nfs4_ephemeral_tree_t **pnet)
{
	int error = 0;
	nfs4_ephemeral_t *eph;
	nfs4_ephemeral_tree_t *net;
	int is_derooting = FALSE;
	int is_recursed = FALSE;
	int was_locked = FALSE;

	/*
	 * Make sure to set the default state for cleaning
	 * up the tree in the caller (and on the way out).
	 */
	*pmust_unlock = *pmust_rele = FALSE;

	/*
	 * The active vnodes on this file system may be ephemeral
	 * children. We need to check for and try to unmount them
	 * here. If any can not be unmounted, we are going
	 * to return EBUSY.
	 */
	mutex_enter(&mi->mi_lock);

	/*
	 * If an ephemeral tree, we need to check to see if
	 * the lock is already held. If it is, then we need
	 * to see if we are being called as a result of
	 * the recursive removal of some node of the tree or
	 * if we are another attempt to remove the tree.
	 *
	 * mi_flags & MI4_EPHEMERAL indicates an ephemeral
	 * node. mi_ephemeral being non-NULL also does this.
	 *
	 * mi_ephemeral_tree being non-NULL is sufficient
	 * to also indicate either it is an ephemeral node
	 * or the enclosing mntinfo4.
	 *
	 * Do we need MI4_EPHEMERAL? Yes, it is useful for
	 * when we delete the ephemeral node and need to
	 * differentiate from an ephemeral node and the
	 * enclosing root node.
	 */
	*pnet = net = mi->mi_ephemeral_tree;
	if (net == NULL) {
		/* No ephemeral tree at all - nothing to do. */
		mutex_exit(&mi->mi_lock);
		return (0);
	}

	eph = mi->mi_ephemeral;
	is_recursed = mi->mi_flags & MI4_EPHEMERAL_RECURSED;
	is_derooting = (eph == NULL);

	/*
	 * If this is not recursion, then we need to
	 * grab a ref count.
	 *
	 * But wait, we also do not want to do that
	 * if a harvester thread has already grabbed
	 * the lock.
	 */
	if (!is_recursed) {
		mutex_enter(&net->net_cnt_lock);
		if (net->net_status &
		    NFS4_EPHEMERAL_TREE_LOCKED) {
			/*
			 * If the tree is locked, we need
			 * to decide whether we are the
			 * harvester or some explicit call
			 * for a umount. The only way that
			 * we are the harvester is if
			 * MS_SYSSPACE is set.
			 *
			 * We only let the harvester through
			 * at this point.
			 *
			 * We return EBUSY so that the
			 * caller knows something is
			 * going on. Note that by that
			 * time, the umount in the other
			 * thread may have already occurred.
			 */
			if (!(flag & MS_SYSSPACE)) {
				mutex_exit(&net->net_cnt_lock);
				mutex_exit(&mi->mi_lock);

				return (EBUSY);
			}

			was_locked = TRUE;
		} else {
			nfs4_ephemeral_tree_incr(net);
			*pmust_rele = TRUE;
		}

		mutex_exit(&net->net_cnt_lock);
	}
	mutex_exit(&mi->mi_lock);

	/*
	 * If we are not the harvester, we need to check
	 * to see if we need to grab the tree lock.
	 */
	if (was_locked == FALSE) {
		/*
		 * If we grab the lock, it means that no other
		 * operation is working on the tree. If we don't
		 * grab it, we need to decide if this is because
		 * we are a recursive call or a new operation.
		 */
		if (mutex_tryenter(&net->net_tree_lock)) {
			*pmust_unlock = TRUE;
		} else {
			/*
			 * If we are a recursive call, we can
			 * proceed without the lock.
			 * Otherwise we have to wait until
			 * the lock becomes free.
			 */
			if (!is_recursed) {
				mutex_enter(&net->net_cnt_lock);
				if (net->net_status &
				    (NFS4_EPHEMERAL_TREE_DEROOTING
				    | NFS4_EPHEMERAL_TREE_INVALID)) {
					/*
					 * Tree is going away; drop our
					 * ref and report busy.
					 */
					nfs4_ephemeral_tree_decr(net);
					mutex_exit(&net->net_cnt_lock);
					*pmust_rele = FALSE;
					goto is_busy;
				}
				mutex_exit(&net->net_cnt_lock);

				/*
				 * We can't hold any other locks whilst
				 * we wait on this to free up.
				 */
				mutex_enter(&net->net_tree_lock);

				/*
				 * Note that while mi->mi_ephemeral
				 * may change and thus we have to
				 * update eph, it is the case that
				 * we have tied down net and
				 * do not care if mi->mi_ephemeral_tree
				 * has changed.
				 */
				mutex_enter(&mi->mi_lock);
				eph = mi->mi_ephemeral;
				mutex_exit(&mi->mi_lock);

				/*
				 * Okay, we need to see if either the
				 * tree got nuked or the current node
				 * got nuked. Both of which will cause
				 * an error.
				 *
				 * Note that a subsequent retry of the
				 * umount shall work.
				 */
				mutex_enter(&net->net_cnt_lock);
				if (net->net_status &
				    NFS4_EPHEMERAL_TREE_INVALID ||
				    (!is_derooting && eph == NULL)) {
					nfs4_ephemeral_tree_decr(net);
					mutex_exit(&net->net_cnt_lock);
					mutex_exit(&net->net_tree_lock);
					*pmust_rele = FALSE;
					goto is_busy;
				}
				mutex_exit(&net->net_cnt_lock);
				*pmust_unlock = TRUE;
			}
		}
	}

	/*
	 * Only once we have grabbed the lock can we mark what we
	 * are planning on doing to the ephemeral tree.
	 */
	if (*pmust_unlock) {
		mutex_enter(&net->net_cnt_lock);
		net->net_status |= NFS4_EPHEMERAL_TREE_UMOUNTING;

		/*
		 * Check to see if we are nuking the root.
		 */
		if (is_derooting)
			net->net_status |=
			    NFS4_EPHEMERAL_TREE_DEROOTING;
		mutex_exit(&net->net_cnt_lock);
	}

	if (!is_derooting) {
		/*
		 * Only work on children if the caller has not already
		 * done so.
		 */
		if (!is_recursed) {
			ASSERT(eph != NULL);

			error = nfs4_ephemeral_unmount_engine(eph,
			    FALSE, flag, cr);
			if (error)
				goto is_busy;
		}
	} else {
		eph = net->net_root;

		/*
		 * Only work if there is something there.
		 */
		if (eph) {
			error = nfs4_ephemeral_unmount_engine(eph, TRUE,
			    flag, cr);
			if (error) {
				/* Undo the derooting marker and bail. */
				mutex_enter(&net->net_cnt_lock);
				net->net_status &=
				    ~NFS4_EPHEMERAL_TREE_DEROOTING;
				mutex_exit(&net->net_cnt_lock);
				goto is_busy;
			}

			/*
			 * Nothing else which goes wrong will
			 * invalidate the blowing away of the
			 * ephemeral tree.
			 */
			net->net_root = NULL;
		}

		/*
		 * We have derooted and we have caused the tree to be
		 * invalidated.
		 */
		mutex_enter(&net->net_cnt_lock);
		net->net_status &= ~NFS4_EPHEMERAL_TREE_DEROOTING;
		net->net_status |= NFS4_EPHEMERAL_TREE_INVALID;
		if (was_locked == FALSE)
			nfs4_ephemeral_tree_decr(net);
		mutex_exit(&net->net_cnt_lock);

		if (was_locked == FALSE)
			mutex_exit(&net->net_tree_lock);

		/*
		 * We have just blown away any notation of this
		 * tree being locked. We can't let the caller
		 * try to clean things up.
		 */
		*pmust_unlock = FALSE;

		/*
		 * At this point, the tree should no longer be
		 * associated with the mntinfo4. We need to pull
		 * it off there and let the harvester take
		 * care of it once the refcnt drops.
		 */
		mutex_enter(&mi->mi_lock);
		mi->mi_ephemeral_tree = NULL;
		mutex_exit(&mi->mi_lock);
	}

	return (0);

is_busy:

	nfs4_ephemeral_umount_unlock(pmust_unlock, pmust_rele,
	    pnet);

	return (error);
}

/*
 * Do the umount and record any error in the parent.
 */
static void
nfs4_ephemeral_record_umount(vfs_t *vfsp, int flag,
    nfs4_ephemeral_t *e, nfs4_ephemeral_t *prior)
{
	int error;

	/* Harvester runs with kcred; errors propagate via ne_state. */
	error = umount2_engine(vfsp, flag, kcred, FALSE);
	if (error) {
		if (prior) {
			if (prior->ne_child == e)
				prior->ne_state |=
				    NFS4_EPHEMERAL_CHILD_ERROR;
			else
				prior->ne_state |=
				    NFS4_EPHEMERAL_PEER_ERROR;
		}
	}
}

/*
 * For each tree in the forest (where the forest is in
 * effect all of the ephemeral trees for this zone),
 * scan to see if a node can be unmounted. Note that
 * unlike nfs4_ephemeral_unmount_engine(), we do
 * not process the current node before children or
 * siblings. I.e., if a node can be unmounted, we
 * do not recursively check to see if the nodes
 * hanging off of it can also be unmounted.
 *
 * Instead, we delve down deep to try and remove the
 * children first. Then, because we share code with
 * nfs4_ephemeral_unmount_engine(), we will try
 * them again. This could be a performance issue in
 * the future.
 *
 * Also note that unlike nfs4_ephemeral_unmount_engine(),
 * we do not halt on an error. We will not remove the
 * current node, but we will keep on trying to remove
 * the others.
 *
 * force indicates that we want the unmount to occur
 * even if there is something blocking it.
 *
 * time_check indicates that we want to see if the
 * mount has expired past mount_to or not. Typically
 * we want to do this and only on a shutdown of the
 * zone would we want to ignore the check.
 */
static void
nfs4_ephemeral_harvest_forest(nfs4_trigger_globals_t *ntg,
    bool_t force, bool_t time_check)
{
	nfs4_ephemeral_tree_t *net;
	nfs4_ephemeral_tree_t *prev = NULL;
	nfs4_ephemeral_tree_t *next;
	nfs4_ephemeral_t *e;
	nfs4_ephemeral_t *prior;
	time_t now = gethrestime_sec();

	/* Invalid trees are collected here and freed after the scan. */
	nfs4_ephemeral_tree_t *harvest = NULL;

	int flag;

	mntinfo4_t *mi;
	vfs_t *vfsp;

	/* MS_SYSSPACE marks these unmounts as coming from the harvester. */
	if (force)
		flag = MS_FORCE | MS_SYSSPACE;
	else
		flag = MS_SYSSPACE;

	mutex_enter(&ntg->ntg_forest_lock);
	for (net = ntg->ntg_forest; net != NULL; net = next) {
		next = net->net_next;

		nfs4_ephemeral_tree_hold(net);

		mutex_enter(&net->net_tree_lock);

		/*
		 * Let the unmount code know that the
		 * tree is already locked!
		 */
		mutex_enter(&net->net_cnt_lock);
		net->net_status |= NFS4_EPHEMERAL_TREE_LOCKED;
		mutex_exit(&net->net_cnt_lock);

		/*
		 * If the intent is force all ephemeral nodes to
		 * be unmounted in this zone, we can short circuit a
		 * lot of tree traversal and simply zap the root node.
		 */
		if (force) {
			if (net->net_root) {
				mi = net->net_root->ne_mount;
				vfsp = mi->mi_vfsp;

				/*
				 * Cleared by umount2_engine.
				 */
				VFS_HOLD(vfsp);

				(void) umount2_engine(vfsp, flag,
				    kcred, FALSE);

				goto check_done;
			}
		}

		/*
		 * Iterative depth-first walk driven by an explicit
		 * per-node state (ne_state) instead of recursion:
		 * visit children, then siblings, then the node itself.
		 */
		e = net->net_root;
		if (e)
			e->ne_state = NFS4_EPHEMERAL_VISIT_CHILD;

		while (e) {
			if (e->ne_state == NFS4_EPHEMERAL_VISIT_CHILD) {
				e->ne_state = NFS4_EPHEMERAL_VISIT_SIBLING;
				if (e->ne_child) {
					e = e->ne_child;
					e->ne_state =
					    NFS4_EPHEMERAL_VISIT_CHILD;
				}

				continue;
			} else if (e->ne_state ==
			    NFS4_EPHEMERAL_VISIT_SIBLING) {
				e->ne_state = NFS4_EPHEMERAL_PROCESS_ME;
				if (e->ne_peer) {
					e = e->ne_peer;
					e->ne_state =
					    NFS4_EPHEMERAL_VISIT_CHILD;
				}

				continue;
			} else if (e->ne_state ==
			    NFS4_EPHEMERAL_CHILD_ERROR) {
				prior = e->ne_prior;

				/*
				 * If a child reported an error, do
				 * not bother trying to unmount.
				 *
				 * If your prior node is a parent,
				 * pass the error up such that they
				 * also do not try to unmount.
				 *
				 * However, if your prior is a sibling,
				 * let them try to unmount if they can.
				 */
				if (prior) {
					if (prior->ne_child == e)
						prior->ne_state |=
						    NFS4_EPHEMERAL_CHILD_ERROR;
					else
						prior->ne_state |=
						    NFS4_EPHEMERAL_PEER_ERROR;
				}

				/*
				 * Clear the error and if needed, process peers.
				 *
				 * Once we mask out the error, we know whether
				 * or not we have to process another node.
				 */
				e->ne_state &= ~NFS4_EPHEMERAL_CHILD_ERROR;
				if (e->ne_state == NFS4_EPHEMERAL_PROCESS_ME)
					e = prior;

				continue;
			} else if (e->ne_state ==
			    NFS4_EPHEMERAL_PEER_ERROR) {
				prior = e->ne_prior;

				if (prior) {
					if (prior->ne_child == e)
						prior->ne_state =
						    NFS4_EPHEMERAL_CHILD_ERROR;
					else
						prior->ne_state =
						    NFS4_EPHEMERAL_PEER_ERROR;
				}

				/*
				 * Clear the error from this node and do the
				 * correct processing.
				 */
				e->ne_state &= ~NFS4_EPHEMERAL_PEER_ERROR;
				continue;
			}

			prior = e->ne_prior;
			e->ne_state = NFS4_EPHEMERAL_OK;

			/*
			 * It must be the case that we need to process
			 * this node: either unconditionally or because
			 * it has been idle past its mount timeout.
			 */
			if (!time_check ||
			    now - e->ne_ref_time > e->ne_mount_to) {
				mi = e->ne_mount;
				vfsp = mi->mi_vfsp;

				/*
				 * Cleared by umount2_engine.
				 */
				VFS_HOLD(vfsp);

				/*
				 * Note that we effectively work down to the
				 * leaf nodes first, try to unmount them,
				 * then work our way back up from the leaf
				 * nodes.
				 *
				 * Also note that we deal with a lot of
				 * complexity by sharing the work with
				 * the manual unmount code.
				 */
				nfs4_ephemeral_record_umount(vfsp, flag,
				    e, prior);
			}

			e = prior;
		}

check_done:

		/*
		 * At this point we are done processing this tree.
		 *
		 * If the tree is invalid and we are the only reference
		 * to it, then we push it on the local linked list
		 * to remove it at the end. We avoid that action now
		 * to keep the tree processing going along at a fair clip.
		 *
		 * Else, even if we are the only reference, we drop
		 * our hold on the current tree and allow it to be
		 * reused as needed.
 */
		mutex_enter(&net->net_cnt_lock);
		if (net->net_refcnt == 1 &&
		    net->net_status & NFS4_EPHEMERAL_TREE_INVALID) {
			/* Ours is the last reference; unchain the tree. */
			nfs4_ephemeral_tree_decr(net);
			net->net_status &= ~NFS4_EPHEMERAL_TREE_LOCKED;
			mutex_exit(&net->net_cnt_lock);
			mutex_exit(&net->net_tree_lock);

			if (prev)
				prev->net_next = net->net_next;
			else
				ntg->ntg_forest = net->net_next;

			/* Defer the actual free until after the scan. */
			net->net_next = harvest;
			harvest = net;
			continue;
		}

		nfs4_ephemeral_tree_decr(net);
		net->net_status &= ~NFS4_EPHEMERAL_TREE_LOCKED;
		mutex_exit(&net->net_cnt_lock);
		mutex_exit(&net->net_tree_lock);

		prev = net;
	}
	mutex_exit(&ntg->ntg_forest_lock);

	/* Now tear down the trees collected above. */
	for (net = harvest; net != NULL; net = next) {
		next = net->net_next;

		mutex_destroy(&net->net_tree_lock);
		mutex_destroy(&net->net_cnt_lock);
		kmem_free(net, sizeof (*net));
	}
}

/*
 * This is the thread which decides when the harvesting
 * can proceed and when to kill it off for this zone.
 *
 * Wakes up every nfs4_trigger_thread_timer ticks (or on zone
 * shutdown) and harvests expired ephemeral mounts.
 */
static void
nfs4_ephemeral_harvester(nfs4_trigger_globals_t *ntg)
{
	clock_t timeleft;
	zone_t *zone = curproc->p_zone;

	for (;;) {
		timeleft = zone_status_timedwait(zone, lbolt +
		    nfs4_trigger_thread_timer * hz, ZONE_IS_SHUTTING_DOWN);

		/*
		 * zone is exiting...
		 */
		if (timeleft != -1) {
			ASSERT(zone_status_get(zone) >= ZONE_IS_SHUTTING_DOWN);
			zthread_exit();
			/* NOTREACHED */
		}

		/*
		 * Only bother scanning if there is potential
		 * work to be done.
		 */
		if (ntg->ntg_forest == NULL)
			continue;

		/*
		 * Now scan the list and get rid of everything which
		 * is old.
		 */
		nfs4_ephemeral_harvest_forest(ntg, FALSE, TRUE);
	}

	/* NOTREACHED */
}

/*
 * The zone specific glue needed to start the unmount harvester.
 *
 * Note that we want to avoid holding the mutex as long as possible,
 * hence the multiple (double-checked) tests of ntg_thread_started.
 *
 * The caller should avoid us getting down here in the first
 * place.
 */
static void
nfs4_ephemeral_start_harvester(nfs4_trigger_globals_t *ntg)
{
	/*
	 * It got started before we got here...
	 */
	if (ntg->ntg_thread_started)
		return;

	mutex_enter(&nfs4_ephemeral_thread_lock);

	/* Re-check under the lock - someone may have beaten us to it. */
	if (ntg->ntg_thread_started) {
		mutex_exit(&nfs4_ephemeral_thread_lock);
		return;
	}

	/*
	 * Start the unmounter harvester thread for this zone.
	 */
	(void) zthread_create(NULL, 0, nfs4_ephemeral_harvester,
	    ntg, 0, minclsyspri);

	ntg->ntg_thread_started = TRUE;
	mutex_exit(&nfs4_ephemeral_thread_lock);
}

/*
 * ZSD create callback: allocate the per-zone trigger globals.
 * The harvester thread itself is started lazily, on first use.
 */
/*ARGSUSED*/
static void *
nfs4_ephemeral_zsd_create(zoneid_t zoneid)
{
	nfs4_trigger_globals_t *ntg;

	ntg = kmem_zalloc(sizeof (*ntg), KM_SLEEP);
	ntg->ntg_thread_started = FALSE;

	/*
	 * This is the default mount timeout.
	 *
	 * NOTE(review): the default is taken from the harvester's
	 * wakeup interval (nfs4_trigger_thread_timer); nfssys() can
	 * apparently override it per zone -- confirm.
	 */
	ntg->ntg_mount_to = nfs4_trigger_thread_timer;

	mutex_init(&ntg->ntg_forest_lock, NULL,
	    MUTEX_DEFAULT, NULL);

	return (ntg);
}

/*
 * Try a nice gentle walk down the forest and convince
 * all of the trees to gracefully give it up.
 * (ZSD shutdown callback: non-forced, non-timed harvest.)
 */
/*ARGSUSED*/
static void
nfs4_ephemeral_zsd_shutdown(zoneid_t zoneid, void *arg)
{
	nfs4_trigger_globals_t *ntg = arg;

	if (!ntg)
		return;

	nfs4_ephemeral_harvest_forest(ntg, FALSE, FALSE);
}

/*
 * Race along the forest and rip all of the trees out by
 * their rootballs!
2476b9238976Sth */ 2477b9238976Sth /*ARGSUSED*/ 2478b9238976Sth static void 2479b9238976Sth nfs4_ephemeral_zsd_destroy(zoneid_t zoneid, void *arg) 2480b9238976Sth { 2481b9238976Sth nfs4_trigger_globals_t *ntg = arg; 2482b9238976Sth 2483b9238976Sth if (!ntg) 2484b9238976Sth return; 2485b9238976Sth 2486b9238976Sth nfs4_ephemeral_harvest_forest(ntg, TRUE, FALSE); 2487b9238976Sth 2488b9238976Sth mutex_destroy(&ntg->ntg_forest_lock); 2489b9238976Sth kmem_free(ntg, sizeof (*ntg)); 2490b9238976Sth } 2491b9238976Sth 2492b9238976Sth /* 2493b9238976Sth * This is the zone independent cleanup needed for 2494b9238976Sth * emphemeral mount processing. 2495b9238976Sth */ 2496b9238976Sth void 2497b9238976Sth nfs4_ephemeral_fini(void) 2498b9238976Sth { 2499b9238976Sth (void) zone_key_delete(nfs4_ephemeral_key); 2500b9238976Sth mutex_destroy(&nfs4_ephemeral_thread_lock); 2501b9238976Sth } 2502b9238976Sth 2503b9238976Sth /* 2504b9238976Sth * This is the zone independent initialization needed for 2505b9238976Sth * emphemeral mount processing. 2506b9238976Sth */ 2507b9238976Sth void 2508b9238976Sth nfs4_ephemeral_init(void) 2509b9238976Sth { 2510b9238976Sth mutex_init(&nfs4_ephemeral_thread_lock, NULL, MUTEX_DEFAULT, 2511b9238976Sth NULL); 2512b9238976Sth 2513b9238976Sth zone_key_create(&nfs4_ephemeral_key, nfs4_ephemeral_zsd_create, 2514b9238976Sth nfs4_ephemeral_zsd_shutdown, nfs4_ephemeral_zsd_destroy); 2515b9238976Sth } 2516b9238976Sth 2517b9238976Sth /* 2518b9238976Sth * nfssys() calls this function to set the per-zone 2519b9238976Sth * value of mount_to to drive when an ephemeral mount is 2520b9238976Sth * timed out. Each mount will grab a copy of this value 2521b9238976Sth * when mounted. 
2522b9238976Sth */ 2523b9238976Sth void 2524b9238976Sth nfs4_ephemeral_set_mount_to(uint_t mount_to) 2525b9238976Sth { 2526b9238976Sth nfs4_trigger_globals_t *ntg; 2527b9238976Sth zone_t *zone = curproc->p_zone; 2528b9238976Sth 2529b9238976Sth ntg = zone_getspecific(nfs4_ephemeral_key, zone); 2530b9238976Sth 2531b9238976Sth ntg->ntg_mount_to = mount_to; 2532b9238976Sth } 2533b9238976Sth 2534b9238976Sth /* 2535b9238976Sth * Walk the list of v4 mount options; if they are currently set in vfsp, 2536b9238976Sth * append them to a new comma-separated mount option string, and return it. 2537b9238976Sth * 2538b9238976Sth * Caller should free by calling nfs4_trigger_destroy_mntopts(). 2539b9238976Sth */ 2540b9238976Sth static char * 2541b9238976Sth nfs4_trigger_create_mntopts(vfs_t *vfsp) 2542b9238976Sth { 2543b9238976Sth uint_t i; 2544b9238976Sth char *mntopts; 2545b9238976Sth struct vfssw *vswp; 2546b9238976Sth mntopts_t *optproto; 2547b9238976Sth 2548b9238976Sth mntopts = kmem_zalloc(MAX_MNTOPT_STR, KM_SLEEP); 2549b9238976Sth 2550b9238976Sth /* get the list of applicable mount options for v4; locks *vswp */ 2551b9238976Sth vswp = vfs_getvfssw(MNTTYPE_NFS4); 2552b9238976Sth optproto = &vswp->vsw_optproto; 2553b9238976Sth 2554b9238976Sth for (i = 0; i < optproto->mo_count; i++) { 2555b9238976Sth struct mntopt *mop = &optproto->mo_list[i]; 2556b9238976Sth 2557b9238976Sth if (mop->mo_flags & MO_EMPTY) 2558b9238976Sth continue; 2559b9238976Sth 2560b9238976Sth if (nfs4_trigger_add_mntopt(mntopts, mop->mo_name, vfsp)) { 2561b9238976Sth kmem_free(mntopts, MAX_MNTOPT_STR); 2562b9238976Sth vfs_unrefvfssw(vswp); 2563b9238976Sth return (NULL); 2564b9238976Sth } 2565b9238976Sth } 2566b9238976Sth 2567b9238976Sth vfs_unrefvfssw(vswp); 2568b9238976Sth 2569b9238976Sth /* 2570b9238976Sth * MNTOPT_XATTR is not in the v4 mount opt proto list, 2571b9238976Sth * and it may only be passed via MS_OPTIONSTR, so we 2572b9238976Sth * must handle it here. 
2573b9238976Sth * 2574b9238976Sth * Ideally, it would be in the list, but NFS does not specify its 2575b9238976Sth * own opt proto list, it uses instead the default one. Since 2576b9238976Sth * not all filesystems support extended attrs, it would not be 2577b9238976Sth * appropriate to add it there. 2578b9238976Sth */ 2579b9238976Sth if (nfs4_trigger_add_mntopt(mntopts, MNTOPT_XATTR, vfsp) || 2580b9238976Sth nfs4_trigger_add_mntopt(mntopts, MNTOPT_NOXATTR, vfsp)) { 2581b9238976Sth kmem_free(mntopts, MAX_MNTOPT_STR); 2582b9238976Sth return (NULL); 2583b9238976Sth } 2584b9238976Sth 2585b9238976Sth return (mntopts); 2586b9238976Sth } 2587b9238976Sth 2588b9238976Sth static void 2589b9238976Sth nfs4_trigger_destroy_mntopts(char *mntopts) 2590b9238976Sth { 2591b9238976Sth if (mntopts) 2592b9238976Sth kmem_free(mntopts, MAX_MNTOPT_STR); 2593b9238976Sth } 2594b9238976Sth 2595b9238976Sth /* 2596b9238976Sth * Check a single mount option (optname). Add to mntopts if it is set in VFS. 2597b9238976Sth */ 2598b9238976Sth static int 2599b9238976Sth nfs4_trigger_add_mntopt(char *mntopts, char *optname, vfs_t *vfsp) 2600b9238976Sth { 2601b9238976Sth if (mntopts == NULL || optname == NULL || vfsp == NULL) 2602b9238976Sth return (EINVAL); 2603b9238976Sth 2604b9238976Sth if (vfs_optionisset(vfsp, optname, NULL)) { 2605b9238976Sth size_t mntoptslen = strlen(mntopts); 2606b9238976Sth size_t optnamelen = strlen(optname); 2607b9238976Sth 2608b9238976Sth /* +1 for ',', +1 for NUL */ 2609b9238976Sth if (mntoptslen + optnamelen + 2 > MAX_MNTOPT_STR) 2610b9238976Sth return (EOVERFLOW); 2611b9238976Sth 2612b9238976Sth /* first or subsequent mount option? 
*/ 2613b9238976Sth if (*mntopts != '\0') 2614b9238976Sth (void) strcat(mntopts, ","); 2615b9238976Sth 2616b9238976Sth (void) strcat(mntopts, optname); 2617b9238976Sth } 2618b9238976Sth 2619b9238976Sth return (0); 2620b9238976Sth } 2621b9238976Sth 2622b9238976Sth static enum clnt_stat 2623b9238976Sth nfs4_trigger_ping_server(servinfo4_t *svp, int nointr) 2624b9238976Sth { 2625b9238976Sth int retries, error; 2626b9238976Sth uint_t max_msgsize; 2627b9238976Sth enum clnt_stat status; 2628b9238976Sth CLIENT *cl; 2629b9238976Sth struct timeval timeout; 2630b9238976Sth 2631b9238976Sth /* as per recov_newserver() */ 2632b9238976Sth max_msgsize = 0; 2633b9238976Sth retries = 1; 2634b9238976Sth timeout.tv_sec = 2; 2635b9238976Sth timeout.tv_usec = 0; 2636b9238976Sth 2637b9238976Sth error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr, NFS_PROGRAM, 2638b9238976Sth NFS_V4, max_msgsize, retries, CRED(), &cl); 2639b9238976Sth if (error) 2640b9238976Sth return (RPC_FAILED); 2641b9238976Sth 2642b9238976Sth if (nointr) 2643b9238976Sth cl->cl_nosignal = TRUE; 2644b9238976Sth status = CLNT_CALL(cl, RFS_NULL, xdr_void, NULL, xdr_void, NULL, 2645b9238976Sth timeout); 2646b9238976Sth if (nointr) 2647b9238976Sth cl->cl_nosignal = FALSE; 2648b9238976Sth 2649b9238976Sth AUTH_DESTROY(cl->cl_auth); 2650b9238976Sth CLNT_DESTROY(cl); 2651b9238976Sth 2652b9238976Sth return (status); 2653b9238976Sth } 2654