1b9238976Sth /* 2b9238976Sth * CDDL HEADER START 3b9238976Sth * 4b9238976Sth * The contents of this file are subject to the terms of the 5b9238976Sth * Common Development and Distribution License (the "License"). 6b9238976Sth * You may not use this file except in compliance with the License. 7b9238976Sth * 8b9238976Sth * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9b9238976Sth * or http://www.opensolaris.org/os/licensing. 10b9238976Sth * See the License for the specific language governing permissions 11b9238976Sth * and limitations under the License. 12b9238976Sth * 13b9238976Sth * When distributing Covered Code, include this CDDL HEADER in each 14b9238976Sth * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15b9238976Sth * If applicable, add the following below this CDDL HEADER, with the 16b9238976Sth * fields enclosed by brackets "[]" replaced with your own identifying 17b9238976Sth * information: Portions Copyright [yyyy] [name of copyright owner] 18b9238976Sth * 19b9238976Sth * CDDL HEADER END 20b9238976Sth */ 21b9238976Sth 22b9238976Sth /* 23546a3997SThomas Haynes * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24b9238976Sth * Use is subject to license terms. 25b9238976Sth */ 26b9238976Sth 27b9238976Sth /* 28b9238976Sth * Support for ephemeral mounts, e.g. mirror-mounts. These mounts are 29b9238976Sth * triggered from a "stub" rnode via a special set of vnodeops. 
30b9238976Sth */ 31b9238976Sth 32b9238976Sth #include <sys/param.h> 33b9238976Sth #include <sys/types.h> 34b9238976Sth #include <sys/systm.h> 35b9238976Sth #include <sys/cred.h> 36b9238976Sth #include <sys/time.h> 37b9238976Sth #include <sys/vnode.h> 38b9238976Sth #include <sys/vfs.h> 39b9238976Sth #include <sys/vfs_opreg.h> 40b9238976Sth #include <sys/file.h> 41b9238976Sth #include <sys/filio.h> 42b9238976Sth #include <sys/uio.h> 43b9238976Sth #include <sys/buf.h> 44b9238976Sth #include <sys/mman.h> 45b9238976Sth #include <sys/pathname.h> 46b9238976Sth #include <sys/dirent.h> 47b9238976Sth #include <sys/debug.h> 48b9238976Sth #include <sys/vmsystm.h> 49b9238976Sth #include <sys/fcntl.h> 50b9238976Sth #include <sys/flock.h> 51b9238976Sth #include <sys/swap.h> 52b9238976Sth #include <sys/errno.h> 53b9238976Sth #include <sys/strsubr.h> 54b9238976Sth #include <sys/sysmacros.h> 55b9238976Sth #include <sys/kmem.h> 56b9238976Sth #include <sys/mount.h> 57b9238976Sth #include <sys/cmn_err.h> 58b9238976Sth #include <sys/pathconf.h> 59b9238976Sth #include <sys/utsname.h> 60b9238976Sth #include <sys/dnlc.h> 61b9238976Sth #include <sys/acl.h> 62b9238976Sth #include <sys/systeminfo.h> 63b9238976Sth #include <sys/policy.h> 64b9238976Sth #include <sys/sdt.h> 65b9238976Sth #include <sys/list.h> 66b9238976Sth #include <sys/stat.h> 67b9238976Sth #include <sys/mntent.h> 68*2f172c55SRobert Thurlow #include <sys/priv.h> 69b9238976Sth 70b9238976Sth #include <rpc/types.h> 71b9238976Sth #include <rpc/auth.h> 72b9238976Sth #include <rpc/clnt.h> 73b9238976Sth 74b9238976Sth #include <nfs/nfs.h> 75b9238976Sth #include <nfs/nfs_clnt.h> 76b9238976Sth #include <nfs/nfs_acl.h> 77b9238976Sth #include <nfs/lm.h> 78b9238976Sth #include <nfs/nfs4.h> 79b9238976Sth #include <nfs/nfs4_kprot.h> 80b9238976Sth #include <nfs/rnode4.h> 81b9238976Sth #include <nfs/nfs4_clnt.h> 82*2f172c55SRobert Thurlow #include <nfs/nfsid_map.h> 83*2f172c55SRobert Thurlow #include <nfs/nfs4_idmap_impl.h> 84b9238976Sth 
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/seg.h>
#include <vm/seg_map.h>
#include <vm/seg_kpm.h>
#include <vm/seg_vn.h>

#include <fs/fs_subr.h>

#include <sys/ddi.h>
#include <sys/int_fmtio.h>

#include <sys/sunddi.h>

#include <sys/priv_names.h>

extern zone_key_t nfs4clnt_zone_key;
extern zone_key_t nfsidmap_zone_key;

/*
 * The automatic unmounter thread stuff!
 */
static int nfs4_trigger_thread_timer = 20;	/* in seconds */

/*
 * Just a default....
 */
static uint_t nfs4_trigger_mount_to = 240;

/*
 * Per-zone state for ephemeral (mirror-mount/referral) trees; an instance
 * of this is hung off each zone via nfs4_ephemeral_key, below, and the
 * harvester thread walks ntg_forest to unmount idle ephemeral mounts.
 */
typedef struct nfs4_trigger_globals {
	kmutex_t	ntg_forest_lock;	/* protects ntg_forest list */
	uint_t		ntg_mount_to;		/* inactivity timeout, secs */
	int		ntg_thread_started;	/* harvester thread running? */
	nfs4_ephemeral_tree_t	*ntg_forest;	/* zone's ephemeral trees */
} nfs4_trigger_globals_t;

kmutex_t	nfs4_ephemeral_thread_lock;

zone_key_t nfs4_ephemeral_key = ZONE_KEY_UNINITIALIZED;

static void nfs4_ephemeral_start_harvester(nfs4_trigger_globals_t *);

/*
 * Used for ephemeral mounts; contains data either duplicated from
 * servinfo4_t, or hand-crafted, depending on type of ephemeral mount.
 *
 * It's intended that this structure is used solely for ephemeral
 * mount-type specific data, for passing this data to
 * nfs4_trigger_nargs_create().
 */
typedef struct ephemeral_servinfo {
	char			*esi_hostname;
	char			*esi_netname;
	char			*esi_path;
	int			esi_path_len;
	int			esi_mount_flags;
	struct netbuf		*esi_addr;
	struct netbuf		*esi_syncaddr;
	struct knetconfig	*esi_knconf;
} ephemeral_servinfo_t;

/*
 * Collect together the mount-type specific and generic data args.
 */
typedef struct domount_args {
	ephemeral_servinfo_t	*dma_esi;
	char			*dma_hostlist;	/* comma-sep. for RO failover */
	struct nfs_args		*dma_nargs;
} domount_args_t;


/*
 * The vnode ops functions for a trigger stub vnode
 */
static int nfs4_trigger_open(vnode_t **, int, cred_t *, caller_context_t *);
static int nfs4_trigger_getattr(vnode_t *, struct vattr *, int, cred_t *,
    caller_context_t *);
static int nfs4_trigger_setattr(vnode_t *, struct vattr *, int, cred_t *,
    caller_context_t *);
static int nfs4_trigger_access(vnode_t *, int, int, cred_t *,
    caller_context_t *);
static int nfs4_trigger_readlink(vnode_t *, struct uio *, cred_t *,
    caller_context_t *);
static int nfs4_trigger_lookup(vnode_t *, char *, vnode_t **,
    struct pathname *, int, vnode_t *, cred_t *, caller_context_t *,
    int *, pathname_t *);
static int nfs4_trigger_create(vnode_t *, char *, struct vattr *,
    enum vcexcl, int, vnode_t **, cred_t *, int, caller_context_t *,
    vsecattr_t *);
static int nfs4_trigger_remove(vnode_t *, char *, cred_t *, caller_context_t *,
    int);
static int nfs4_trigger_link(vnode_t *, vnode_t *, char *, cred_t *,
    caller_context_t *, int);
static int nfs4_trigger_rename(vnode_t *, char *, vnode_t *, char *,
    cred_t *, caller_context_t *, int);
static int nfs4_trigger_mkdir(vnode_t *, char *, struct vattr *,
    vnode_t **, cred_t *, caller_context_t *, int, vsecattr_t *vsecp);
static int nfs4_trigger_rmdir(vnode_t *, char *, vnode_t *, cred_t *,
    caller_context_t *, int);
static int nfs4_trigger_symlink(vnode_t *, char *, struct vattr *, char *,
    cred_t *, caller_context_t *, int);
static int nfs4_trigger_cmp(vnode_t *, vnode_t *, caller_context_t *);

/*
 * Regular NFSv4 vnodeops that we need to reference directly
 */
extern int nfs4_getattr(vnode_t *, struct vattr *, int, cred_t *,
    caller_context_t *);
extern void nfs4_inactive(vnode_t *, cred_t *, caller_context_t *);
extern int nfs4_rwlock(vnode_t *, int, caller_context_t *);
extern void nfs4_rwunlock(vnode_t *, int, caller_context_t *);
extern int nfs4_lookup(vnode_t *, char *, vnode_t **,
    struct pathname *, int, vnode_t *, cred_t *,
    caller_context_t *, int *, pathname_t *);
extern int nfs4_pathconf(vnode_t *, int, ulong_t *, cred_t *,
    caller_context_t *);
extern int nfs4_getsecattr(vnode_t *, vsecattr_t *, int, cred_t *,
    caller_context_t *);
extern int nfs4_fid(vnode_t *, fid_t *, caller_context_t *);
extern int nfs4_realvp(vnode_t *, vnode_t **, caller_context_t *);

static int nfs4_trigger_mount(vnode_t *, cred_t *, vnode_t **);
static int nfs4_trigger_domount(vnode_t *, domount_args_t *, vfs_t **,
    cred_t *, vnode_t **);
static domount_args_t *nfs4_trigger_domount_args_create(vnode_t *, cred_t *);
static void nfs4_trigger_domount_args_destroy(domount_args_t *dma,
    vnode_t *vp);
static ephemeral_servinfo_t *nfs4_trigger_esi_create(vnode_t *, servinfo4_t *,
    cred_t *);
static void nfs4_trigger_esi_destroy(ephemeral_servinfo_t *, vnode_t *);
static ephemeral_servinfo_t *nfs4_trigger_esi_create_mirrormount(vnode_t *,
    servinfo4_t *);
static ephemeral_servinfo_t *nfs4_trigger_esi_create_referral(vnode_t *,
    cred_t *);
static struct nfs_args *nfs4_trigger_nargs_create(mntinfo4_t *, servinfo4_t *,
    ephemeral_servinfo_t *);
static void nfs4_trigger_nargs_destroy(struct nfs_args *);
static char *nfs4_trigger_create_mntopts(vfs_t *);
static void nfs4_trigger_destroy_mntopts(char *);
static int nfs4_trigger_add_mntopt(char *, char *, vfs_t *);
static enum clnt_stat nfs4_trigger_ping_server(servinfo4_t *, int);
static enum clnt_stat nfs4_ping_server_common(struct knetconfig *,
    struct netbuf *, int);

extern int umount2_engine(vfs_t *, int, cred_t *, int);

vnodeops_t *nfs4_trigger_vnodeops;

/*
 * These are the vnodeops that we must define for stub vnodes.
 *
 *
 * Many of the VOPs defined for NFSv4 do not need to be defined here,
 * for various reasons. This will result in the VFS default function being
 * used:
 *
 * - These VOPs require a previous VOP_OPEN to have occurred. That will have
 *   lost the reference to the stub vnode, meaning these should not be called:
 *	close, read, write, ioctl, readdir, seek.
 *
 * - These VOPs are meaningless for vnodes without data pages. Since the
 *   stub vnode is of type VDIR, these should not be called:
 *	space, getpage, putpage, map, addmap, delmap, pageio, fsync.
 *
 * - These VOPs are otherwise not applicable, and should not be called:
 *	dump, setsecattr.
 *
 *
 * These VOPs we do not want to define, but nor do we want the VFS default
 * action. Instead, we specify the VFS error function, with fs_error(), but
 * note that fs_error() is not actually called. Instead it results in the
 * use of the error function defined for the particular VOP, in vn_ops_table[]:
 *
 * - frlock, dispose, shrlock.
 *
 *
 * These VOPs we define to use the corresponding regular NFSv4 vnodeop.
 * NOTE: if any of these ops involve an OTW call with the stub FH, then
 * that call must be wrapped with save_mnt_secinfo()/check_mnt_secinfo()
 * to protect the security data in the servinfo4_t for the "parent"
 * filesystem that contains the stub.
 *
 * - These VOPs should not trigger a mount, so that "ls -l" does not:
 *	pathconf, getsecattr.
 *
 * - These VOPs would not make sense to trigger:
 *	inactive, rwlock, rwunlock, fid, realvp.
 */
const fs_operation_def_t nfs4_trigger_vnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = nfs4_trigger_open },
	VOPNAME_GETATTR,	{ .vop_getattr = nfs4_trigger_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = nfs4_trigger_setattr },
	VOPNAME_ACCESS,		{ .vop_access = nfs4_trigger_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = nfs4_trigger_lookup },
	VOPNAME_CREATE,		{ .vop_create = nfs4_trigger_create },
	VOPNAME_REMOVE,		{ .vop_remove = nfs4_trigger_remove },
	VOPNAME_LINK,		{ .vop_link = nfs4_trigger_link },
	VOPNAME_RENAME,		{ .vop_rename = nfs4_trigger_rename },
	VOPNAME_MKDIR,		{ .vop_mkdir = nfs4_trigger_mkdir },
	VOPNAME_RMDIR,		{ .vop_rmdir = nfs4_trigger_rmdir },
	VOPNAME_SYMLINK,	{ .vop_symlink = nfs4_trigger_symlink },
	VOPNAME_READLINK,	{ .vop_readlink = nfs4_trigger_readlink },
	VOPNAME_INACTIVE,	{ .vop_inactive = nfs4_inactive },
	VOPNAME_FID,		{ .vop_fid = nfs4_fid },
	VOPNAME_RWLOCK,		{ .vop_rwlock = nfs4_rwlock },
	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = nfs4_rwunlock },
	VOPNAME_REALVP,		{ .vop_realvp = nfs4_realvp },
	VOPNAME_GETSECATTR,	{ .vop_getsecattr = nfs4_getsecattr },
	VOPNAME_PATHCONF,	{ .vop_pathconf = nfs4_pathconf },
	VOPNAME_FRLOCK,		{ .error = fs_error },
	VOPNAME_DISPOSE,	{ .error = fs_error },
	VOPNAME_SHRLOCK,	{ .error = fs_error },
	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
	NULL, NULL
};

/*
 * Bump the reference count on an ephemeral tree. The caller must
 * already hold net_cnt_lock; use nfs4_ephemeral_tree_hold() otherwise.
 */
static void
nfs4_ephemeral_tree_incr(nfs4_ephemeral_tree_t *net)
{
	ASSERT(mutex_owned(&net->net_cnt_lock));

	net->net_refcnt++;
	/* catch wrap-around of the counter */
	ASSERT(net->net_refcnt != 0);
}

/*
 * Take a reference on an ephemeral tree, acquiring and releasing
 * net_cnt_lock around the increment.
 */
static void
nfs4_ephemeral_tree_hold(nfs4_ephemeral_tree_t *net)
{
	mutex_enter(&net->net_cnt_lock);
	nfs4_ephemeral_tree_incr(net);
	mutex_exit(&net->net_cnt_lock);
}

/*
 * We need a safe way to decrement the refcnt whilst the
 * lock is being held.
 */
static void
nfs4_ephemeral_tree_decr(nfs4_ephemeral_tree_t *net)
{
	ASSERT(mutex_owned(&net->net_cnt_lock));
	/* must not drop below zero */
	ASSERT(net->net_refcnt != 0);

	net->net_refcnt--;
}

/*
 * Release a reference on an ephemeral tree, acquiring and releasing
 * net_cnt_lock around the decrement.
 */
static void
nfs4_ephemeral_tree_rele(nfs4_ephemeral_tree_t *net)
{
	mutex_enter(&net->net_cnt_lock);
	nfs4_ephemeral_tree_decr(net);
	mutex_exit(&net->net_cnt_lock);
}

/*
 * Trigger ops for stub vnodes; for mirror mounts, etc.
 *
 * The general idea is that a "triggering" op will first call
 * nfs4_trigger_mount(), which will find out whether a mount has already
 * been triggered.
 *
 * If it has, then nfs4_trigger_mount() sets newvp to the root vnode
 * of the covering vfs.
348b9238976Sth * 349b9238976Sth * If a mount has not yet been triggered, nfs4_trigger_mount() will do so, 350b9238976Sth * and again set newvp, as above. 351b9238976Sth * 352b9238976Sth * The triggering op may then re-issue the VOP by calling it on newvp. 353b9238976Sth * 354b9238976Sth * Note that some ops may perform custom action, and may or may not need 355b9238976Sth * to trigger a mount. 356b9238976Sth * 357b9238976Sth * Some ops need to call the regular NFSv4 vnodeop for a stub vnode. We 358b9238976Sth * obviously can't do this with VOP_<whatever>, since it's a stub vnode 359b9238976Sth * and that would just recurse. Instead, we call the v4 op directly, 360b9238976Sth * by name. This is OK, since we know that the vnode is for NFSv4, 361b9238976Sth * otherwise it couldn't be a stub. 362b9238976Sth * 363b9238976Sth */ 364b9238976Sth 365b9238976Sth static int 366da6c28aaSamw nfs4_trigger_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 367b9238976Sth { 368b9238976Sth int error; 369b9238976Sth vnode_t *newvp; 370b9238976Sth 371546a3997SThomas Haynes error = nfs4_trigger_mount(*vpp, cr, &newvp); 372b9238976Sth if (error) 373b9238976Sth return (error); 374b9238976Sth 375b9238976Sth /* Release the stub vnode, as we're losing the reference to it */ 376b9238976Sth VN_RELE(*vpp); 377b9238976Sth 378b9238976Sth /* Give the caller the root vnode of the newly-mounted fs */ 379b9238976Sth *vpp = newvp; 380b9238976Sth 381b9238976Sth /* return with VN_HELD(newvp) */ 382da6c28aaSamw return (VOP_OPEN(vpp, flag, cr, ct)); 383b9238976Sth } 384b9238976Sth 385*2f172c55SRobert Thurlow void 386*2f172c55SRobert Thurlow nfs4_fake_attrs(vnode_t *vp, struct vattr *vap) 387*2f172c55SRobert Thurlow { 388*2f172c55SRobert Thurlow uint_t mask; 389*2f172c55SRobert Thurlow timespec_t now; 390*2f172c55SRobert Thurlow 391*2f172c55SRobert Thurlow /* 392*2f172c55SRobert Thurlow * Set some attributes here for referrals. 
393*2f172c55SRobert Thurlow */ 394*2f172c55SRobert Thurlow mask = vap->va_mask; 395*2f172c55SRobert Thurlow bzero(vap, sizeof (struct vattr)); 396*2f172c55SRobert Thurlow vap->va_mask = mask; 397*2f172c55SRobert Thurlow vap->va_uid = 0; 398*2f172c55SRobert Thurlow vap->va_gid = 0; 399*2f172c55SRobert Thurlow vap->va_nlink = 1; 400*2f172c55SRobert Thurlow vap->va_size = 1; 401*2f172c55SRobert Thurlow gethrestime(&now); 402*2f172c55SRobert Thurlow vap->va_atime = now; 403*2f172c55SRobert Thurlow vap->va_mtime = now; 404*2f172c55SRobert Thurlow vap->va_ctime = now; 405*2f172c55SRobert Thurlow vap->va_type = VDIR; 406*2f172c55SRobert Thurlow vap->va_mode = 0555; 407*2f172c55SRobert Thurlow vap->va_fsid = vp->v_vfsp->vfs_dev; 408*2f172c55SRobert Thurlow vap->va_rdev = 0; 409*2f172c55SRobert Thurlow vap->va_blksize = MAXBSIZE; 410*2f172c55SRobert Thurlow vap->va_nblocks = 1; 411*2f172c55SRobert Thurlow vap->va_seq = 0; 412*2f172c55SRobert Thurlow } 413*2f172c55SRobert Thurlow 414b9238976Sth /* 415b9238976Sth * For the majority of cases, nfs4_trigger_getattr() will not trigger 416b9238976Sth * a mount. However, if ATTR_TRIGGER is set, we are being informed 417b9238976Sth * that we need to force the mount before we attempt to determine 418b9238976Sth * the attributes. The intent is an atomic operation for security 419b9238976Sth * testing. 420*2f172c55SRobert Thurlow * 421*2f172c55SRobert Thurlow * If we're not triggering a mount, we can still inquire about the 422*2f172c55SRobert Thurlow * actual attributes from the server in the mirror mount case, 423*2f172c55SRobert Thurlow * and will return manufactured attributes for a referral (see 424*2f172c55SRobert Thurlow * the 'create' branch of find_referral_stubvp()). 
425b9238976Sth */ 426b9238976Sth static int 427da6c28aaSamw nfs4_trigger_getattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr, 428da6c28aaSamw caller_context_t *ct) 429b9238976Sth { 430b9238976Sth int error; 431b9238976Sth 432b9238976Sth if (flags & ATTR_TRIGGER) { 433b9238976Sth vnode_t *newvp; 434b9238976Sth 435546a3997SThomas Haynes error = nfs4_trigger_mount(vp, cr, &newvp); 436b9238976Sth if (error) 437b9238976Sth return (error); 438b9238976Sth 439da6c28aaSamw error = VOP_GETATTR(newvp, vap, flags, cr, ct); 440b9238976Sth VN_RELE(newvp); 441*2f172c55SRobert Thurlow 442*2f172c55SRobert Thurlow } else if (RP_ISSTUB_MIRRORMOUNT(VTOR4(vp))) { 443*2f172c55SRobert Thurlow 444da6c28aaSamw error = nfs4_getattr(vp, vap, flags, cr, ct); 445*2f172c55SRobert Thurlow 446*2f172c55SRobert Thurlow } else if (RP_ISSTUB_REFERRAL(VTOR4(vp))) { 447*2f172c55SRobert Thurlow 448*2f172c55SRobert Thurlow nfs4_fake_attrs(vp, vap); 449*2f172c55SRobert Thurlow error = 0; 450b9238976Sth } 451b9238976Sth 452b9238976Sth return (error); 453b9238976Sth } 454b9238976Sth 455b9238976Sth static int 456b9238976Sth nfs4_trigger_setattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr, 457b9238976Sth caller_context_t *ct) 458b9238976Sth { 459b9238976Sth int error; 460b9238976Sth vnode_t *newvp; 461b9238976Sth 462546a3997SThomas Haynes error = nfs4_trigger_mount(vp, cr, &newvp); 463b9238976Sth if (error) 464b9238976Sth return (error); 465b9238976Sth 466b9238976Sth error = VOP_SETATTR(newvp, vap, flags, cr, ct); 467b9238976Sth VN_RELE(newvp); 468b9238976Sth 469b9238976Sth return (error); 470b9238976Sth } 471b9238976Sth 472b9238976Sth static int 473da6c28aaSamw nfs4_trigger_access(vnode_t *vp, int mode, int flags, cred_t *cr, 474da6c28aaSamw caller_context_t *ct) 475b9238976Sth { 476b9238976Sth int error; 477b9238976Sth vnode_t *newvp; 478b9238976Sth 479546a3997SThomas Haynes error = nfs4_trigger_mount(vp, cr, &newvp); 480b9238976Sth if (error) 481b9238976Sth return (error); 
482b9238976Sth 483da6c28aaSamw error = VOP_ACCESS(newvp, mode, flags, cr, ct); 484b9238976Sth VN_RELE(newvp); 485b9238976Sth 486b9238976Sth return (error); 487b9238976Sth } 488b9238976Sth 489b9238976Sth static int 490da6c28aaSamw nfs4_trigger_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, 491da6c28aaSamw struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr, 492da6c28aaSamw caller_context_t *ct, int *deflags, pathname_t *rpnp) 493b9238976Sth { 494b9238976Sth int error; 495b9238976Sth vnode_t *newdvp; 496b9238976Sth rnode4_t *drp = VTOR4(dvp); 497b9238976Sth 498b9238976Sth ASSERT(RP_ISSTUB(drp)); 499b9238976Sth 500b9238976Sth /* 501b9238976Sth * It's not legal to lookup ".." for an fs root, so we mustn't pass 502b9238976Sth * that up. Instead, pass onto the regular op, regardless of whether 503b9238976Sth * we've triggered a mount. 504b9238976Sth */ 505b9238976Sth if (strcmp(nm, "..") == 0) 506*2f172c55SRobert Thurlow if (RP_ISSTUB_MIRRORMOUNT(drp)) { 507*2f172c55SRobert Thurlow return (nfs4_lookup(dvp, nm, vpp, pnp, flags, rdir, cr, 508*2f172c55SRobert Thurlow ct, deflags, rpnp)); 509*2f172c55SRobert Thurlow } else if (RP_ISSTUB_REFERRAL(drp)) { 510*2f172c55SRobert Thurlow /* Return the parent vnode */ 511*2f172c55SRobert Thurlow return (vtodv(dvp, vpp, cr, TRUE)); 512*2f172c55SRobert Thurlow } 513b9238976Sth 514546a3997SThomas Haynes error = nfs4_trigger_mount(dvp, cr, &newdvp); 515b9238976Sth if (error) 516b9238976Sth return (error); 517b9238976Sth 518da6c28aaSamw error = VOP_LOOKUP(newdvp, nm, vpp, pnp, flags, rdir, cr, ct, 519da6c28aaSamw deflags, rpnp); 520b9238976Sth VN_RELE(newdvp); 521b9238976Sth 522b9238976Sth return (error); 523b9238976Sth } 524b9238976Sth 525b9238976Sth static int 526b9238976Sth nfs4_trigger_create(vnode_t *dvp, char *nm, struct vattr *va, 527da6c28aaSamw enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr, 528da6c28aaSamw int flags, caller_context_t *ct, vsecattr_t *vsecp) 529b9238976Sth { 530b9238976Sth int error; 
531b9238976Sth vnode_t *newdvp; 532b9238976Sth 533546a3997SThomas Haynes error = nfs4_trigger_mount(dvp, cr, &newdvp); 534b9238976Sth if (error) 535b9238976Sth return (error); 536b9238976Sth 537da6c28aaSamw error = VOP_CREATE(newdvp, nm, va, exclusive, mode, vpp, cr, 538da6c28aaSamw flags, ct, vsecp); 539b9238976Sth VN_RELE(newdvp); 540b9238976Sth 541b9238976Sth return (error); 542b9238976Sth } 543b9238976Sth 544b9238976Sth static int 545da6c28aaSamw nfs4_trigger_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct, 546da6c28aaSamw int flags) 547b9238976Sth { 548b9238976Sth int error; 549b9238976Sth vnode_t *newdvp; 550b9238976Sth 551546a3997SThomas Haynes error = nfs4_trigger_mount(dvp, cr, &newdvp); 552b9238976Sth if (error) 553b9238976Sth return (error); 554b9238976Sth 555da6c28aaSamw error = VOP_REMOVE(newdvp, nm, cr, ct, flags); 556b9238976Sth VN_RELE(newdvp); 557b9238976Sth 558b9238976Sth return (error); 559b9238976Sth } 560b9238976Sth 561b9238976Sth static int 562da6c28aaSamw nfs4_trigger_link(vnode_t *tdvp, vnode_t *svp, char *tnm, cred_t *cr, 563da6c28aaSamw caller_context_t *ct, int flags) 564b9238976Sth { 565b9238976Sth int error; 566b9238976Sth vnode_t *newtdvp; 567b9238976Sth 568546a3997SThomas Haynes error = nfs4_trigger_mount(tdvp, cr, &newtdvp); 569b9238976Sth if (error) 570b9238976Sth return (error); 571b9238976Sth 572b9238976Sth /* 573b9238976Sth * We don't check whether svp is a stub. Let the NFSv4 code 574b9238976Sth * detect that error, and return accordingly. 
575b9238976Sth */ 576da6c28aaSamw error = VOP_LINK(newtdvp, svp, tnm, cr, ct, flags); 577b9238976Sth VN_RELE(newtdvp); 578b9238976Sth 579b9238976Sth return (error); 580b9238976Sth } 581b9238976Sth 582b9238976Sth static int 583b9238976Sth nfs4_trigger_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, 584da6c28aaSamw cred_t *cr, caller_context_t *ct, int flags) 585b9238976Sth { 586b9238976Sth int error; 587b9238976Sth vnode_t *newsdvp; 588b9238976Sth rnode4_t *tdrp = VTOR4(tdvp); 589b9238976Sth 590b9238976Sth /* 591b9238976Sth * We know that sdvp is a stub, otherwise we would not be here. 592b9238976Sth * 593b9238976Sth * If tdvp is also be a stub, there are two possibilities: it 594b9238976Sth * is either the same stub as sdvp [i.e. VN_CMP(sdvp, tdvp)] 595b9238976Sth * or it is a different stub [!VN_CMP(sdvp, tdvp)]. 596b9238976Sth * 597b9238976Sth * In the former case, just trigger sdvp, and treat tdvp as 598b9238976Sth * though it were not a stub. 599b9238976Sth * 600b9238976Sth * In the latter case, it might be a different stub for the 601b9238976Sth * same server fs as sdvp, or for a different server fs. 602b9238976Sth * Regardless, from the client perspective this would still 603b9238976Sth * be a cross-filesystem rename, and should not be allowed, 604b9238976Sth * so return EXDEV, without triggering either mount. 
605b9238976Sth */ 606b9238976Sth if (RP_ISSTUB(tdrp) && !VN_CMP(sdvp, tdvp)) 607b9238976Sth return (EXDEV); 608b9238976Sth 609546a3997SThomas Haynes error = nfs4_trigger_mount(sdvp, cr, &newsdvp); 610b9238976Sth if (error) 611b9238976Sth return (error); 612b9238976Sth 613da6c28aaSamw error = VOP_RENAME(newsdvp, snm, tdvp, tnm, cr, ct, flags); 614b9238976Sth 615b9238976Sth VN_RELE(newsdvp); 616b9238976Sth 617b9238976Sth return (error); 618b9238976Sth } 619b9238976Sth 620da6c28aaSamw /* ARGSUSED */ 621b9238976Sth static int 622b9238976Sth nfs4_trigger_mkdir(vnode_t *dvp, char *nm, struct vattr *va, vnode_t **vpp, 623da6c28aaSamw cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp) 624b9238976Sth { 625b9238976Sth int error; 626b9238976Sth vnode_t *newdvp; 627b9238976Sth 628546a3997SThomas Haynes error = nfs4_trigger_mount(dvp, cr, &newdvp); 629b9238976Sth if (error) 630b9238976Sth return (error); 631b9238976Sth 632da6c28aaSamw error = VOP_MKDIR(newdvp, nm, va, vpp, cr, ct, flags, vsecp); 633b9238976Sth VN_RELE(newdvp); 634b9238976Sth 635b9238976Sth return (error); 636b9238976Sth } 637b9238976Sth 638b9238976Sth static int 639da6c28aaSamw nfs4_trigger_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr, 640da6c28aaSamw caller_context_t *ct, int flags) 641b9238976Sth { 642b9238976Sth int error; 643b9238976Sth vnode_t *newdvp; 644b9238976Sth 645546a3997SThomas Haynes error = nfs4_trigger_mount(dvp, cr, &newdvp); 646b9238976Sth if (error) 647b9238976Sth return (error); 648b9238976Sth 649da6c28aaSamw error = VOP_RMDIR(newdvp, nm, cdir, cr, ct, flags); 650b9238976Sth VN_RELE(newdvp); 651b9238976Sth 652b9238976Sth return (error); 653b9238976Sth } 654b9238976Sth 655b9238976Sth static int 656b9238976Sth nfs4_trigger_symlink(vnode_t *dvp, char *lnm, struct vattr *tva, char *tnm, 657da6c28aaSamw cred_t *cr, caller_context_t *ct, int flags) 658b9238976Sth { 659b9238976Sth int error; 660b9238976Sth vnode_t *newdvp; 661b9238976Sth 662546a3997SThomas Haynes error = 
nfs4_trigger_mount(dvp, cr, &newdvp); 663b9238976Sth if (error) 664b9238976Sth return (error); 665b9238976Sth 666da6c28aaSamw error = VOP_SYMLINK(newdvp, lnm, tva, tnm, cr, ct, flags); 667b9238976Sth VN_RELE(newdvp); 668b9238976Sth 669b9238976Sth return (error); 670b9238976Sth } 671b9238976Sth 672b9238976Sth static int 673da6c28aaSamw nfs4_trigger_readlink(vnode_t *vp, struct uio *uiop, cred_t *cr, 674da6c28aaSamw caller_context_t *ct) 675b9238976Sth { 676b9238976Sth int error; 677b9238976Sth vnode_t *newvp; 678b9238976Sth 679546a3997SThomas Haynes error = nfs4_trigger_mount(vp, cr, &newvp); 680b9238976Sth if (error) 681b9238976Sth return (error); 682b9238976Sth 683da6c28aaSamw error = VOP_READLINK(newvp, uiop, cr, ct); 684b9238976Sth VN_RELE(newvp); 685b9238976Sth 686b9238976Sth return (error); 687b9238976Sth } 688b9238976Sth 689b9238976Sth /* end of trigger vnode ops */ 690b9238976Sth 6916962f5b8SThomas Haynes /* 6926962f5b8SThomas Haynes * See if the mount has already been done by another caller. 
 */
static int
nfs4_trigger_mounted_already(vnode_t *vp, vnode_t **newvpp,
    bool_t *was_mounted, vfs_t **vfsp)
{
	int error;
	mntinfo4_t *mi = VTOMI4(vp);

	*was_mounted = FALSE;

	/* serialize against mount/unmount of the covering vfs */
	error = vn_vfsrlock_wait(vp);
	if (error)
		return (error);

	*vfsp = vn_mountedvfs(vp);
	if (*vfsp != NULL) {
		/* the mount has already occurred */
		error = VFS_ROOT(*vfsp, newvpp);
		if (!error) {
			/* need to update the reference time */
			mutex_enter(&mi->mi_lock);
			if (mi->mi_ephemeral)
				mi->mi_ephemeral->ne_ref_time =
				    gethrestime_sec();
			mutex_exit(&mi->mi_lock);

			*was_mounted = TRUE;
		}
	}

	vn_vfsunlock(vp);
	/*
	 * NOTE(review): we return 0 even when VFS_ROOT() failed above;
	 * *was_mounted stays FALSE so the caller will attempt the mount
	 * itself. Presumably deliberate best-effort behavior — confirm.
	 */
	return (0);
}

/*
 * Mount upon a trigger vnode; for mirror-mounts, referrals, etc.
 *
 * The mount may have already occurred, via another thread. If not,
 * assemble the location information - which may require fetching - and
 * perform the mount.
 *
 * Sets newvp to be the root of the fs that is now covering vp. Note
 * that we return with VN_HELD(*newvp).
 *
 * The caller is responsible for passing the VOP onto the covering fs.
 *
 * Lock ordering within this function: mi_lock is dropped before
 * net_tree_lock is taken for an existing tree; net_cnt_lock is always
 * innermost and protects net_status / net_refcnt.
 */
static int
nfs4_trigger_mount(vnode_t *vp, cred_t *cr, vnode_t **newvpp)
{
	int error;
	vfs_t *vfsp;
	rnode4_t *rp = VTOR4(vp);
	mntinfo4_t *mi = VTOMI4(vp);
	domount_args_t *dma;

	nfs4_ephemeral_tree_t *net;

	bool_t must_unlock = FALSE;	/* TRUE once tree is marked MOUNTING */
	bool_t is_building = FALSE;	/* TRUE if we allocated the tree */
	bool_t was_mounted = FALSE;

	cred_t *mcred = NULL;

	nfs4_trigger_globals_t *ntg;

	zone_t *zone = curproc->p_zone;

	ASSERT(RP_ISSTUB(rp));

	*newvpp = NULL;

	/*
	 * Has the mount already occurred?
	 */
	error = nfs4_trigger_mounted_already(vp, newvpp,
	    &was_mounted, &vfsp);
	if (error || was_mounted)
		goto done;

	ntg = zone_getspecific(nfs4_ephemeral_key, zone);
	ASSERT(ntg != NULL);

	mutex_enter(&mi->mi_lock);

	/*
	 * We need to lock down the ephemeral tree.
	 */
	if (mi->mi_ephemeral_tree == NULL) {
		/* first ephemeral mount under this fs: build a new tree */
		net = kmem_zalloc(sizeof (*net), KM_SLEEP);
		mutex_init(&net->net_tree_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&net->net_cnt_lock, NULL, MUTEX_DEFAULT, NULL);
		net->net_refcnt = 1;
		net->net_status = NFS4_EPHEMERAL_TREE_BUILDING;
		is_building = TRUE;

		/*
		 * We need to add it to the zone specific list for
		 * automatic unmounting and harvesting of deadwood.
		 */
		mutex_enter(&ntg->ntg_forest_lock);
		if (ntg->ntg_forest != NULL)
			net->net_next = ntg->ntg_forest;
		ntg->ntg_forest = net;
		mutex_exit(&ntg->ntg_forest_lock);

		/*
		 * No lock order confusion with mi_lock because no
		 * other node could have grabbed net_tree_lock.
		 */
		mutex_enter(&net->net_tree_lock);
		mi->mi_ephemeral_tree = net;
		net->net_mount = mi;
		mutex_exit(&mi->mi_lock);
	} else {
		net = mi->mi_ephemeral_tree;
		nfs4_ephemeral_tree_hold(net);

		/* drop mi_lock before net_tree_lock to keep lock order */
		mutex_exit(&mi->mi_lock);

		mutex_enter(&net->net_tree_lock);

		/*
		 * We can only proceed if the tree is neither locked
		 * nor being torn down.
		 */
		mutex_enter(&net->net_cnt_lock);
		if (net->net_status & NFS4_EPHEMERAL_TREE_PROCESSING) {
			nfs4_ephemeral_tree_decr(net);
			mutex_exit(&net->net_cnt_lock);
			mutex_exit(&net->net_tree_lock);

			return (EIO);
		}
		mutex_exit(&net->net_cnt_lock);
	}

	mutex_enter(&net->net_cnt_lock);
	net->net_status |= NFS4_EPHEMERAL_TREE_MOUNTING;
	mutex_exit(&net->net_cnt_lock);

	must_unlock = TRUE;

	dma = nfs4_trigger_domount_args_create(vp, cr);
	if (dma == NULL) {
		error = EINVAL;
		goto done;
	}

	/*
	 * Note that since we define mirror mounts to work
	 * for any user, we simply extend the privileges of
	 * the user's credentials to allow the mount to
	 * proceed.
	 */
	mcred = crdup(cr);
	if (mcred == NULL) {
		error = EINVAL;
		goto done;
	}

	crset_zone_privall(mcred);
	if (is_system_labeled())
		(void) setpflags(NET_MAC_AWARE, 1, mcred);

	error = nfs4_trigger_domount(vp, dma, &vfsp, mcred, newvpp);
	nfs4_trigger_domount_args_destroy(dma, vp);

	DTRACE_PROBE2(nfs4clnt__func__referral__mount,
	    vnode_t *, vp, int, error);

	crfree(mcred);

done:

	if (must_unlock) {
		mutex_enter(&net->net_cnt_lock);
		net->net_status &= ~NFS4_EPHEMERAL_TREE_MOUNTING;

		/*
		 * REFCNT: If we are the root of the tree, then we need
		 * to keep a reference because we malloced the tree and
		 * this is where we tied it to our mntinfo.
		 *
		 * If we are not the root of the tree, then our tie to
		 * the mntinfo occurred elsewhere and we need to
		 * decrement the reference to the tree.
		 */
		if (is_building)
			net->net_status &= ~NFS4_EPHEMERAL_TREE_BUILDING;
		else
			nfs4_ephemeral_tree_decr(net);
		mutex_exit(&net->net_cnt_lock);

		mutex_exit(&net->net_tree_lock);
	}

	/* successful return must hand back a held root vnode */
	if (!error && (newvpp == NULL || *newvpp == NULL))
		error = ENOSYS;

	return (error);
}

/*
 * Collect together both the generic & mount-type specific args.
 * Returns NULL on failure (caller maps that to EINVAL).
 */
static domount_args_t *
nfs4_trigger_domount_args_create(vnode_t *vp, cred_t *cr)
{
	int nointr;
	char *hostlist;
	servinfo4_t *svp;
	struct nfs_args *nargs, *nargs_head;
	enum clnt_stat status;
	ephemeral_servinfo_t *esi, *esi_first;
	domount_args_t *dma;
	mntinfo4_t *mi = VTOMI4(vp);

	nointr = !(mi->mi_flags & MI4_INT);
	hostlist = kmem_zalloc(MAXPATHLEN, KM_SLEEP);

	svp = mi->mi_curr_serv;
	/* check if the current server is responding */
	status = nfs4_trigger_ping_server(svp, nointr);
	if (status == RPC_SUCCESS) {
		esi_first = nfs4_trigger_esi_create(vp, svp, cr);
		if (esi_first == NULL) {
			kmem_free(hostlist, MAXPATHLEN);
			return (NULL);
		}

		(void) strlcpy(hostlist, esi_first->esi_hostname, MAXPATHLEN);

		nargs_head = nfs4_trigger_nargs_create(mi, svp, esi_first);
	} else {
		/* current server did not respond */
		esi_first = NULL;
		nargs_head = NULL;
	}
	nargs = nargs_head;

	/*
	 * NFS RO failover.
	 *
	 * If we have multiple servinfo4 structures, linked via sv_next,
	 * we must create one nfs_args for each, linking the nfs_args via
	 * nfs_ext_u.nfs_extB.next.
	 *
	 * We need to build a corresponding esi for each, too, but that is
	 * used solely for building nfs_args, and may be immediately
	 * discarded, as domount() requires the info from just one esi,
	 * but all the nfs_args.
	 *
	 * Currently, the NFS mount code will hang if not all servers
	 * requested are available. To avoid that, we need to ping each
	 * server, here, and remove it from the list if it is not
	 * responding. This has the side-effect of that server then
	 * being permanently unavailable for this failover mount, even if
	 * it recovers. That's unfortunate, but the best we can do until
	 * the mount code path is fixed.
	 */

	/*
	 * If the current server was down, loop indefinitely until we find
	 * at least one responsive server.
	 */
	do {
		/* no locking needed for sv_next; it is only set at fs mount */
		for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) {
			struct nfs_args *next;

			/*
			 * nargs_head: the head of the nfs_args list
			 * nargs: the current tail of the list
			 * next: the newly-created element to be added
			 */

			/*
			 * We've already tried the current server, above;
			 * if it was responding, we have already included it
			 * and it may now be ignored.
			 *
			 * Otherwise, try it again, since it may now have
			 * recovered.
			 */
			if (svp == mi->mi_curr_serv && esi_first != NULL)
				continue;

			(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
			if (svp->sv_flags & SV4_NOTINUSE) {
				nfs_rw_exit(&svp->sv_lock);
				continue;
			}
			nfs_rw_exit(&svp->sv_lock);

			/* check if the server is responding */
			status = nfs4_trigger_ping_server(svp, nointr);
			/* if the server did not respond, ignore it */
			if (status != RPC_SUCCESS)
				continue;

			esi = nfs4_trigger_esi_create(vp, svp, cr);
			if (esi == NULL)
				continue;

			/*
			 * If the original current server (mi_curr_serv)
			 * was down when we first tried it,
			 * (i.e. esi_first == NULL),
			 * we select this new server (svp) to be the server
			 * that we will actually contact (esi_first).
			 *
			 * Note that it's possible that mi_curr_serv == svp,
			 * if that mi_curr_serv was down but has now recovered.
			 */
			next = nfs4_trigger_nargs_create(mi, svp, esi);
			if (esi_first == NULL) {
				ASSERT(nargs == NULL);
				ASSERT(nargs_head == NULL);
				nargs_head = next;
				esi_first = esi;
				(void) strlcpy(hostlist,
				    esi_first->esi_hostname, MAXPATHLEN);
			} else {
				ASSERT(nargs_head != NULL);
				nargs->nfs_ext_u.nfs_extB.next = next;
				(void) strlcat(hostlist, ",", MAXPATHLEN);
				(void) strlcat(hostlist, esi->esi_hostname,
				    MAXPATHLEN);
				/* esi was only needed for hostname & nargs */
				nfs4_trigger_esi_destroy(esi, vp);
			}

			nargs = next;
		}

		/* if we've had no response at all, wait a second */
		if (esi_first == NULL)
			delay(drv_usectohz(1000000));

	} while (esi_first == NULL);
	ASSERT(nargs_head != NULL);

	dma = kmem_zalloc(sizeof (domount_args_t), KM_SLEEP);
	dma->dma_esi = esi_first;
	dma->dma_hostlist = hostlist;
	dma->dma_nargs = nargs_head;

	return (dma);
}

/*
 * Tear down a domount_args_t built above: the primary esi, the
 * hostlist buffer, and the whole chained nfs_args list.
 */
static void
nfs4_trigger_domount_args_destroy(domount_args_t *dma, vnode_t *vp)
{
	if (dma != NULL) {
		if (dma->dma_esi != NULL && vp != NULL)
			nfs4_trigger_esi_destroy(dma->dma_esi, vp);

		if (dma->dma_hostlist != NULL)
			kmem_free(dma->dma_hostlist, MAXPATHLEN);

		if (dma->dma_nargs != NULL) {
			struct nfs_args *nargs = dma->dma_nargs;

			/* walk the nfs_extB.next chain, freeing each */
			do {
				struct
nfs_args *next =
				    nargs->nfs_ext_u.nfs_extB.next;

				nfs4_trigger_nargs_destroy(nargs);
				nargs = next;
			} while (nargs != NULL);
		}

		kmem_free(dma, sizeof (domount_args_t));
	}
}

/*
 * The ephemeral_servinfo_t struct contains basic information we will need to
 * perform the mount. Whilst the structure is generic across different
 * types of ephemeral mount, the way we gather its contents differs.
 */
static ephemeral_servinfo_t *
nfs4_trigger_esi_create(vnode_t *vp, servinfo4_t *svp, cred_t *cr)
{
	ephemeral_servinfo_t *esi;
	rnode4_t *rp = VTOR4(vp);

	ASSERT(RP_ISSTUB(rp));

	/*
	 * Call the ephemeral type-specific routine; an unknown stub
	 * type yields NULL, which callers treat as failure.
	 */
	if (RP_ISSTUB_MIRRORMOUNT(rp))
		esi = nfs4_trigger_esi_create_mirrormount(vp, svp);
	else if (RP_ISSTUB_REFERRAL(rp))
		esi = nfs4_trigger_esi_create_referral(vp, cr);
	else
		esi = NULL;
	return (esi);
}

/*
 * Free an ephemeral_servinfo_t. Only the struct itself is freed here;
 * see the comment below for why the contents are not.
 */
static void
nfs4_trigger_esi_destroy(ephemeral_servinfo_t *esi, vnode_t *vp)
{
	rnode4_t *rp = VTOR4(vp);

	ASSERT(RP_ISSTUB(rp));

	/* Currently, no need for an ephemeral type-specific routine */

	/*
	 * The contents of ephemeral_servinfo_t goes into nfs_args,
	 * and will be handled by nfs4_trigger_nargs_destroy().
	 * We need only free the structure itself.
	 */
	if (esi != NULL)
		kmem_free(esi, sizeof (ephemeral_servinfo_t));
}

/*
 * Some of this may turn out to be common with other ephemeral types,
 * in which case it should be moved to nfs4_trigger_esi_create(), or a
 * common function called.
 */

/*
 * Mirror mounts case - should have all data available locally in the
 * stub rnode's servinfo4; no over-the-wire fetch is needed.
 */
static ephemeral_servinfo_t *
nfs4_trigger_esi_create_mirrormount(vnode_t *vp, servinfo4_t *svp)
{
	char *stubpath;
	struct knetconfig *sikncp, *svkncp;
	struct netbuf *bufp;
	ephemeral_servinfo_t *esi;

	esi = kmem_zalloc(sizeof (ephemeral_servinfo_t), KM_SLEEP);

	/* initially set to be our type of ephemeral mount; may be added to */
	esi->esi_mount_flags = NFSMNT_MIRRORMOUNT;

	/*
	 * We're copying info from the stub rnode's servinfo4, but
	 * we must create new copies, not pointers, since this information
	 * is to be associated with the new mount, which will be
	 * unmounted (and its structures freed) separately
	 */

	/*
	 * Sizes passed to kmem_[z]alloc here must match those freed
	 * in nfs4_free_args()
	 */

	/*
	 * We hold sv_lock across kmem_zalloc() calls that may sleep, but this
	 * is difficult to avoid: as we need to read svp to calculate the
	 * sizes to be allocated.
	 */
	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);

	/* strcat into zalloc'd (zero-filled) buffers: always NUL-terminated */
	esi->esi_hostname = kmem_zalloc(strlen(svp->sv_hostname) + 1, KM_SLEEP);
	(void) strcat(esi->esi_hostname, svp->sv_hostname);

	esi->esi_addr = kmem_zalloc(sizeof (struct netbuf), KM_SLEEP);
	bufp = esi->esi_addr;
	bufp->len = svp->sv_addr.len;
	bufp->maxlen = svp->sv_addr.maxlen;
	bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP);
	bcopy(svp->sv_addr.buf, bufp->buf, bufp->len);

	/* deep-copy the knetconfig (semantics, protofmly, proto, rdev) */
	esi->esi_knconf = kmem_zalloc(sizeof (*esi->esi_knconf), KM_SLEEP);
	sikncp = esi->esi_knconf;
	svkncp = svp->sv_knconf;
	sikncp->knc_semantics = svkncp->knc_semantics;
	sikncp->knc_protofmly = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
	(void) strcat((char *)sikncp->knc_protofmly,
	    (char *)svkncp->knc_protofmly);
	sikncp->knc_proto = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
	(void) strcat((char *)sikncp->knc_proto, (char *)svkncp->knc_proto);
	sikncp->knc_rdev = svkncp->knc_rdev;

	/*
	 * Used when AUTH_DH is negotiated.
	 *
	 * This is ephemeral mount-type specific, since it contains the
	 * server's time-sync syncaddr.
	 */
	if (svp->sv_dhsec) {
		struct netbuf *bufp;
		sec_data_t *sdata;
		dh_k4_clntdata_t *data;

		sdata = svp->sv_dhsec;
		data = (dh_k4_clntdata_t *)sdata->data;
		ASSERT(sdata->rpcflavor == AUTH_DH);

		bufp = kmem_zalloc(sizeof (struct netbuf), KM_SLEEP);
		bufp->len = data->syncaddr.len;
		bufp->maxlen = data->syncaddr.maxlen;
		bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP);
		bcopy(data->syncaddr.buf, bufp->buf, bufp->len);
		esi->esi_syncaddr = bufp;

		if (data->netname != NULL) {
			int nmlen = data->netnamelen;

			/*
			 * We need to copy from a dh_k4_clntdata_t
			 * netname/netnamelen pair to a NUL-terminated
			 * netname string suitable for putting in nfs_args,
			 * where the latter has no netnamelen field.
			 */
			esi->esi_netname = kmem_zalloc(nmlen + 1, KM_SLEEP);
			bcopy(data->netname, esi->esi_netname, nmlen);
		}
	} else {
		esi->esi_syncaddr = NULL;
		esi->esi_netname = NULL;
	}

	stubpath = fn_path(VTOSV(vp)->sv_name);
	/* step over initial '.', to avoid e.g. sv_path: "/tank./ws" */
	ASSERT(*stubpath == '.');
	stubpath += 1;

	/* for nfs_args->fh; skip sv_path when it is just "/" */
	esi->esi_path_len = strlen(stubpath) + 1;
	if (strcmp(svp->sv_path, "/") != 0)
		esi->esi_path_len += strlen(svp->sv_path);
	esi->esi_path = kmem_zalloc(esi->esi_path_len, KM_SLEEP);
	if (strcmp(svp->sv_path, "/") != 0)
		(void) strcat(esi->esi_path, svp->sv_path);
	(void) strcat(esi->esi_path, stubpath);

	/* restore the original pointer before freeing */
	stubpath -= 1;
	/* stubpath allocated by fn_path() */
	kmem_free(stubpath, strlen(stubpath) + 1);

	nfs_rw_exit(&svp->sv_lock);

	return (esi);
}

/*
 * Makes an upcall to NFSMAPID daemon to resolve hostname of NFS server to
 * get network information required to do the mount call.
 */
int
nfs4_callmapid(utf8string *server, struct nfs_fsl_info *resp)
{
	door_arg_t door_args;
	door_handle_t dh;
	XDR xdr;
	refd_door_args_t *xdr_argsp;
	refd_door_res_t *orig_resp;
	k_sigset_t smask;
	int xdr_len = 0;
	int res_len = 16;	/* length of an ip address */
	int orig_reslen = res_len;
	int error = 0;
	struct nfsidmap_globals *nig;

	if (zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN)
		return (ECONNREFUSED);

	nig = zone_getspecific(nfsidmap_zone_key, nfs_zone());
	ASSERT(nig != NULL);

	/* hold the daemon's door handle while we use it */
	mutex_enter(&nig->nfsidmap_daemon_lock);
	dh = nig->nfsidmap_daemon_dh;
	if (dh == NULL) {
		mutex_exit(&nig->nfsidmap_daemon_lock);
		cmn_err(CE_NOTE,
		    "nfs4_callmapid: nfsmapid daemon not " \
		    "running unable to resolve host name\n");
		return (EINVAL);
	}
	door_ki_hold(dh);
	mutex_exit(&nig->nfsidmap_daemon_lock);

	xdr_len = xdr_sizeof(&(xdr_utf8string), server);

	xdr_argsp = kmem_zalloc(xdr_len + sizeof (*xdr_argsp), KM_SLEEP);
	xdr_argsp->xdr_len = xdr_len;
	xdr_argsp->cmd = NFSMAPID_SRV_NETINFO;

	/* XDR-encode the server name into the door argument buffer */
	xdrmem_create(&xdr, (char *)&xdr_argsp->xdr_arg,
	    xdr_len, XDR_ENCODE);

	if (!xdr_utf8string(&xdr, server)) {
		kmem_free(xdr_argsp, xdr_len + sizeof (*xdr_argsp));
		door_ki_rele(dh);
		return (1);
	}

	/*
	 * NOTE(review): orig_resp is only assigned when orig_reslen is
	 * non-zero; orig_reslen is the constant 16 today, so this is
	 * always taken, but the later unconditional uses of orig_resp
	 * depend on that -- verify if res_len ever becomes variable.
	 */
	if (orig_reslen)
		orig_resp = kmem_alloc(orig_reslen, KM_SLEEP);

	door_args.data_ptr = (char *)xdr_argsp;
	door_args.data_size = sizeof (*xdr_argsp) + xdr_argsp->xdr_len;
	door_args.desc_ptr = NULL;
	door_args.desc_num = 0;
	door_args.rbuf = orig_resp ? (char *)orig_resp : NULL;
	door_args.rsize = res_len;

	/* block signals across the upcall so it is not interrupted */
	sigintr(&smask, 1);
	error = door_ki_upcall(dh, &door_args);
	sigunintr(&smask);

	door_ki_rele(dh);

	kmem_free(xdr_argsp, xdr_len + sizeof (*xdr_argsp));
	if (error) {
		kmem_free(orig_resp, orig_reslen);
		/*
		 * There is no door to connect to. The referral daemon
		 * must not be running yet.
		 */
		cmn_err(CE_WARN,
		    "nfsmapid not running cannot resolve host name");
		goto out;
	}

	/*
	 * If the results buffer passed back are not the same as
	 * what was sent free the old buffer and use the new one.
	 */
	if (orig_resp && orig_reslen) {
		refd_door_res_t *door_resp;

		door_resp = (refd_door_res_t *)door_args.rbuf;
		/* the door may have handed us a larger, kernel-mapped rbuf */
		if ((void *)door_args.rbuf != orig_resp)
			kmem_free(orig_resp, orig_reslen);
		if (door_resp->res_status == 0) {
			/* decode the daemon's reply into *resp */
			xdrmem_create(&xdr, (char *)&door_resp->xdr_res,
			    door_resp->xdr_len, XDR_DECODE);
			bzero(resp, sizeof (struct nfs_fsl_info));
			if (!xdr_nfs_fsl_info(&xdr, resp)) {
				DTRACE_PROBE2(
				    nfs4clnt__debug__referral__upcall__xdrfail,
				    struct nfs_fsl_info *, resp,
				    char *, "nfs4_callmapid");
				error = EINVAL;
			}
		} else {
			DTRACE_PROBE2(
			    nfs4clnt__debug__referral__upcall__badstatus,
			    int, door_resp->res_status,
			    char *, "nfs4_callmapid");
			error = door_resp->res_status;
		}
		kmem_free(door_args.rbuf, door_args.rsize);
	}
out:
	DTRACE_PROBE2(nfs4clnt__func__referral__upcall,
	    char *, server, int, error);
	return (error);
}

/*
 * Fetches the fs_locations attribute. Typically called
 * from a Replication/Migration/Referrals/Mirror-mount context
 *
 * Fills in the attributes in garp. The caller is assumed
 * to have allocated memory for garp.
 *
 * lock: if set do not lock s_recovlock and mi_recovlock mutex,
 * it's already done by caller. Otherwise lock these mutexes
 * before doing the rfs4call().
 *
 * Returns
 * 	1 for success
 * 	0 for failure
 */
int
nfs4_fetch_locations(mntinfo4_t *mi, nfs4_sharedfh_t *sfh, char *nm,
    cred_t *cr, nfs4_ga_res_t *garp, COMPOUND4res_clnt *callres, bool_t lock)
{
	COMPOUND4args_clnt args;
	COMPOUND4res_clnt res;
	nfs_argop4 *argop;
	int argoplist_size = 3 * sizeof (nfs_argop4);
	nfs4_server_t *sp = NULL;
	int doqueue = 1;
	nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
	int retval = 1;
	struct nfs4_clnt *nfscl;

	if (lock == TRUE)
		(void)
nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0);
	else
		ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) ||
		    nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER));

	/* find_nfs4_server() returns sp with s_lock held (if non-NULL) */
	sp = find_nfs4_server(mi);
	if (lock == TRUE)
		nfs_rw_exit(&mi->mi_recovlock);

	if (sp != NULL)
		mutex_exit(&sp->s_lock);

	/* when we own locking, take the recovery locks for the call */
	if (lock == TRUE) {
		if (sp != NULL)
			(void) nfs_rw_enter_sig(&sp->s_recovlock,
			    RW_WRITER, 0);
		(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_WRITER, 0);
	} else {
		if (sp != NULL) {
			ASSERT(nfs_rw_lock_held(&sp->s_recovlock, RW_READER) ||
			    nfs_rw_lock_held(&sp->s_recovlock, RW_WRITER));
		}
	}

	/*
	 * Do we want to do the setup for recovery here?
	 *
	 * We know that the server responded to a null ping a very
	 * short time ago, and we know that we intend to do a
	 * single stateless operation - we want to fetch attributes,
	 * so we know we can't encounter errors about state. If
	 * something goes wrong with the GETATTR, like not being
	 * able to get a response from the server or getting any
	 * kind of FH error, we should fail the mount.
	 *
	 * We may want to revisit this at a later time.
	 */
	argop = kmem_alloc(argoplist_size, KM_SLEEP);

	args.ctag = TAG_GETATTR_FSLOCATION;
	/* PUTFH LOOKUP GETATTR */
	args.array_len = 3;
	args.array = argop;

	/* 0. putfh file */
	argop[0].argop = OP_CPUTFH;
	argop[0].nfs_argop4_u.opcputfh.sfh = sfh;

	/* 1. lookup name, can't be dotdot */
	argop[1].argop = OP_CLOOKUP;
	argop[1].nfs_argop4_u.opclookup.cname = nm;

	/* 2. file attrs: fsid, fs_locations, mounted-on fileid */
	argop[2].argop = OP_GETATTR;
	argop[2].nfs_argop4_u.opgetattr.attr_request =
	    FATTR4_FSID_MASK | FATTR4_FS_LOCATIONS_MASK |
	    FATTR4_MOUNTED_ON_FILEID_MASK;
	argop[2].nfs_argop4_u.opgetattr.mi = mi;

	rfs4call(mi, &args, &res, cr, &doqueue, 0, &e);

	if (lock == TRUE) {
		nfs_rw_exit(&mi->mi_recovlock);
		if (sp != NULL)
			nfs_rw_exit(&sp->s_recovlock);
	}

	/* bump the per-zone referral statistic */
	nfscl = zone_getspecific(nfs4clnt_zone_key, nfs_zone());
	nfscl->nfscl_stat.referrals.value.ui64++;
	DTRACE_PROBE3(nfs4clnt__func__referral__fsloc,
	    nfs4_sharedfh_t *, sfh, char *, nm, nfs4_error_t *, &e);

	if (e.error != 0) {
		if (sp != NULL)
			nfs4_server_rele(sp);
		kmem_free(argop, argoplist_size);
		return (0);
	}

	/*
	 * Check for all possible error conditions.
	 * For valid replies without an ops array or for illegal
	 * replies, return a failure.
	 */
	if (res.status != NFS4_OK || res.array_len < 3 ||
	    res.array[2].nfs_resop4_u.opgetattr.status != NFS4_OK) {
		retval = 0;
		goto exit;
	}

	/*
	 * There isn't much value in putting the attributes
	 * in the attr cache since fs_locations4 aren't
	 * encountered very frequently, so just make them
	 * available to the caller.
	 */
	*garp = res.array[2].nfs_resop4_u.opgetattr.ga_res;

	DTRACE_PROBE2(nfs4clnt__debug__referral__fsloc,
	    nfs4_ga_res_t *, garp, char *, "nfs4_fetch_locations");

	/* No fs_locations? -- return a failure */
	if (garp->n4g_ext_res == NULL ||
	    garp->n4g_ext_res->n4g_fslocations.locations_val == NULL) {
		retval = 0;
		goto exit;
	}

	if (!garp->n4g_fsid_valid)
		retval = 0;

exit:
	if (retval == 0) {
		/* the call was ok but failed validating the call results */
		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
	} else {
		/* on success the caller owns res (via *callres) and frees it */
		ASSERT(callres != NULL);
		*callres = res;
	}

	if (sp != NULL)
		nfs4_server_rele(sp);
	kmem_free(argop, argoplist_size);
	return (retval);
}

/* tunable to disable referral mounts */
int nfs4_no_referrals = 0;

/*
 * Returns NULL if the vnode cannot be created or found.
 */
vnode_t *
find_referral_stubvp(vnode_t *dvp, char *nm, cred_t *cr)
{
	nfs_fh4 *stub_fh, *dfh;
	nfs4_sharedfh_t *sfhp;
	char *newfhval;
	vnode_t *vp = NULL;
	fattr4_mounted_on_fileid mnt_on_fileid;
	nfs4_ga_res_t garp;
	mntinfo4_t *mi;
	COMPOUND4res_clnt callres;
	hrtime_t t;

	/* Referral mounts can be administratively disabled via this tunable */
	if (nfs4_no_referrals)
		return (NULL);

	/*
	 * Get the mounted_on_fileid, unique on that server::fsid
	 */
	mi = VTOMI4(dvp);
	if (nfs4_fetch_locations(mi, VTOR4(dvp)->r_fh, nm, cr,
	    &garp, &callres, FALSE) == 0)
		return (NULL);
	mnt_on_fileid = garp.n4g_mon_fid;

	/*
	 * Only n4g_mon_fid was needed from the compound results; free the
	 * XDR data that nfs4_fetch_locations() handed back.
	 */
	(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);

	/*
	 * Build a fake filehandle from the dir FH and the mounted_on_fileid
	 */
	dfh = &VTOR4(dvp)->r_fh->sfh_fh;
	stub_fh = kmem_alloc(sizeof (nfs_fh4), KM_SLEEP);
	stub_fh->nfs_fh4_val = kmem_alloc(dfh->nfs_fh4_len +
	    sizeof (fattr4_mounted_on_fileid), KM_SLEEP);
	newfhval = stub_fh->nfs_fh4_val;

	/* copy directory's file handle */
	bcopy(dfh->nfs_fh4_val, newfhval, dfh->nfs_fh4_len);
	stub_fh->nfs_fh4_len = dfh->nfs_fh4_len;
	newfhval = newfhval + dfh->nfs_fh4_len;

	/* Add mounted_on_fileid.  Use bcopy to avoid alignment problem */
	bcopy((char *)&mnt_on_fileid, newfhval,
	    sizeof (fattr4_mounted_on_fileid));
	stub_fh->nfs_fh4_len += sizeof (fattr4_mounted_on_fileid);

	/*
	 * sfh4_put() copies the filehandle into the shared-FH table, so the
	 * temporary buffers can be freed immediately regardless of outcome.
	 */
	sfhp = sfh4_put(stub_fh, VTOMI4(dvp), NULL);
	kmem_free(stub_fh->nfs_fh4_val, dfh->nfs_fh4_len +
	    sizeof (fattr4_mounted_on_fileid));
	kmem_free(stub_fh, sizeof (nfs_fh4));
	if (sfhp == NULL)
		return (NULL);

	t = gethrtime();
	garp.n4g_va.va_type = VDIR;
	vp = makenfs4node(sfhp, NULL, dvp->v_vfsp, t,
	    cr, dvp, fn_get(VTOSV(dvp)->sv_name, nm, sfhp));

	if (vp != NULL)
		vp->v_type = VDIR;

	/* makenfs4node() holds its own reference on the shared FH */
	sfh4_rele(&sfhp);
	return (vp);
}

/*
 * Create (or find) the referral stub vnode for name 'nm' under 'dvp',
 * mark its rnode as a referral stub, and enter it in the DNLC.  On
 * success the stub replaces *vpp (releasing any vnode previously held
 * there) and 0 is returned; otherwise EINVAL.
 */
int
nfs4_setup_referral(vnode_t *dvp, char *nm, vnode_t **vpp, cred_t *cr)
{
	vnode_t *nvp;
	rnode4_t *rp;

	if ((nvp = find_referral_stubvp(dvp, nm, cr)) == NULL)
		return (EINVAL);

	rp = VTOR4(nvp);
	/* r_statelock protects the rnode stub-state flags */
	mutex_enter(&rp->r_statelock);
	r4_stub_referral(rp);
	mutex_exit(&rp->r_statelock);
	dnlc_enter(dvp, nm, nvp);

	if (*vpp != NULL)
		VN_RELE(*vpp);	/* no longer need this vnode */

	*vpp = nvp;

	return (0);
}

/*
 * Fetch the location information and resolve the new server.
 * Caller needs to free up the XDR data which is returned.
 * Input: mount info, shared filehandle, nodename
 * Return: Index to the result or Error(-1)
 * Output: FsLocations Info, Resolved Server Info.
 */
int
nfs4_process_referral(mntinfo4_t *mi, nfs4_sharedfh_t *sfh,
    char *nm, cred_t *cr, nfs4_ga_res_t *grp, COMPOUND4res_clnt *res,
    struct nfs_fsl_info *fsloc)
{
	fs_location4 *fsp;
	struct nfs_fsl_info nfsfsloc;
	int ret, i, error;
	nfs4_ga_res_t garp;
	COMPOUND4res_clnt callres;
	struct knetconfig *knc;

	ret = nfs4_fetch_locations(mi, sfh, nm, cr, &garp, &callres, TRUE);
	if (ret == 0)
		return (-1);

	/*
	 * As a lame attempt to figuring out if we're
	 * handling a migration event or a referral,
	 * look for rnodes with this fsid in the rnode
	 * cache.
	 *
	 * If we can find one or more such rnodes, it
	 * means we're handling a migration event and
	 * we want to bail out in that case.
	 */
	if (r4find_by_fsid(mi, &garp.n4g_fsid)) {
		DTRACE_PROBE3(nfs4clnt__debug__referral__migration,
		    mntinfo4_t *, mi, nfs4_ga_res_t *, &garp,
		    char *, "nfs4_process_referral");
		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);
		return (-1);
	}

	/*
	 * Find the first responsive server to mount.  When we find
	 * one, fsp will point to it.
	 */
	for (i = 0; i < garp.n4g_ext_res->n4g_fslocations.locations_len; i++) {

		fsp = &garp.n4g_ext_res->n4g_fslocations.locations_val[i];
		if (fsp->server_len == 0 || fsp->server_val == NULL)
			continue;

		/* Upcall to resolve the server name to address/knconf */
		error = nfs4_callmapid(fsp->server_val, &nfsfsloc);
		if (error != 0)
			continue;

		error = nfs4_ping_server_common(nfsfsloc.knconf,
		    nfsfsloc.addr, !(mi->mi_flags & MI4_INT));
		if (error == RPC_SUCCESS)
			break;

		DTRACE_PROBE2(nfs4clnt__debug__referral__srvaddr,
		    sockaddr_in *, (struct sockaddr_in *)nfsfsloc.addr->buf,
		    char *, "nfs4_process_referral");

		/* ping failed: discard this candidate's mapid data */
		(void) xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc);
	}

	/*
	 * NOTE(review): nfsfsloc is only initialized when the loop above
	 * exited via 'break' (i.e. i < locations_len), so the short-circuit
	 * ordering of this condition is load-bearing — the length test must
	 * come first.  Presumably nfs4_callmapid() never returns success
	 * with a NULL knconf; verify against its implementation.
	 */
	knc = nfsfsloc.knconf;
	if ((i >= garp.n4g_ext_res->n4g_fslocations.locations_len) ||
	    (knc->knc_protofmly == NULL) || (knc->knc_proto == NULL)) {
		DTRACE_PROBE2(nfs4clnt__debug__referral__nofsloc,
		    nfs4_ga_res_t *, &garp, char *, "nfs4_process_referral");
		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);
		return (-1);
	}

	/*
	 * Send the results back.  Ownership of the XDR-allocated data in
	 * nfsfsloc and callres transfers to the caller, who must free it.
	 */
	*fsloc = nfsfsloc;
	*grp = garp;
	*res = callres;
	return (i);
}

/*
 * Referrals case - need to fetch referral data and then upcall to
 * user-level to get complete mount data.
 */
static ephemeral_servinfo_t *
nfs4_trigger_esi_create_referral(vnode_t *vp, cred_t *cr)
{
	struct knetconfig *sikncp, *svkncp;
	struct netbuf *bufp;
	ephemeral_servinfo_t *esi;
	vnode_t *dvp;
	rnode4_t *drp;
	fs_location4 *fsp;
	struct nfs_fsl_info nfsfsloc;
	nfs4_ga_res_t garp;
	char *p;
	char fn[MAXNAMELEN];
	int i, index = -1;
	mntinfo4_t *mi;
	COMPOUND4res_clnt callres;

	/*
	 * If we're passed in a stub vnode that
	 * isn't a "referral" stub, bail out
	 * and return a failure
	 */
	if (!RP_ISSTUB_REFERRAL(VTOR4(vp)))
		return (NULL);

	/* Need the parent directory and our component name for the LOOKUP */
	if (vtodv(vp, &dvp, CRED(), TRUE) != 0)
		return (NULL);

	drp = VTOR4(dvp);
	if (nfs_rw_enter_sig(&drp->r_rwlock, RW_READER, INTR4(dvp))) {
		VN_RELE(dvp);
		return (NULL);
	}

	if (vtoname(vp, fn, MAXNAMELEN) != 0) {
		nfs_rw_exit(&drp->r_rwlock);
		VN_RELE(dvp);
		return (NULL);
	}

	mi = VTOMI4(dvp);
	index = nfs4_process_referral(mi, drp->r_fh, fn, cr,
	    &garp, &callres, &nfsfsloc);
	nfs_rw_exit(&drp->r_rwlock);
	VN_RELE(dvp);
	if (index < 0)
		return (NULL);

	/* index identifies the responsive fs_location chosen above */
	fsp = &garp.n4g_ext_res->n4g_fslocations.locations_val[index];
	esi = kmem_zalloc(sizeof (ephemeral_servinfo_t), KM_SLEEP);

	/* initially set to be our type of ephemeral mount; may be added to */
	esi->esi_mount_flags = NFSMNT_REFERRAL;

	/* NUL-terminated copy of the server's UTF-8 hostname */
	esi->esi_hostname =
	    kmem_zalloc(fsp->server_val->utf8string_len + 1, KM_SLEEP);
	bcopy(fsp->server_val->utf8string_val, esi->esi_hostname,
	    fsp->server_val->utf8string_len);
	esi->esi_hostname[fsp->server_val->utf8string_len] = '\0';

	/* Deep-copy the resolved server address */
	bufp = kmem_alloc(sizeof (struct netbuf), KM_SLEEP);
	bufp->len = nfsfsloc.addr->len;
	bufp->maxlen = nfsfsloc.addr->maxlen;
	bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP);
	bcopy(nfsfsloc.addr->buf, bufp->buf, bufp->len);
	esi->esi_addr = bufp;

	esi->esi_knconf = kmem_zalloc(sizeof (*esi->esi_knconf), KM_SLEEP);
	sikncp = esi->esi_knconf;

	DTRACE_PROBE2(nfs4clnt__debug__referral__nfsfsloc,
	    struct nfs_fsl_info *, &nfsfsloc,
	    char *, "nfs4_trigger_esi_create_referral");

	/*
	 * Deep-copy the knetconfig; strlcat into the zeroed KNC_STRSIZE
	 * buffers acts as a bounded string copy.
	 */
	svkncp = nfsfsloc.knconf;
	sikncp->knc_semantics = svkncp->knc_semantics;
	sikncp->knc_protofmly = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
	(void) strlcat((char *)sikncp->knc_protofmly,
	    (char *)svkncp->knc_protofmly, KNC_STRSIZE);
	sikncp->knc_proto = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
	(void) strlcat((char *)sikncp->knc_proto, (char *)svkncp->knc_proto,
	    KNC_STRSIZE);
	sikncp->knc_rdev = svkncp->knc_rdev;

	DTRACE_PROBE2(nfs4clnt__debug__referral__knetconf,
	    struct knetconfig *, sikncp,
	    char *, "nfs4_trigger_esi_create_referral");

	esi->esi_netname = kmem_zalloc(nfsfsloc.netnm_len, KM_SLEEP);
	bcopy(nfsfsloc.netname, esi->esi_netname, nfsfsloc.netnm_len);
	esi->esi_syncaddr = NULL;

	/*
	 * Build "/comp1/comp2/..." from the rootpath components into a
	 * MAXPATHLEN scratch buffer, then trim it to size via strdup below.
	 */
	esi->esi_path = p = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	esi->esi_path_len = MAXPATHLEN;
	*p++ = '/';
	for (i = 0; i < fsp->rootpath.pathname4_len; i++) {
		component4 *comp;

		comp = &fsp->rootpath.pathname4_val[i];
		/* If no space, null the string and bail */
		if ((p - esi->esi_path) + comp->utf8string_len + 1 > MAXPATHLEN)
			goto err;
		bcopy(comp->utf8string_val, p, comp->utf8string_len);
		p += comp->utf8string_len;
		*p++ = '/';
	}
	/* Overwrite the trailing '/' (or terminate the bare "/") */
	if (fsp->rootpath.pathname4_len != 0)
		*(p - 1) = '\0';
	else
		*p = '\0';
	p = esi->esi_path;
	esi->esi_path = strdup(p);
	esi->esi_path_len = strlen(p) + 1;
	kmem_free(p, MAXPATHLEN);

	/* Allocated in nfs4_process_referral() */
	(void) xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc);
	(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);

	return (esi);
err:
	/*
	 * Unwind everything allocated above.  At this point esi_path still
	 * points at the MAXPATHLEN scratch buffer (esi_path_len ==
	 * MAXPATHLEN), so the free sizes all match their allocations.
	 */
	kmem_free(esi->esi_path, esi->esi_path_len);
	kmem_free(esi->esi_hostname, fsp->server_val->utf8string_len + 1);
	kmem_free(esi->esi_addr->buf, esi->esi_addr->len);
	kmem_free(esi->esi_addr, sizeof (struct netbuf));
	kmem_free(esi->esi_knconf->knc_protofmly, KNC_STRSIZE);
	kmem_free(esi->esi_knconf->knc_proto, KNC_STRSIZE);
	kmem_free(esi->esi_knconf, sizeof (*esi->esi_knconf));
	kmem_free(esi->esi_netname, nfsfsloc.netnm_len);
	kmem_free(esi, sizeof (ephemeral_servinfo_t));
	(void) xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc);
	(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);
	return (NULL);
}

/*
 * Assemble the args, and call the
 * generic VFS mount function to
 * finally perform the ephemeral mount.
 */
static int
nfs4_trigger_domount(vnode_t *stubvp, domount_args_t *dma, vfs_t **vfsp,
    cred_t *cr, vnode_t **newvpp)
{
	struct mounta *uap;
	char *mntpt, *orig_path, *path;
	const char *orig_mntpt;
	int retval;
	int mntpt_len;
	int spec_len;
	zone_t *zone = curproc->p_zone;
	bool_t has_leading_slash;
	int i;

	vfs_t *stubvfsp = stubvp->v_vfsp;
	ephemeral_servinfo_t *esi = dma->dma_esi;
	struct nfs_args *nargs = dma->dma_nargs;

	/* first, construct the mount point for the ephemeral mount */
	orig_path = path = fn_path(VTOSV(stubvp)->sv_name);
	orig_mntpt = (char *)refstr_value(stubvfsp->vfs_mntpt);

	/* fn_path() paths begin with "." — skip it when joining below */
	if (*orig_path == '.')
		orig_path++;

	/*
	 * Get rid of zone's root path
	 */
	if (zone != global_zone) {
		/*
		 * -1 for trailing '/' and -1 for EOS.
		 */
		if (strncmp(zone->zone_rootpath, orig_mntpt,
		    zone->zone_rootpathlen - 1) == 0) {
			orig_mntpt += (zone->zone_rootpathlen - 2);
		}
	}

	/* mntpt = parent's mount point + path-within-parent */
	mntpt_len = strlen(orig_mntpt) + strlen(orig_path);
	mntpt = kmem_zalloc(mntpt_len + 1, KM_SLEEP);
	(void) strcat(mntpt, orig_mntpt);
	(void) strcat(mntpt, orig_path);

	kmem_free(path, strlen(path) + 1);
	path = esi->esi_path;
	if (*path == '.')
		path++;
	if (path[0] == '/' && path[1] == '/')
		path++;
	has_leading_slash = (*path == '/');

	spec_len = strlen(dma->dma_hostlist);
	spec_len += strlen(path);

	/* We are going to have to add this in */
	if (!has_leading_slash)
		spec_len++;

	/* We need to get the ':' for dma_hostlist:esi_path */
	spec_len++;

	uap = kmem_zalloc(sizeof (struct mounta), KM_SLEEP);
	uap->spec = kmem_zalloc(spec_len + 1, KM_SLEEP);
	(void) snprintf(uap->spec, spec_len + 1, "%s:%s%s", dma->dma_hostlist,
	    has_leading_slash ? "" : "/", path);

	uap->dir = mntpt;

	uap->flags = MS_SYSSPACE | MS_DATA;
	/* fstype-independent mount options not covered elsewhere */
	/* copy parent's mount(1M) "-m" flag */
	if (stubvfsp->vfs_flag & VFS_NOMNTTAB)
		uap->flags |= MS_NOMNTTAB;

	uap->fstype = MNTTYPE_NFS4;
	uap->dataptr = (char *)nargs;
	/* not needed for MS_SYSSPACE */
	uap->datalen = 0;

	/* use optptr to pass in extra mount options */
	uap->flags |= MS_OPTIONSTR;
	uap->optptr = nfs4_trigger_create_mntopts(stubvfsp);
	if (uap->optptr == NULL) {
		retval = EINVAL;
		goto done;
	}

	/* domount() expects us to count the trailing NUL */
	uap->optlen = strlen(uap->optptr) + 1;

	/*
	 * If we get EBUSY, we try again once to see if we can perform
	 * the mount. We do this because of a spurious race condition.
	 */
	for (i = 0; i < 2; i++) {
		int error;
		bool_t was_mounted;

		retval = domount(NULL, uap, stubvp, cr, vfsp);
		if (retval == 0) {
			/* swap the VFS hold for a root-vnode hold */
			retval = VFS_ROOT(*vfsp, newvpp);
			VFS_RELE(*vfsp);
			break;
		} else if (retval != EBUSY) {
			break;
		}

		/*
		 * We might find it mounted by the other racer...
		 */
		error = nfs4_trigger_mounted_already(stubvp,
		    newvpp, &was_mounted, vfsp);
		if (error) {
			goto done;
		} else if (was_mounted) {
			retval = 0;
			break;
		}
	}

done:
	if (uap->optptr)
		nfs4_trigger_destroy_mntopts(uap->optptr);

	kmem_free(uap->spec, spec_len + 1);
	kmem_free(uap, sizeof (struct mounta));
	kmem_free(mntpt, mntpt_len + 1);

	return (retval);
}

/*
 * Build an nfs_args structure for passing to domount().
 *
 * Ephemeral mount-type specific data comes from the ephemeral_servinfo_t;
 * generic data - common to all ephemeral mount types - is read directly
 * from the parent mount's servinfo4_t and mntinfo4_t, via the stub vnode.
1961b9238976Sth */ 1962b9238976Sth static struct nfs_args * 1963b9238976Sth nfs4_trigger_nargs_create(mntinfo4_t *mi, servinfo4_t *svp, 1964b9238976Sth ephemeral_servinfo_t *esi) 1965b9238976Sth { 1966b9238976Sth sec_data_t *secdata; 1967b9238976Sth struct nfs_args *nargs; 1968b9238976Sth 1969b9238976Sth /* setup the nfs args */ 1970b9238976Sth nargs = kmem_zalloc(sizeof (struct nfs_args), KM_SLEEP); 1971b9238976Sth 1972b9238976Sth (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1973b9238976Sth 1974b9238976Sth nargs->addr = esi->esi_addr; 1975b9238976Sth 1976b9238976Sth /* for AUTH_DH by negotiation */ 1977b9238976Sth if (esi->esi_syncaddr || esi->esi_netname) { 1978b9238976Sth nargs->flags |= NFSMNT_SECURE; 1979b9238976Sth nargs->syncaddr = esi->esi_syncaddr; 1980b9238976Sth nargs->netname = esi->esi_netname; 1981b9238976Sth } 1982b9238976Sth 1983b9238976Sth nargs->flags |= NFSMNT_KNCONF; 1984b9238976Sth nargs->knconf = esi->esi_knconf; 1985b9238976Sth nargs->flags |= NFSMNT_HOSTNAME; 1986b9238976Sth nargs->hostname = esi->esi_hostname; 1987b9238976Sth nargs->fh = esi->esi_path; 1988b9238976Sth 1989b9238976Sth /* general mount settings, all copied from parent mount */ 1990b9238976Sth mutex_enter(&mi->mi_lock); 1991b9238976Sth 1992b9238976Sth if (!(mi->mi_flags & MI4_HARD)) 1993b9238976Sth nargs->flags |= NFSMNT_SOFT; 1994b9238976Sth 1995b9238976Sth nargs->flags |= NFSMNT_WSIZE | NFSMNT_RSIZE | NFSMNT_TIMEO | 1996b9238976Sth NFSMNT_RETRANS; 1997b9238976Sth nargs->wsize = mi->mi_stsize; 1998b9238976Sth nargs->rsize = mi->mi_tsize; 1999b9238976Sth nargs->timeo = mi->mi_timeo; 2000b9238976Sth nargs->retrans = mi->mi_retrans; 2001b9238976Sth 2002b9238976Sth if (mi->mi_flags & MI4_INT) 2003b9238976Sth nargs->flags |= NFSMNT_INT; 2004b9238976Sth if (mi->mi_flags & MI4_NOAC) 2005b9238976Sth nargs->flags |= NFSMNT_NOAC; 2006b9238976Sth 2007b9238976Sth nargs->flags |= NFSMNT_ACREGMIN | NFSMNT_ACREGMAX | NFSMNT_ACDIRMIN | 2008b9238976Sth NFSMNT_ACDIRMAX; 
2009b9238976Sth nargs->acregmin = HR2SEC(mi->mi_acregmin); 2010b9238976Sth nargs->acregmax = HR2SEC(mi->mi_acregmax); 2011b9238976Sth nargs->acdirmin = HR2SEC(mi->mi_acdirmin); 2012b9238976Sth nargs->acdirmax = HR2SEC(mi->mi_acdirmax); 2013b9238976Sth 2014*2f172c55SRobert Thurlow /* add any specific flags for this type of ephemeral mount */ 2015*2f172c55SRobert Thurlow nargs->flags |= esi->esi_mount_flags; 2016*2f172c55SRobert Thurlow 2017b9238976Sth if (mi->mi_flags & MI4_NOCTO) 2018b9238976Sth nargs->flags |= NFSMNT_NOCTO; 2019b9238976Sth if (mi->mi_flags & MI4_GRPID) 2020b9238976Sth nargs->flags |= NFSMNT_GRPID; 2021b9238976Sth if (mi->mi_flags & MI4_LLOCK) 2022b9238976Sth nargs->flags |= NFSMNT_LLOCK; 2023b9238976Sth if (mi->mi_flags & MI4_NOPRINT) 2024b9238976Sth nargs->flags |= NFSMNT_NOPRINT; 2025b9238976Sth if (mi->mi_flags & MI4_DIRECTIO) 2026b9238976Sth nargs->flags |= NFSMNT_DIRECTIO; 2027*2f172c55SRobert Thurlow if (mi->mi_flags & MI4_PUBLIC && nargs->flags & NFSMNT_MIRRORMOUNT) 2028b9238976Sth nargs->flags |= NFSMNT_PUBLIC; 2029b9238976Sth 2030*2f172c55SRobert Thurlow /* Do some referral-specific option tweaking */ 2031*2f172c55SRobert Thurlow if (nargs->flags & NFSMNT_REFERRAL) { 2032*2f172c55SRobert Thurlow nargs->flags &= ~NFSMNT_DORDMA; 2033*2f172c55SRobert Thurlow nargs->flags |= NFSMNT_TRYRDMA; 2034*2f172c55SRobert Thurlow } 2035b9238976Sth 2036*2f172c55SRobert Thurlow mutex_exit(&mi->mi_lock); 2037b9238976Sth 2038b9238976Sth /* 2039b9238976Sth * Security data & negotiation policy. 2040b9238976Sth * 2041*2f172c55SRobert Thurlow * For mirror mounts, we need to preserve the parent mount's 2042*2f172c55SRobert Thurlow * preference for security negotiation, translating SV4_TRYSECDEFAULT 2043*2f172c55SRobert Thurlow * to NFSMNT_SECDEFAULT if present. 
2044*2f172c55SRobert Thurlow * 2045*2f172c55SRobert Thurlow * For referrals, we always want security negotiation and will 2046*2f172c55SRobert Thurlow * set NFSMNT_SECDEFAULT and we will not copy current secdata. 2047*2f172c55SRobert Thurlow * The reason is that we can't negotiate down from a parent's 2048*2f172c55SRobert Thurlow * Kerberos flavor to AUTH_SYS. 2049b9238976Sth * 2050b9238976Sth * If SV4_TRYSECDEFAULT is not set, that indicates that a specific 2051b9238976Sth * security flavour was requested, with data in sv_secdata, and that 2052b9238976Sth * no negotiation should occur. If this specified flavour fails, that's 2053b9238976Sth * it. We will copy sv_secdata, and not set NFSMNT_SECDEFAULT. 2054b9238976Sth * 2055b9238976Sth * If SV4_TRYSECDEFAULT is set, then we start with a passed-in 2056b9238976Sth * default flavour, in sv_secdata, but then negotiate a new flavour. 2057b9238976Sth * Possible flavours are recorded in an array in sv_secinfo, with 2058b9238976Sth * currently in-use flavour pointed to by sv_currsec. 2059b9238976Sth * 2060b9238976Sth * If sv_currsec is set, i.e. if negotiation has already occurred, 2061b9238976Sth * we will copy sv_currsec. Otherwise, copy sv_secdata. Regardless, 2062b9238976Sth * we will set NFSMNT_SECDEFAULT, to enable negotiation. 
2063b9238976Sth */ 2064*2f172c55SRobert Thurlow if (nargs->flags & NFSMNT_REFERRAL) { 2065*2f172c55SRobert Thurlow /* enable negotiation for referral mount */ 2066*2f172c55SRobert Thurlow nargs->flags |= NFSMNT_SECDEFAULT; 2067*2f172c55SRobert Thurlow secdata = kmem_alloc(sizeof (sec_data_t), KM_SLEEP); 2068*2f172c55SRobert Thurlow secdata->secmod = secdata->rpcflavor = AUTH_SYS; 2069*2f172c55SRobert Thurlow secdata->data = NULL; 2070*2f172c55SRobert Thurlow } 2071*2f172c55SRobert Thurlow 2072*2f172c55SRobert Thurlow else if (svp->sv_flags & SV4_TRYSECDEFAULT) { 2073*2f172c55SRobert Thurlow /* enable negotiation for mirror mount */ 2074b9238976Sth nargs->flags |= NFSMNT_SECDEFAULT; 2075b9238976Sth 2076b9238976Sth /* 2077b9238976Sth * As a starting point for negotiation, copy parent 2078b9238976Sth * mount's negotiated flavour (sv_currsec) if available, 2079b9238976Sth * or its passed-in flavour (sv_secdata) if not. 2080b9238976Sth */ 2081b9238976Sth if (svp->sv_currsec != NULL) 2082b9238976Sth secdata = copy_sec_data(svp->sv_currsec); 2083b9238976Sth else if (svp->sv_secdata != NULL) 2084b9238976Sth secdata = copy_sec_data(svp->sv_secdata); 2085b9238976Sth else 2086b9238976Sth secdata = NULL; 2087b9238976Sth } else { 2088b9238976Sth /* do not enable negotiation; copy parent's passed-in flavour */ 2089b9238976Sth if (svp->sv_secdata != NULL) 2090b9238976Sth secdata = copy_sec_data(svp->sv_secdata); 2091b9238976Sth else 2092b9238976Sth secdata = NULL; 2093b9238976Sth } 2094b9238976Sth 2095b9238976Sth nfs_rw_exit(&svp->sv_lock); 2096b9238976Sth 2097b9238976Sth nargs->flags |= NFSMNT_NEWARGS; 2098b9238976Sth nargs->nfs_args_ext = NFS_ARGS_EXTB; 2099b9238976Sth nargs->nfs_ext_u.nfs_extB.secdata = secdata; 2100b9238976Sth 2101b9238976Sth /* for NFS RO failover; caller will set if necessary */ 2102b9238976Sth nargs->nfs_ext_u.nfs_extB.next = NULL; 2103b9238976Sth 2104b9238976Sth return (nargs); 2105b9238976Sth } 2106b9238976Sth 2107b9238976Sth static void 2108b9238976Sth 
nfs4_trigger_nargs_destroy(struct nfs_args *nargs)
{
	/*
	 * Either the mount failed, in which case the data is not needed, or
	 * nfs4_mount() has either taken copies of what it needs or,
	 * where it has merely copied the ptr, it has set *our* ptr to NULL,
	 * whereby nfs4_free_args() will ignore it.
	 */
	nfs4_free_args(nargs);
	kmem_free(nargs, sizeof (struct nfs_args));
}

/*
 * When we finally get into the mounting, we need to add this
 * node to the ephemeral tree.
 *
 * This is called from nfs4_mount().
 *
 * Returns 0 on success, or EBUSY if the ephemeral tree was torn
 * down (or the parent's ephemeral node disappeared) while we were
 * getting here; in that case the caller decides whether to retry.
 *
 * Lock order: mi_parent->mi_lock is taken before mi->mi_lock.
 */
int
nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp)
{
	mntinfo4_t *mi_parent;
	nfs4_ephemeral_t *eph;
	nfs4_ephemeral_tree_t *net;

	nfs4_ephemeral_t *prior;
	nfs4_ephemeral_t *child;

	nfs4_ephemeral_t *peer;

	nfs4_trigger_globals_t *ntg;
	zone_t *zone = curproc->p_zone;

	int rc = 0;

	mi_parent = VTOMI4(mvp);

	/*
	 * Get this before grabbing anything else!
	 *
	 * The harvester thread reaps idle ephemeral mounts; make sure
	 * it is running before we link a new node into the tree.
	 */
	ntg = zone_getspecific(nfs4_ephemeral_key, zone);
	if (!ntg->ntg_thread_started) {
		nfs4_ephemeral_start_harvester(ntg);
	}

	mutex_enter(&mi_parent->mi_lock);
	mutex_enter(&mi->mi_lock);

	/* The child shares the parent's tree. */
	net = mi->mi_ephemeral_tree =
	    mi_parent->mi_ephemeral_tree;

	/*
	 * If the mi_ephemeral_tree is NULL, then it
	 * means that either the harvester or a manual
	 * umount has cleared the tree out right before
	 * we got here.
	 *
	 * There is nothing we can do here, so return
	 * to the caller and let them decide whether they
	 * try again.
	 */
	if (net == NULL) {
		mutex_exit(&mi->mi_lock);
		mutex_exit(&mi_parent->mi_lock);

		return (EBUSY);
	}

	/*
	 * We've just tied the mntinfo to the tree, so
	 * now we bump the refcnt and hold it there until
	 * this mntinfo is removed from the tree.
	 */
	nfs4_ephemeral_tree_hold(net);

	/*
	 * We need to tack together the ephemeral mount
	 * with this new mntinfo.
	 */
	eph = kmem_zalloc(sizeof (*eph), KM_SLEEP);
	eph->ne_mount = mi;
	/* Timestamp of creation; the harvester compares this to ne_mount_to. */
	eph->ne_ref_time = gethrestime_sec();

	/*
	 * We need to tell the ephemeral mount when
	 * to time out.
	 */
	eph->ne_mount_to = ntg->ntg_mount_to;

	mi->mi_ephemeral = eph;

	/*
	 * If the enclosing mntinfo4 is also ephemeral,
	 * then we need to point to its enclosing parent.
	 * Else the enclosing mntinfo4 is the enclosing parent.
	 *
	 * We also need to weave this ephemeral node
	 * into the tree.
	 */
	if (mi_parent->mi_flags & MI4_EPHEMERAL) {
		/*
		 * We need to decide if we are
		 * the root node of this branch
		 * or if we are a sibling of this
		 * branch.
		 */
		prior = mi_parent->mi_ephemeral;
		if (prior == NULL) {
			/*
			 * Race condition, clean up, and
			 * let caller handle mntinfo.
			 *
			 * The parent's ephemeral node vanished after we
			 * checked MI4_EPHEMERAL; undo our local state and
			 * drop the tree hold we just took.
			 */
			mi->mi_flags &= ~MI4_EPHEMERAL;
			mi->mi_ephemeral = NULL;
			kmem_free(eph, sizeof (*eph));
			nfs4_ephemeral_tree_rele(net);
			rc = EBUSY;
		} else {
			if (prior->ne_child == NULL) {
				/* First child of this branch. */
				prior->ne_child = eph;
			} else {
				/* Push onto the head of the peer list. */
				child = prior->ne_child;

				prior->ne_child = eph;
				eph->ne_peer = child;

				child->ne_prior = eph;
			}

			eph->ne_prior = prior;
		}
	} else {
		/*
		 * The parent mntinfo4 is the non-ephemeral
		 * root of the ephemeral tree.  We
		 * need to decide if we are the root
		 * node of that tree or if we are a
		 * sibling of the root node.
		 *
		 * We are the root if there is no
		 * other node.
		 */
		if (net->net_root == NULL) {
			net->net_root = eph;
		} else {
			/* Push onto the head of the root's peer list. */
			eph->ne_peer = peer = net->net_root;
			ASSERT(peer != NULL);
			net->net_root = eph;

			peer->ne_prior = eph;
		}

		eph->ne_prior = NULL;
	}

	mutex_exit(&mi->mi_lock);
	mutex_exit(&mi_parent->mi_lock);

	return (rc);
}

/*
 * Commit the changes to the ephemeral tree for removing this node.
 *
 * Splices the node out of the child/peer linkage; if the node was
 * the head of its peer list (or the tree root), the root pointers
 * are fixed up instead.  Callers are expected to have the tree
 * held stable — NOTE(review): no locks are taken here; verify the
 * callers' locking against this assumption.
 */
static void
nfs4_ephemeral_umount_cleanup(nfs4_ephemeral_t *eph)
{
	nfs4_ephemeral_t *e = eph;
	nfs4_ephemeral_t *peer;
	nfs4_ephemeral_t *prior;

	peer = eph->ne_peer;
	prior = e->ne_prior;

	/*
	 * If this branch root was not the
	 * tree root, then we need to fix back pointers.
	 */
	if (prior) {
		if (prior->ne_child == e) {
			prior->ne_child = peer;
		} else {
			prior->ne_peer = peer;
		}

		if (peer)
			peer->ne_prior = prior;
	} else if (peer) {
		/* We were the tree root; promote our next peer. */
		peer->ne_mount->mi_ephemeral_tree->net_root = peer;
		peer->ne_prior = NULL;
	} else {
		/* We were the only node; the tree is now empty. */
		e->ne_mount->mi_ephemeral_tree->net_root = NULL;
	}
}

/*
 * We want to avoid recursion at all costs. So we need to
 * unroll the tree. We do this by a depth first traversal to
 * leaf nodes. We blast away the leaf and work our way back
 * up and down the tree.
 *
 * isTreeRoot indicates whether eph is the root of the whole
 * ephemeral tree (TRUE) or just the root of the branch being
 * removed (FALSE).  flag and cr are passed to umount2_engine().
 *
 * Returns 0 on success or the first error from umount2_engine(),
 * at which point the traversal stops.
 */
static int
nfs4_ephemeral_unmount_engine(nfs4_ephemeral_t *eph,
    int isTreeRoot, int flag, cred_t *cr)
{
	nfs4_ephemeral_t *e = eph;
	nfs4_ephemeral_t *prior;
	mntinfo4_t *mi;
	vfs_t *vfsp;
	int error;

	/*
	 * We use the loop while unrolling the ephemeral tree.
	 */
	for (;;) {
		/*
		 * First we walk down the child.
		 */
		if (e->ne_child) {
			prior = e;
			e = e->ne_child;
			continue;
		}

		/*
		 * If we are the root of the branch we are removing,
		 * we end it here. But if the branch is the root of
		 * the tree, we have to forge on. We do not consider
		 * the peer list for the root because while it may
		 * be okay to remove, it is both extra work and a
		 * potential for a false-positive error to stall the
		 * unmount attempt.
		 */
		if (e == eph && isTreeRoot == FALSE)
			return (0);

		/*
		 * Next we walk down the peer list.
		 */
		if (e->ne_peer) {
			prior = e;
			e = e->ne_peer;
			continue;
		}

		/*
		 * We can only remove the node passed in by the
		 * caller if it is the root of the ephemeral tree.
		 * Otherwise, the caller will remove it.
		 */
		if (e == eph && isTreeRoot == FALSE)
			return (0);

		/*
		 * Okay, we have a leaf node, time
		 * to prune it!
		 *
		 * Note that prior can only be NULL if
		 * and only if it is the root of the
		 * ephemeral tree.
		 */
		prior = e->ne_prior;

		mi = e->ne_mount;
		mutex_enter(&mi->mi_lock);
		vfsp = mi->mi_vfsp;

		/*
		 * Cleared by umount2_engine.
		 */
		VFS_HOLD(vfsp);

		/*
		 * Inform nfs4_unmount to not recursively
		 * descend into this node's children when it
		 * gets processed.
		 */
		mi->mi_flags |= MI4_EPHEMERAL_RECURSED;
		mutex_exit(&mi->mi_lock);

		error = umount2_engine(vfsp, flag, cr, FALSE);
		if (error) {
			/*
			 * We need to reenable nfs4_unmount's ability
			 * to recursively descend on this node.
			 */
			mutex_enter(&mi->mi_lock);
			mi->mi_flags &= ~MI4_EPHEMERAL_RECURSED;
			mutex_exit(&mi->mi_lock);

			return (error);
		}

		/*
		 * If we are the current node, we do not want to
		 * touch anything else. At this point, the only
		 * way the current node can have survived to here
		 * is if it is the root of the ephemeral tree and
		 * we are unmounting the enclosing mntinfo4.
		 */
		if (e == eph) {
			ASSERT(prior == NULL);
			return (0);
		}

		/*
		 * Stitch up the prior node. Note that since
		 * we have handled the root of the tree, prior
		 * must be non-NULL.
		 */
		ASSERT(prior != NULL);
		if (prior->ne_child == e) {
			prior->ne_child = NULL;
		} else {
			ASSERT(prior->ne_peer == e);

			prior->ne_peer = NULL;
		}

		/* Walk back up and retry from the prior node. */
		e = prior;
	}

	/* NOTREACHED */
}

/*
 * Common code to safely release net_cnt_lock and net_tree_lock
 *
 * Clears the UMOUNTING state, drops the tree lock, and resets
 * *pmust_unlock so the caller will not try to unlock again.
 * A no-op if *pmust_unlock is not set.
 */
void
nfs4_ephemeral_umount_unlock(bool_t *pmust_unlock,
    nfs4_ephemeral_tree_t **pnet)
{
	nfs4_ephemeral_tree_t *net = *pnet;

	if (*pmust_unlock) {
		mutex_enter(&net->net_cnt_lock);
		net->net_status &= ~NFS4_EPHEMERAL_TREE_UMOUNTING;
		mutex_exit(&net->net_cnt_lock);

		mutex_exit(&net->net_tree_lock);

		*pmust_unlock = FALSE;
	}
}

/*
 * While we may have removed any child or sibling nodes of this
 * ephemeral node, we can not nuke it until we know that there
 * were no active vnodes on it. This will do that final
 * work once we know it is not busy.
 */
void
nfs4_ephemeral_umount_activate(mntinfo4_t *mi, bool_t *pmust_unlock,
    nfs4_ephemeral_tree_t **pnet)
{
	/*
	 * Now we need to get rid of the ephemeral data if it exists.
	 */
	mutex_enter(&mi->mi_lock);
	if (mi->mi_ephemeral) {
		/*
		 * If we are the root node of an ephemeral branch
		 * which is being removed, then we need to fixup
		 * pointers into and out of the node.
		 */
		if (!(mi->mi_flags & MI4_EPHEMERAL_RECURSED))
			nfs4_ephemeral_umount_cleanup(mi->mi_ephemeral);

		/* Drop the hold taken when this node joined the tree. */
		nfs4_ephemeral_tree_rele(*pnet);
		ASSERT(mi->mi_ephemeral != NULL);

		kmem_free(mi->mi_ephemeral, sizeof (*mi->mi_ephemeral));
		mi->mi_ephemeral = NULL;
	}
	mutex_exit(&mi->mi_lock);

	nfs4_ephemeral_umount_unlock(pmust_unlock, pnet);
}

/*
 * Unmount an ephemeral node.
 *
 * Note that if this code fails, then it must unlock.
 *
 * If it succeeds, then the caller must be prepared to do so.
 *
 * On return, *pnet is set to the node's ephemeral tree (if any) and
 * *pmust_unlock tells the caller whether it still owns the tree lock
 * and UMOUNTING state (to be released via
 * nfs4_ephemeral_umount_unlock() / _activate()).
 */
int
nfs4_ephemeral_umount(mntinfo4_t *mi, int flag, cred_t *cr,
    bool_t *pmust_unlock, nfs4_ephemeral_tree_t **pnet)
{
	int error = 0;
	nfs4_ephemeral_t *eph;
	nfs4_ephemeral_tree_t *net;
	int is_derooting = FALSE;
	int is_recursed = FALSE;
	int was_locked = FALSE;

	/*
	 * Make sure to set the default state for cleaning
	 * up the tree in the caller (and on the way out).
	 */
	*pmust_unlock = FALSE;

	/*
	 * The active vnodes on this file system may be ephemeral
	 * children. We need to check for and try to unmount them
	 * here. If any can not be unmounted, we are going
	 * to return EBUSY.
	 */
	mutex_enter(&mi->mi_lock);

	/*
	 * If an ephemeral tree, we need to check to see if
	 * the lock is already held. If it is, then we need
	 * to see if we are being called as a result of
	 * the recursive removal of some node of the tree or
	 * if we are another attempt to remove the tree.
	 *
	 * mi_flags & MI4_EPHEMERAL indicates an ephemeral
	 * node. mi_ephemeral being non-NULL also does this.
	 *
	 * mi_ephemeral_tree being non-NULL is sufficient
	 * to also indicate either it is an ephemeral node
	 * or the enclosing mntinfo4.
	 *
	 * Do we need MI4_EPHEMERAL? Yes, it is useful for
	 * when we delete the ephemeral node and need to
	 * differentiate from an ephemeral node and the
	 * enclosing root node.
	 */
	*pnet = net = mi->mi_ephemeral_tree;
	if (net == NULL) {
		/* Not part of an ephemeral tree; nothing to do. */
		mutex_exit(&mi->mi_lock);
		return (0);
	}

	eph = mi->mi_ephemeral;
	is_recursed = mi->mi_flags & MI4_EPHEMERAL_RECURSED;
	/* Derooting: we are unmounting the enclosing (non-ephemeral) root. */
	is_derooting = (eph == NULL);

	mutex_enter(&net->net_cnt_lock);

	/*
	 * If this is not recursion, then we need to
	 * check to see if a harvester thread has
	 * already grabbed the lock.
	 *
	 * After we exit this branch, we may not
	 * blindly return, we need to jump to
	 * is_busy!
	 */
	if (!is_recursed) {
		if (net->net_status &
		    NFS4_EPHEMERAL_TREE_LOCKED) {
			/*
			 * If the tree is locked, we need
			 * to decide whether we are the
			 * harvester or some explicit call
			 * for a umount. The only way that
			 * we are the harvester is if
			 * MS_SYSSPACE is set.
			 *
			 * We only let the harvester through
			 * at this point.
			 *
			 * We return EBUSY so that the
			 * caller knows something is
			 * going on. Note that by that
			 * time, the umount in the other
			 * thread may have already occurred.
			 */
			if (!(flag & MS_SYSSPACE)) {
				mutex_exit(&net->net_cnt_lock);
				mutex_exit(&mi->mi_lock);

				return (EBUSY);
			}

			was_locked = TRUE;
		}
	}

	mutex_exit(&net->net_cnt_lock);
	mutex_exit(&mi->mi_lock);

	/*
	 * If we are not the harvester, we need to check
	 * to see if we need to grab the tree lock.
	 */
	if (was_locked == FALSE) {
		/*
		 * If we grab the lock, it means that no other
		 * operation is working on the tree. If we don't
		 * grab it, we need to decide if this is because
		 * we are a recursive call or a new operation.
		 */
		if (mutex_tryenter(&net->net_tree_lock)) {
			*pmust_unlock = TRUE;
		} else {
			/*
			 * If we are a recursive call, we can
			 * proceed without the lock.
			 * Otherwise we have to wait until
			 * the lock becomes free.
			 */
			if (!is_recursed) {
				mutex_enter(&net->net_cnt_lock);
				if (net->net_status &
				    (NFS4_EPHEMERAL_TREE_DEROOTING
				    | NFS4_EPHEMERAL_TREE_INVALID)) {
					mutex_exit(&net->net_cnt_lock);
					goto is_busy;
				}
				mutex_exit(&net->net_cnt_lock);

				/*
				 * We can't hold any other locks whilst
				 * we wait on this to free up.
				 */
				mutex_enter(&net->net_tree_lock);

				/*
				 * Note that while mi->mi_ephemeral
				 * may change and thus we have to
				 * update eph, it is the case that
				 * we have tied down net and
				 * do not care if mi->mi_ephemeral_tree
				 * has changed.
				 */
				mutex_enter(&mi->mi_lock);
				eph = mi->mi_ephemeral;
				mutex_exit(&mi->mi_lock);

				/*
				 * Okay, we need to see if either the
				 * tree got nuked or the current node
				 * got nuked. Both of which will cause
				 * an error.
				 *
				 * Note that a subsequent retry of the
				 * umount shall work.
				 */
				mutex_enter(&net->net_cnt_lock);
				if (net->net_status &
				    NFS4_EPHEMERAL_TREE_INVALID ||
				    (!is_derooting && eph == NULL)) {
					mutex_exit(&net->net_cnt_lock);
					mutex_exit(&net->net_tree_lock);
					goto is_busy;
				}
				mutex_exit(&net->net_cnt_lock);
				*pmust_unlock = TRUE;
			}
		}
	}

	/*
	 * Only once we have grabbed the lock can we mark what we
	 * are planning on doing to the ephemeral tree.
	 */
	if (*pmust_unlock) {
		mutex_enter(&net->net_cnt_lock);
		net->net_status |= NFS4_EPHEMERAL_TREE_UMOUNTING;

		/*
		 * Check to see if we are nuking the root.
		 */
		if (is_derooting)
			net->net_status |=
			    NFS4_EPHEMERAL_TREE_DEROOTING;
		mutex_exit(&net->net_cnt_lock);
	}

	if (!is_derooting) {
		/*
		 * Only work on children if the caller has not already
		 * done so.
		 */
		if (!is_recursed) {
			ASSERT(eph != NULL);

			error = nfs4_ephemeral_unmount_engine(eph,
			    FALSE, flag, cr);
			if (error)
				goto is_busy;
		}
	} else {
		eph = net->net_root;

		/*
		 * Only work if there is something there.
		 */
		if (eph) {
			error = nfs4_ephemeral_unmount_engine(eph, TRUE,
			    flag, cr);
			if (error) {
				mutex_enter(&net->net_cnt_lock);
				net->net_status &=
				    ~NFS4_EPHEMERAL_TREE_DEROOTING;
				mutex_exit(&net->net_cnt_lock);
				goto is_busy;
			}

			/*
			 * Nothing else which goes wrong will
			 * invalidate the blowing away of the
			 * ephemeral tree.
			 */
			net->net_root = NULL;
		}

		/*
		 * We have derooted and we have caused the tree to be
		 * invalidated.
		 */
		mutex_enter(&net->net_cnt_lock);
		net->net_status &= ~NFS4_EPHEMERAL_TREE_DEROOTING;
		net->net_status |= NFS4_EPHEMERAL_TREE_INVALID;
		DTRACE_NFSV4_1(nfs4clnt__dbg__ephemeral__tree__derooting,
		    uint_t, net->net_refcnt);

		/*
		 * We will not finalize this node, so safe to
		 * release it.
		 */
		nfs4_ephemeral_tree_decr(net);
		mutex_exit(&net->net_cnt_lock);

		if (was_locked == FALSE)
			mutex_exit(&net->net_tree_lock);

		/*
		 * We have just blown away any notation of this
		 * tree being locked or having a refcnt.
		 * We can't let the caller try to clean things up.
		 */
		*pmust_unlock = FALSE;

		/*
		 * At this point, the tree should no longer be
		 * associated with the mntinfo4. We need to pull
		 * it off there and let the harvester take
		 * care of it once the refcnt drops.
		 */
		mutex_enter(&mi->mi_lock);
		mi->mi_ephemeral_tree = NULL;
		mutex_exit(&mi->mi_lock);
	}

	return (0);

is_busy:

	nfs4_ephemeral_umount_unlock(pmust_unlock, pnet);

	return (error);
}

/*
 * Do the umount and record any error in the parent.
 *
 * On failure the error is latched into the prior node's ne_state
 * (child vs. peer) so the harvester walk can skip the parent; the
 * error code itself is intentionally not propagated.
 */
static void
nfs4_ephemeral_record_umount(vfs_t *vfsp, int flag,
    nfs4_ephemeral_t *e, nfs4_ephemeral_t *prior)
{
	int error;

	error = umount2_engine(vfsp, flag, kcred, FALSE);
	if (error) {
		if (prior) {
			if (prior->ne_child == e)
				prior->ne_state |=
				    NFS4_EPHEMERAL_CHILD_ERROR;
			else
				prior->ne_state |=
				    NFS4_EPHEMERAL_PEER_ERROR;
		}
	}
}

/*
 * For each tree in the forest (where the forest is in
 * effect all of the ephemeral trees for this zone),
 * scan to see if a node can be unmounted. Note that
 * unlike nfs4_ephemeral_unmount_engine(), we do
 * not process the current node before children or
 * siblings.
I.e., if a node can be unmounted, we 2795b9238976Sth * do not recursively check to see if the nodes 2796b9238976Sth * hanging off of it can also be unmounted. 2797b9238976Sth * 2798b9238976Sth * Instead, we delve down deep to try and remove the 2799b9238976Sth * children first. Then, because we share code with 2800b9238976Sth * nfs4_ephemeral_unmount_engine(), we will try 2801b9238976Sth * them again. This could be a performance issue in 2802b9238976Sth * the future. 2803b9238976Sth * 2804b9238976Sth * Also note that unlike nfs4_ephemeral_unmount_engine(), 2805b9238976Sth * we do not halt on an error. We will not remove the 2806b9238976Sth * current node, but we will keep on trying to remove 2807b9238976Sth * the others. 2808b9238976Sth * 2809b9238976Sth * force indicates that we want the unmount to occur 2810b9238976Sth * even if there is something blocking it. 2811b9238976Sth * 2812b9238976Sth * time_check indicates that we want to see if the 2813b9238976Sth * mount has expired past mount_to or not. Typically 2814b9238976Sth * we want to do this and only on a shutdown of the 2815b9238976Sth * zone would we want to ignore the check. 
 */
static void
nfs4_ephemeral_harvest_forest(nfs4_trigger_globals_t *ntg,
    bool_t force, bool_t time_check)
{
	nfs4_ephemeral_tree_t *net;
	nfs4_ephemeral_tree_t *prev = NULL;
	nfs4_ephemeral_tree_t *next;
	nfs4_ephemeral_t *e;
	nfs4_ephemeral_t *prior;
	time_t now = gethrestime_sec();

	/* Local list of invalid, unreferenced trees to free at the end. */
	nfs4_ephemeral_tree_t *harvest = NULL;

	int flag;

	mntinfo4_t *mi;
	vfs_t *vfsp;

	if (force)
		flag = MS_FORCE | MS_SYSSPACE;
	else
		flag = MS_SYSSPACE;

	mutex_enter(&ntg->ntg_forest_lock);
	for (net = ntg->ntg_forest; net != NULL; net = next) {
		next = net->net_next;

		nfs4_ephemeral_tree_hold(net);

		mutex_enter(&net->net_tree_lock);

		/*
		 * Let the unmount code know that the
		 * tree is already locked!
		 */
		mutex_enter(&net->net_cnt_lock);
		net->net_status |= NFS4_EPHEMERAL_TREE_LOCKED;
		mutex_exit(&net->net_cnt_lock);

		/*
		 * If the intent is to force all ephemeral nodes to
		 * be unmounted in this zone, we can short circuit a
		 * lot of tree traversal and simply zap the root node.
		 */
		if (force) {
			if (net->net_root) {
				mi = net->net_root->ne_mount;
				vfsp = mi->mi_vfsp;

				/*
				 * Cleared by umount2_engine.
				 */
				VFS_HOLD(vfsp);

				(void) umount2_engine(vfsp, flag,
				    kcred, FALSE);

				goto check_done;
			}
		}

		/*
		 * Non-force case: walk the tree iteratively in
		 * post-order (children and peers before their prior
		 * node), using ne_state to remember where each node
		 * is in the traversal.
		 */
		e = net->net_root;
		if (e)
			e->ne_state = NFS4_EPHEMERAL_VISIT_CHILD;

		while (e) {
			if (e->ne_state == NFS4_EPHEMERAL_VISIT_CHILD) {
				e->ne_state = NFS4_EPHEMERAL_VISIT_SIBLING;
				if (e->ne_child) {
					e = e->ne_child;
					e->ne_state =
					    NFS4_EPHEMERAL_VISIT_CHILD;
				}

				continue;
			} else if (e->ne_state ==
			    NFS4_EPHEMERAL_VISIT_SIBLING) {
				e->ne_state = NFS4_EPHEMERAL_PROCESS_ME;
				if (e->ne_peer) {
					e = e->ne_peer;
					e->ne_state =
					    NFS4_EPHEMERAL_VISIT_CHILD;
				}

				continue;
			} else if (e->ne_state ==
			    NFS4_EPHEMERAL_CHILD_ERROR) {
				prior = e->ne_prior;

				/*
				 * If a child reported an error, do
				 * not bother trying to unmount.
				 *
				 * If your prior node is a parent,
				 * pass the error up such that they
				 * also do not try to unmount.
				 *
				 * However, if your prior is a sibling,
				 * let them try to unmount if they can.
				 */
				if (prior) {
					if (prior->ne_child == e)
						prior->ne_state |=
						    NFS4_EPHEMERAL_CHILD_ERROR;
					else
						prior->ne_state |=
						    NFS4_EPHEMERAL_PEER_ERROR;
				}

				/*
				 * Clear the error and if needed, process peers.
				 *
				 * Once we mask out the error, we know whether
				 * or not we have to process another node.
				 */
				e->ne_state &= ~NFS4_EPHEMERAL_CHILD_ERROR;
				if (e->ne_state == NFS4_EPHEMERAL_PROCESS_ME)
					e = prior;

				continue;
			} else if (e->ne_state ==
			    NFS4_EPHEMERAL_PEER_ERROR) {
				prior = e->ne_prior;

				if (prior) {
					if (prior->ne_child == e)
						prior->ne_state =
						    NFS4_EPHEMERAL_CHILD_ERROR;
					else
						prior->ne_state =
						    NFS4_EPHEMERAL_PEER_ERROR;
				}

				/*
				 * Clear the error from this node and do the
				 * correct processing.
				 */
				e->ne_state &= ~NFS4_EPHEMERAL_PEER_ERROR;
				continue;
			}

			prior = e->ne_prior;
			e->ne_state = NFS4_EPHEMERAL_OK;

			/*
			 * It must be the case that we need to process
			 * this node.
			 */
			if (!time_check ||
			    now - e->ne_ref_time > e->ne_mount_to) {
				mi = e->ne_mount;
				vfsp = mi->mi_vfsp;

				/*
				 * Cleared by umount2_engine.
				 */
				VFS_HOLD(vfsp);

				/*
				 * Note that we effectively work down to the
				 * leaf nodes first, try to unmount them,
				 * then work our way back up the tree.
				 *
				 * Also note that we deal with a lot of
				 * complexity by sharing the work with
				 * the manual unmount code.
				 */
				nfs4_ephemeral_record_umount(vfsp, flag,
				    e, prior);
			}

			e = prior;
		}

check_done:

		/*
		 * At this point we are done processing this tree.
		 *
		 * If the tree is invalid and we were the only reference
		 * to it, then we push it on the local linked list
		 * to remove it at the end. We avoid that action now
		 * to keep the tree processing going along at a fair clip.
		 *
		 * Else, even if we were the only reference, we
		 * allow it to be reused as needed.
		 */
		mutex_enter(&net->net_cnt_lock);
		nfs4_ephemeral_tree_decr(net);
		if (net->net_refcnt == 0 &&
		    net->net_status & NFS4_EPHEMERAL_TREE_INVALID) {
			net->net_status &= ~NFS4_EPHEMERAL_TREE_LOCKED;
			mutex_exit(&net->net_cnt_lock);
			mutex_exit(&net->net_tree_lock);

			/* Unlink the tree from the forest. */
			if (prev)
				prev->net_next = net->net_next;
			else
				ntg->ntg_forest = net->net_next;

			net->net_next = harvest;
			harvest = net;
			continue;
		}

		net->net_status &= ~NFS4_EPHEMERAL_TREE_LOCKED;
		mutex_exit(&net->net_cnt_lock);
		mutex_exit(&net->net_tree_lock);

		prev = net;
	}
	mutex_exit(&ntg->ntg_forest_lock);

	/*
	 * Now tear down the trees we harvested above; safe to do
	 * without locks as nothing else can reach them.
	 */
	for (net = harvest; net != NULL; net = next) {
		next = net->net_next;

		mutex_destroy(&net->net_tree_lock);
		mutex_destroy(&net->net_cnt_lock);
		kmem_free(net, sizeof (*net));
	}
}

/*
 * This is the thread which decides when the harvesting
 * can proceed and when to kill it off for this zone.
 */
static void
nfs4_ephemeral_harvester(nfs4_trigger_globals_t *ntg)
{
	clock_t timeleft;
	zone_t *zone = curproc->p_zone;

	for (;;) {
		/*
		 * Sleep for the harvester period, or until the zone
		 * starts shutting down, whichever comes first.
		 */
		timeleft = zone_status_timedwait(zone, ddi_get_lbolt() +
		    nfs4_trigger_thread_timer * hz, ZONE_IS_SHUTTING_DOWN);

		/*
		 * zone is exiting...
		 */
		if (timeleft != -1) {
			ASSERT(zone_status_get(zone) >= ZONE_IS_SHUTTING_DOWN);
			zthread_exit();
			/* NOTREACHED */
		}

		/*
		 * Only bother scanning if there is potential
		 * work to be done.
		 */
		if (ntg->ntg_forest == NULL)
			continue;

		/*
		 * Now scan the list and get rid of everything which
		 * is old.
		 */
		nfs4_ephemeral_harvest_forest(ntg, FALSE, TRUE);
	}

	/* NOTREACHED */
}

/*
 * The zone specific glue needed to start the unmount harvester.
 *
 * Note that we want to avoid holding the mutex as long as possible,
 * hence the multiple checks.
 *
 * The caller should avoid us getting down here in the first
 * place.
 */
static void
nfs4_ephemeral_start_harvester(nfs4_trigger_globals_t *ntg)
{
	/*
	 * It got started before we got here...
	 */
	if (ntg->ntg_thread_started)
		return;

	mutex_enter(&nfs4_ephemeral_thread_lock);

	if (ntg->ntg_thread_started) {
		mutex_exit(&nfs4_ephemeral_thread_lock);
		return;
	}

	/*
	 * Start the unmounter harvester thread for this zone.
	 */
	(void) zthread_create(NULL, 0, nfs4_ephemeral_harvester,
	    ntg, 0, minclsyspri);

	ntg->ntg_thread_started = TRUE;
	mutex_exit(&nfs4_ephemeral_thread_lock);
}

/*
 * ZSD create callback: allocate and initialize this zone's
 * ephemeral-mount globals.
 */
/*ARGSUSED*/
static void *
nfs4_ephemeral_zsd_create(zoneid_t zoneid)
{
	nfs4_trigger_globals_t *ntg;

	ntg = kmem_zalloc(sizeof (*ntg), KM_SLEEP);
	ntg->ntg_thread_started = FALSE;

	/*
	 * This is the default....
	 */
	ntg->ntg_mount_to = nfs4_trigger_thread_timer;

	mutex_init(&ntg->ntg_forest_lock, NULL,
	    MUTEX_DEFAULT, NULL);

	return (ntg);
}

/*
 * Try a nice gentle walk down the forest and convince
 * all of the trees to gracefully give it up.
 */
/*ARGSUSED*/
static void
nfs4_ephemeral_zsd_shutdown(zoneid_t zoneid, void *arg)
{
	nfs4_trigger_globals_t *ntg = arg;

	if (!ntg)
		return;

	nfs4_ephemeral_harvest_forest(ntg, FALSE, FALSE);
}

/*
 * Race along the forest and rip all of the trees out by
 * their rootballs!
 */
/*ARGSUSED*/
static void
nfs4_ephemeral_zsd_destroy(zoneid_t zoneid, void *arg)
{
	nfs4_trigger_globals_t *ntg = arg;

	if (!ntg)
		return;

	/* Force-unmount everything left in this zone's forest. */
	nfs4_ephemeral_harvest_forest(ntg, TRUE, FALSE);

	mutex_destroy(&ntg->ntg_forest_lock);
	kmem_free(ntg, sizeof (*ntg));
}

/*
 * This is the zone independent cleanup needed for
 * ephemeral mount processing.
 */
void
nfs4_ephemeral_fini(void)
{
	(void) zone_key_delete(nfs4_ephemeral_key);
	mutex_destroy(&nfs4_ephemeral_thread_lock);
}

/*
 * This is the zone independent initialization needed for
 * ephemeral mount processing.
 */
void
nfs4_ephemeral_init(void)
{
	mutex_init(&nfs4_ephemeral_thread_lock, NULL, MUTEX_DEFAULT,
	    NULL);

	zone_key_create(&nfs4_ephemeral_key, nfs4_ephemeral_zsd_create,
	    nfs4_ephemeral_zsd_shutdown, nfs4_ephemeral_zsd_destroy);
}

/*
 * nfssys() calls this function to set the per-zone
 * value of mount_to to drive when an ephemeral mount is
 * timed out. Each mount will grab a copy of this value
 * when mounted.
3200b9238976Sth */ 3201b9238976Sth void 3202b9238976Sth nfs4_ephemeral_set_mount_to(uint_t mount_to) 3203b9238976Sth { 3204b9238976Sth nfs4_trigger_globals_t *ntg; 3205b9238976Sth zone_t *zone = curproc->p_zone; 3206b9238976Sth 3207b9238976Sth ntg = zone_getspecific(nfs4_ephemeral_key, zone); 3208b9238976Sth 3209b9238976Sth ntg->ntg_mount_to = mount_to; 3210b9238976Sth } 3211b9238976Sth 3212b9238976Sth /* 3213b9238976Sth * Walk the list of v4 mount options; if they are currently set in vfsp, 3214b9238976Sth * append them to a new comma-separated mount option string, and return it. 3215b9238976Sth * 3216b9238976Sth * Caller should free by calling nfs4_trigger_destroy_mntopts(). 3217b9238976Sth */ 3218b9238976Sth static char * 3219b9238976Sth nfs4_trigger_create_mntopts(vfs_t *vfsp) 3220b9238976Sth { 3221b9238976Sth uint_t i; 3222b9238976Sth char *mntopts; 3223b9238976Sth struct vfssw *vswp; 3224b9238976Sth mntopts_t *optproto; 3225b9238976Sth 3226b9238976Sth mntopts = kmem_zalloc(MAX_MNTOPT_STR, KM_SLEEP); 3227b9238976Sth 3228b9238976Sth /* get the list of applicable mount options for v4; locks *vswp */ 3229b9238976Sth vswp = vfs_getvfssw(MNTTYPE_NFS4); 3230b9238976Sth optproto = &vswp->vsw_optproto; 3231b9238976Sth 3232b9238976Sth for (i = 0; i < optproto->mo_count; i++) { 3233b9238976Sth struct mntopt *mop = &optproto->mo_list[i]; 3234b9238976Sth 3235b9238976Sth if (mop->mo_flags & MO_EMPTY) 3236b9238976Sth continue; 3237b9238976Sth 3238b9238976Sth if (nfs4_trigger_add_mntopt(mntopts, mop->mo_name, vfsp)) { 3239b9238976Sth kmem_free(mntopts, MAX_MNTOPT_STR); 3240b9238976Sth vfs_unrefvfssw(vswp); 3241b9238976Sth return (NULL); 3242b9238976Sth } 3243b9238976Sth } 3244b9238976Sth 3245b9238976Sth vfs_unrefvfssw(vswp); 3246b9238976Sth 3247b9238976Sth /* 3248b9238976Sth * MNTOPT_XATTR is not in the v4 mount opt proto list, 3249b9238976Sth * and it may only be passed via MS_OPTIONSTR, so we 3250b9238976Sth * must handle it here. 
3251b9238976Sth * 3252b9238976Sth * Ideally, it would be in the list, but NFS does not specify its 3253b9238976Sth * own opt proto list, it uses instead the default one. Since 3254b9238976Sth * not all filesystems support extended attrs, it would not be 3255b9238976Sth * appropriate to add it there. 3256b9238976Sth */ 3257b9238976Sth if (nfs4_trigger_add_mntopt(mntopts, MNTOPT_XATTR, vfsp) || 3258b9238976Sth nfs4_trigger_add_mntopt(mntopts, MNTOPT_NOXATTR, vfsp)) { 3259b9238976Sth kmem_free(mntopts, MAX_MNTOPT_STR); 3260b9238976Sth return (NULL); 3261b9238976Sth } 3262b9238976Sth 3263b9238976Sth return (mntopts); 3264b9238976Sth } 3265b9238976Sth 3266b9238976Sth static void 3267b9238976Sth nfs4_trigger_destroy_mntopts(char *mntopts) 3268b9238976Sth { 3269b9238976Sth if (mntopts) 3270b9238976Sth kmem_free(mntopts, MAX_MNTOPT_STR); 3271b9238976Sth } 3272b9238976Sth 3273b9238976Sth /* 3274b9238976Sth * Check a single mount option (optname). Add to mntopts if it is set in VFS. 3275b9238976Sth */ 3276b9238976Sth static int 3277b9238976Sth nfs4_trigger_add_mntopt(char *mntopts, char *optname, vfs_t *vfsp) 3278b9238976Sth { 3279b9238976Sth if (mntopts == NULL || optname == NULL || vfsp == NULL) 3280b9238976Sth return (EINVAL); 3281b9238976Sth 3282b9238976Sth if (vfs_optionisset(vfsp, optname, NULL)) { 3283b9238976Sth size_t mntoptslen = strlen(mntopts); 3284b9238976Sth size_t optnamelen = strlen(optname); 3285b9238976Sth 3286b9238976Sth /* +1 for ',', +1 for NUL */ 3287b9238976Sth if (mntoptslen + optnamelen + 2 > MAX_MNTOPT_STR) 3288b9238976Sth return (EOVERFLOW); 3289b9238976Sth 3290b9238976Sth /* first or subsequent mount option? 
*/ 3291b9238976Sth if (*mntopts != '\0') 3292b9238976Sth (void) strcat(mntopts, ","); 3293b9238976Sth 3294b9238976Sth (void) strcat(mntopts, optname); 3295b9238976Sth } 3296b9238976Sth 3297b9238976Sth return (0); 3298b9238976Sth } 3299b9238976Sth 3300b9238976Sth static enum clnt_stat 3301*2f172c55SRobert Thurlow nfs4_ping_server_common(struct knetconfig *knc, struct netbuf *addr, int nointr) 3302b9238976Sth { 3303*2f172c55SRobert Thurlow int retries; 3304b9238976Sth uint_t max_msgsize; 3305b9238976Sth enum clnt_stat status; 3306b9238976Sth CLIENT *cl; 3307b9238976Sth struct timeval timeout; 3308b9238976Sth 3309b9238976Sth /* as per recov_newserver() */ 3310b9238976Sth max_msgsize = 0; 3311b9238976Sth retries = 1; 3312b9238976Sth timeout.tv_sec = 2; 3313b9238976Sth timeout.tv_usec = 0; 3314b9238976Sth 3315*2f172c55SRobert Thurlow if (clnt_tli_kcreate(knc, addr, NFS_PROGRAM, NFS_V4, 3316*2f172c55SRobert Thurlow max_msgsize, retries, CRED(), &cl) != 0) 3317b9238976Sth return (RPC_FAILED); 3318b9238976Sth 3319b9238976Sth if (nointr) 3320b9238976Sth cl->cl_nosignal = TRUE; 3321b9238976Sth status = CLNT_CALL(cl, RFS_NULL, xdr_void, NULL, xdr_void, NULL, 3322b9238976Sth timeout); 3323b9238976Sth if (nointr) 3324b9238976Sth cl->cl_nosignal = FALSE; 3325b9238976Sth 3326b9238976Sth AUTH_DESTROY(cl->cl_auth); 3327b9238976Sth CLNT_DESTROY(cl); 3328b9238976Sth 3329b9238976Sth return (status); 3330b9238976Sth } 3331*2f172c55SRobert Thurlow 3332*2f172c55SRobert Thurlow static enum clnt_stat 3333*2f172c55SRobert Thurlow nfs4_trigger_ping_server(servinfo4_t *svp, int nointr) 3334*2f172c55SRobert Thurlow { 3335*2f172c55SRobert Thurlow return (nfs4_ping_server_common(svp->sv_knconf, &svp->sv_addr, nointr)); 3336*2f172c55SRobert Thurlow } 3337