1*b9238976Sth /* 2*b9238976Sth * CDDL HEADER START 3*b9238976Sth * 4*b9238976Sth * The contents of this file are subject to the terms of the 5*b9238976Sth * Common Development and Distribution License (the "License"). 6*b9238976Sth * You may not use this file except in compliance with the License. 7*b9238976Sth * 8*b9238976Sth * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*b9238976Sth * or http://www.opensolaris.org/os/licensing. 10*b9238976Sth * See the License for the specific language governing permissions 11*b9238976Sth * and limitations under the License. 12*b9238976Sth * 13*b9238976Sth * When distributing Covered Code, include this CDDL HEADER in each 14*b9238976Sth * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*b9238976Sth * If applicable, add the following below this CDDL HEADER, with the 16*b9238976Sth * fields enclosed by brackets "[]" replaced with your own identifying 17*b9238976Sth * information: Portions Copyright [yyyy] [name of copyright owner] 18*b9238976Sth * 19*b9238976Sth * CDDL HEADER END 20*b9238976Sth */ 21*b9238976Sth 22*b9238976Sth /* 23*b9238976Sth * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24*b9238976Sth * Use is subject to license terms. 25*b9238976Sth */ 26*b9238976Sth 27*b9238976Sth #pragma ident "%Z%%M% %I% %E% SMI" 28*b9238976Sth 29*b9238976Sth /* 30*b9238976Sth * Support for ephemeral mounts, e.g. mirror-mounts. These mounts are 31*b9238976Sth * triggered from a "stub" rnode via a special set of vnodeops. 
32*b9238976Sth */ 33*b9238976Sth 34*b9238976Sth #include <sys/param.h> 35*b9238976Sth #include <sys/types.h> 36*b9238976Sth #include <sys/systm.h> 37*b9238976Sth #include <sys/cred.h> 38*b9238976Sth #include <sys/time.h> 39*b9238976Sth #include <sys/vnode.h> 40*b9238976Sth #include <sys/vfs.h> 41*b9238976Sth #include <sys/vfs_opreg.h> 42*b9238976Sth #include <sys/file.h> 43*b9238976Sth #include <sys/filio.h> 44*b9238976Sth #include <sys/uio.h> 45*b9238976Sth #include <sys/buf.h> 46*b9238976Sth #include <sys/mman.h> 47*b9238976Sth #include <sys/pathname.h> 48*b9238976Sth #include <sys/dirent.h> 49*b9238976Sth #include <sys/debug.h> 50*b9238976Sth #include <sys/vmsystm.h> 51*b9238976Sth #include <sys/fcntl.h> 52*b9238976Sth #include <sys/flock.h> 53*b9238976Sth #include <sys/swap.h> 54*b9238976Sth #include <sys/errno.h> 55*b9238976Sth #include <sys/strsubr.h> 56*b9238976Sth #include <sys/sysmacros.h> 57*b9238976Sth #include <sys/kmem.h> 58*b9238976Sth #include <sys/mount.h> 59*b9238976Sth #include <sys/cmn_err.h> 60*b9238976Sth #include <sys/pathconf.h> 61*b9238976Sth #include <sys/utsname.h> 62*b9238976Sth #include <sys/dnlc.h> 63*b9238976Sth #include <sys/acl.h> 64*b9238976Sth #include <sys/systeminfo.h> 65*b9238976Sth #include <sys/policy.h> 66*b9238976Sth #include <sys/sdt.h> 67*b9238976Sth #include <sys/list.h> 68*b9238976Sth #include <sys/stat.h> 69*b9238976Sth #include <sys/mntent.h> 70*b9238976Sth 71*b9238976Sth #include <rpc/types.h> 72*b9238976Sth #include <rpc/auth.h> 73*b9238976Sth #include <rpc/clnt.h> 74*b9238976Sth 75*b9238976Sth #include <nfs/nfs.h> 76*b9238976Sth #include <nfs/nfs_clnt.h> 77*b9238976Sth #include <nfs/nfs_acl.h> 78*b9238976Sth #include <nfs/lm.h> 79*b9238976Sth #include <nfs/nfs4.h> 80*b9238976Sth #include <nfs/nfs4_kprot.h> 81*b9238976Sth #include <nfs/rnode4.h> 82*b9238976Sth #include <nfs/nfs4_clnt.h> 83*b9238976Sth 84*b9238976Sth #include <vm/hat.h> 85*b9238976Sth #include <vm/as.h> 86*b9238976Sth #include <vm/page.h> 
87*b9238976Sth #include <vm/pvn.h> 88*b9238976Sth #include <vm/seg.h> 89*b9238976Sth #include <vm/seg_map.h> 90*b9238976Sth #include <vm/seg_kpm.h> 91*b9238976Sth #include <vm/seg_vn.h> 92*b9238976Sth 93*b9238976Sth #include <fs/fs_subr.h> 94*b9238976Sth 95*b9238976Sth #include <sys/ddi.h> 96*b9238976Sth #include <sys/int_fmtio.h> 97*b9238976Sth 98*b9238976Sth #include <util/string.h> 99*b9238976Sth 100*b9238976Sth /* 101*b9238976Sth * The automatic unmounter thread stuff! 102*b9238976Sth */ 103*b9238976Sth static int nfs4_trigger_thread_timer = 20; /* in seconds */ 104*b9238976Sth 105*b9238976Sth /* 106*b9238976Sth * Just a default.... 107*b9238976Sth */ 108*b9238976Sth static uint_t nfs4_trigger_mount_to = 240; 109*b9238976Sth 110*b9238976Sth typedef struct nfs4_trigger_globals { 111*b9238976Sth kmutex_t ntg_forest_lock; 112*b9238976Sth uint_t ntg_mount_to; 113*b9238976Sth int ntg_thread_started; 114*b9238976Sth nfs4_ephemeral_tree_t *ntg_forest; 115*b9238976Sth } nfs4_trigger_globals_t; 116*b9238976Sth 117*b9238976Sth kmutex_t nfs4_ephemeral_thread_lock; 118*b9238976Sth 119*b9238976Sth zone_key_t nfs4_ephemeral_key = ZONE_KEY_UNINITIALIZED; 120*b9238976Sth 121*b9238976Sth static void nfs4_ephemeral_start_harvester(nfs4_trigger_globals_t *); 122*b9238976Sth 123*b9238976Sth /* 124*b9238976Sth * Used for ephemeral mounts; contains data either duplicated from 125*b9238976Sth * servinfo4_t, or hand-crafted, depending on type of ephemeral mount. 126*b9238976Sth * 127*b9238976Sth * It's intended that this structure is used solely for ephemeral 128*b9238976Sth * mount-type specific data, for passing this data to 129*b9238976Sth * nfs4_trigger_nargs_create(). 
130*b9238976Sth */ 131*b9238976Sth typedef struct ephemeral_servinfo { 132*b9238976Sth char *esi_hostname; 133*b9238976Sth char *esi_netname; 134*b9238976Sth char *esi_path; 135*b9238976Sth int esi_path_len; 136*b9238976Sth int esi_mount_flags; 137*b9238976Sth struct netbuf *esi_addr; 138*b9238976Sth struct netbuf *esi_syncaddr; 139*b9238976Sth struct knetconfig *esi_knconf; 140*b9238976Sth } ephemeral_servinfo_t; 141*b9238976Sth 142*b9238976Sth /* 143*b9238976Sth * Collect together the mount-type specific and generic data args. 144*b9238976Sth */ 145*b9238976Sth typedef struct domount_args { 146*b9238976Sth ephemeral_servinfo_t *dma_esi; 147*b9238976Sth char *dma_hostlist; /* comma-sep. for RO failover */ 148*b9238976Sth struct nfs_args *dma_nargs; 149*b9238976Sth } domount_args_t; 150*b9238976Sth 151*b9238976Sth 152*b9238976Sth /* 153*b9238976Sth * The vnode ops functions for a trigger stub vnode 154*b9238976Sth */ 155*b9238976Sth static int nfs4_trigger_open(vnode_t **, int, cred_t *); 156*b9238976Sth static int nfs4_trigger_getattr(vnode_t *, struct vattr *, int, cred_t *); 157*b9238976Sth static int nfs4_trigger_setattr(vnode_t *, struct vattr *, int, cred_t *, 158*b9238976Sth caller_context_t *); 159*b9238976Sth static int nfs4_trigger_access(vnode_t *, int, int, cred_t *); 160*b9238976Sth static int nfs4_trigger_readlink(vnode_t *, struct uio *, cred_t *); 161*b9238976Sth static int nfs4_trigger_lookup(vnode_t *, char *, vnode_t **, 162*b9238976Sth struct pathname *, int, vnode_t *, cred_t *); 163*b9238976Sth static int nfs4_trigger_create(vnode_t *, char *, struct vattr *, 164*b9238976Sth enum vcexcl, int, vnode_t **, cred_t *, int); 165*b9238976Sth static int nfs4_trigger_remove(vnode_t *, char *, cred_t *); 166*b9238976Sth static int nfs4_trigger_link(vnode_t *, vnode_t *, char *, cred_t *); 167*b9238976Sth static int nfs4_trigger_rename(vnode_t *, char *, vnode_t *, char *, 168*b9238976Sth cred_t *); 169*b9238976Sth static int nfs4_trigger_mkdir(vnode_t 
*, char *, struct vattr *, 170*b9238976Sth vnode_t **, cred_t *); 171*b9238976Sth static int nfs4_trigger_rmdir(vnode_t *, char *, vnode_t *, cred_t *); 172*b9238976Sth static int nfs4_trigger_symlink(vnode_t *, char *, struct vattr *, char *, 173*b9238976Sth cred_t *); 174*b9238976Sth static int nfs4_trigger_cmp(vnode_t *, vnode_t *); 175*b9238976Sth 176*b9238976Sth /* 177*b9238976Sth * Regular NFSv4 vnodeops that we need to reference directly 178*b9238976Sth */ 179*b9238976Sth extern int nfs4_getattr(vnode_t *, struct vattr *, int, cred_t *); 180*b9238976Sth extern void nfs4_inactive(vnode_t *, cred_t *); 181*b9238976Sth extern int nfs4_rwlock(vnode_t *, int, caller_context_t *); 182*b9238976Sth extern void nfs4_rwunlock(vnode_t *, int, caller_context_t *); 183*b9238976Sth extern int nfs4_lookup(vnode_t *, char *, vnode_t **, 184*b9238976Sth struct pathname *, int, vnode_t *, cred_t *); 185*b9238976Sth extern int nfs4_pathconf(vnode_t *, int, ulong_t *, cred_t *); 186*b9238976Sth extern int nfs4_getsecattr(vnode_t *, vsecattr_t *, int, cred_t *); 187*b9238976Sth extern int nfs4_fid(vnode_t *, fid_t *); 188*b9238976Sth extern int nfs4_realvp(vnode_t *, vnode_t **); 189*b9238976Sth 190*b9238976Sth static int nfs4_trigger_mount(vnode_t *, vnode_t **); 191*b9238976Sth static int nfs4_trigger_domount(vnode_t *, domount_args_t *, vfs_t **, 192*b9238976Sth cred_t *); 193*b9238976Sth static domount_args_t *nfs4_trigger_domount_args_create(vnode_t *); 194*b9238976Sth static void nfs4_trigger_domount_args_destroy(domount_args_t *dma, 195*b9238976Sth vnode_t *vp); 196*b9238976Sth static ephemeral_servinfo_t *nfs4_trigger_esi_create(vnode_t *, servinfo4_t *); 197*b9238976Sth static void nfs4_trigger_esi_destroy(ephemeral_servinfo_t *, vnode_t *); 198*b9238976Sth static ephemeral_servinfo_t *nfs4_trigger_esi_create_mirrormount(vnode_t *, 199*b9238976Sth servinfo4_t *); 200*b9238976Sth static struct nfs_args *nfs4_trigger_nargs_create(mntinfo4_t *, servinfo4_t *, 
201*b9238976Sth ephemeral_servinfo_t *); 202*b9238976Sth static void nfs4_trigger_nargs_destroy(struct nfs_args *); 203*b9238976Sth static char *nfs4_trigger_create_mntopts(vfs_t *); 204*b9238976Sth static void nfs4_trigger_destroy_mntopts(char *); 205*b9238976Sth static int nfs4_trigger_add_mntopt(char *, char *, vfs_t *); 206*b9238976Sth static enum clnt_stat nfs4_trigger_ping_server(servinfo4_t *, int); 207*b9238976Sth 208*b9238976Sth extern int umount2_engine(vfs_t *, int, cred_t *, int); 209*b9238976Sth 210*b9238976Sth 211*b9238976Sth vnodeops_t *nfs4_trigger_vnodeops; 212*b9238976Sth 213*b9238976Sth /* 214*b9238976Sth * These are the vnodeops that we must define for stub vnodes. 215*b9238976Sth * 216*b9238976Sth * 217*b9238976Sth * Many of the VOPs defined for NFSv4 do not need to be defined here, 218*b9238976Sth * for various reasons. This will result in the VFS default function being 219*b9238976Sth * used: 220*b9238976Sth * 221*b9238976Sth * - These VOPs require a previous VOP_OPEN to have occurred. That will have 222*b9238976Sth * lost the reference to the stub vnode, meaning these should not be called: 223*b9238976Sth * close, read, write, ioctl, readdir, seek. 224*b9238976Sth * 225*b9238976Sth * - These VOPs are meaningless for vnodes without data pages. Since the 226*b9238976Sth * stub vnode is of type VDIR, these should not be called: 227*b9238976Sth * space, getpage, putpage, map, addmap, delmap, pageio, fsync. 228*b9238976Sth * 229*b9238976Sth * - These VOPs are otherwise not applicable, and should not be called: 230*b9238976Sth * dump, setsecattr. 231*b9238976Sth * 232*b9238976Sth * 233*b9238976Sth * These VOPs we do not want to define, but nor do we want the VFS default 234*b9238976Sth * action. Instead, we specify the VFS error function, with fs_error(), but 235*b9238976Sth * note that fs_error() is not actually called. 
Instead it results in the 236*b9238976Sth * use of the error function defined for the particular VOP, in vn_ops_table[]: 237*b9238976Sth * 238*b9238976Sth * - frlock, dispose, shrlock. 239*b9238976Sth * 240*b9238976Sth * 241*b9238976Sth * These VOPs we define to use the corresponding regular NFSv4 vnodeop. 242*b9238976Sth * NOTE: if any of these ops involve an OTW call with the stub FH, then 243*b9238976Sth * that call must be wrapped with save_mnt_secinfo()/check_mnt_secinfo() 244*b9238976Sth * to protect the security data in the servinfo4_t for the "parent" 245*b9238976Sth * filesystem that contains the stub. 246*b9238976Sth * 247*b9238976Sth * - These VOPs should not trigger a mount, so that "ls -l" does not: 248*b9238976Sth * pathconf, getsecattr. 249*b9238976Sth * 250*b9238976Sth * - These VOPs would not make sense to trigger: 251*b9238976Sth * inactive, rwlock, rwunlock, fid, realvp. 252*b9238976Sth */ 253*b9238976Sth const fs_operation_def_t nfs4_trigger_vnodeops_template[] = { 254*b9238976Sth VOPNAME_OPEN, { .vop_open = nfs4_trigger_open }, 255*b9238976Sth VOPNAME_GETATTR, { .vop_getattr = nfs4_trigger_getattr }, 256*b9238976Sth VOPNAME_SETATTR, { .vop_setattr = nfs4_trigger_setattr }, 257*b9238976Sth VOPNAME_ACCESS, { .vop_access = nfs4_trigger_access }, 258*b9238976Sth VOPNAME_LOOKUP, { .vop_lookup = nfs4_trigger_lookup }, 259*b9238976Sth VOPNAME_CREATE, { .vop_create = nfs4_trigger_create }, 260*b9238976Sth VOPNAME_REMOVE, { .vop_remove = nfs4_trigger_remove }, 261*b9238976Sth VOPNAME_LINK, { .vop_link = nfs4_trigger_link }, 262*b9238976Sth VOPNAME_RENAME, { .vop_rename = nfs4_trigger_rename }, 263*b9238976Sth VOPNAME_MKDIR, { .vop_mkdir = nfs4_trigger_mkdir }, 264*b9238976Sth VOPNAME_RMDIR, { .vop_rmdir = nfs4_trigger_rmdir }, 265*b9238976Sth VOPNAME_SYMLINK, { .vop_symlink = nfs4_trigger_symlink }, 266*b9238976Sth VOPNAME_READLINK, { .vop_readlink = nfs4_trigger_readlink }, 267*b9238976Sth VOPNAME_INACTIVE, { .vop_inactive = nfs4_inactive }, 
268*b9238976Sth VOPNAME_FID, { .vop_fid = nfs4_fid }, 269*b9238976Sth VOPNAME_RWLOCK, { .vop_rwlock = nfs4_rwlock }, 270*b9238976Sth VOPNAME_RWUNLOCK, { .vop_rwunlock = nfs4_rwunlock }, 271*b9238976Sth VOPNAME_REALVP, { .vop_realvp = nfs4_realvp }, 272*b9238976Sth VOPNAME_GETSECATTR, { .vop_getsecattr = nfs4_getsecattr }, 273*b9238976Sth VOPNAME_PATHCONF, { .vop_pathconf = nfs4_pathconf }, 274*b9238976Sth VOPNAME_FRLOCK, { .error = fs_error }, 275*b9238976Sth VOPNAME_DISPOSE, { .error = fs_error }, 276*b9238976Sth VOPNAME_SHRLOCK, { .error = fs_error }, 277*b9238976Sth VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 278*b9238976Sth NULL, NULL 279*b9238976Sth }; 280*b9238976Sth 281*b9238976Sth /* 282*b9238976Sth * Trigger ops for stub vnodes; for mirror mounts, etc. 283*b9238976Sth * 284*b9238976Sth * The general idea is that a "triggering" op will first call 285*b9238976Sth * nfs4_trigger_mount(), which will find out whether a mount has already 286*b9238976Sth * been triggered. 287*b9238976Sth * 288*b9238976Sth * If it has, then nfs4_trigger_mount() sets newvp to the root vnode 289*b9238976Sth * of the covering vfs. 290*b9238976Sth * 291*b9238976Sth * If a mount has not yet been triggered, nfs4_trigger_mount() will do so, 292*b9238976Sth * and again set newvp, as above. 293*b9238976Sth * 294*b9238976Sth * The triggering op may then re-issue the VOP by calling it on newvp. 295*b9238976Sth * 296*b9238976Sth * Note that some ops may perform custom action, and may or may not need 297*b9238976Sth * to trigger a mount. 298*b9238976Sth * 299*b9238976Sth * Some ops need to call the regular NFSv4 vnodeop for a stub vnode. We 300*b9238976Sth * obviously can't do this with VOP_<whatever>, since it's a stub vnode 301*b9238976Sth * and that would just recurse. Instead, we call the v4 op directly, 302*b9238976Sth * by name. This is OK, since we know that the vnode is for NFSv4, 303*b9238976Sth * otherwise it couldn't be a stub. 
304*b9238976Sth * 305*b9238976Sth */ 306*b9238976Sth 307*b9238976Sth static int 308*b9238976Sth nfs4_trigger_open(vnode_t **vpp, int flag, cred_t *cr) 309*b9238976Sth { 310*b9238976Sth int error; 311*b9238976Sth vnode_t *newvp; 312*b9238976Sth 313*b9238976Sth error = nfs4_trigger_mount(*vpp, &newvp); 314*b9238976Sth if (error) 315*b9238976Sth return (error); 316*b9238976Sth 317*b9238976Sth /* Release the stub vnode, as we're losing the reference to it */ 318*b9238976Sth VN_RELE(*vpp); 319*b9238976Sth 320*b9238976Sth /* Give the caller the root vnode of the newly-mounted fs */ 321*b9238976Sth *vpp = newvp; 322*b9238976Sth 323*b9238976Sth /* return with VN_HELD(newvp) */ 324*b9238976Sth return (VOP_OPEN(vpp, flag, cr)); 325*b9238976Sth } 326*b9238976Sth 327*b9238976Sth /* 328*b9238976Sth * For the majority of cases, nfs4_trigger_getattr() will not trigger 329*b9238976Sth * a mount. However, if ATTR_TRIGGER is set, we are being informed 330*b9238976Sth * that we need to force the mount before we attempt to determine 331*b9238976Sth * the attributes. The intent is an atomic operation for security 332*b9238976Sth * testing. 
333*b9238976Sth */ 334*b9238976Sth static int 335*b9238976Sth nfs4_trigger_getattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr) 336*b9238976Sth { 337*b9238976Sth int error; 338*b9238976Sth 339*b9238976Sth if (flags & ATTR_TRIGGER) { 340*b9238976Sth vnode_t *newvp; 341*b9238976Sth 342*b9238976Sth error = nfs4_trigger_mount(vp, &newvp); 343*b9238976Sth if (error) 344*b9238976Sth return (error); 345*b9238976Sth 346*b9238976Sth error = VOP_GETATTR(newvp, vap, flags, cr); 347*b9238976Sth VN_RELE(newvp); 348*b9238976Sth } else { 349*b9238976Sth error = nfs4_getattr(vp, vap, flags, cr); 350*b9238976Sth } 351*b9238976Sth 352*b9238976Sth return (error); 353*b9238976Sth } 354*b9238976Sth 355*b9238976Sth static int 356*b9238976Sth nfs4_trigger_setattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr, 357*b9238976Sth caller_context_t *ct) 358*b9238976Sth { 359*b9238976Sth int error; 360*b9238976Sth vnode_t *newvp; 361*b9238976Sth 362*b9238976Sth error = nfs4_trigger_mount(vp, &newvp); 363*b9238976Sth if (error) 364*b9238976Sth return (error); 365*b9238976Sth 366*b9238976Sth error = VOP_SETATTR(newvp, vap, flags, cr, ct); 367*b9238976Sth VN_RELE(newvp); 368*b9238976Sth 369*b9238976Sth return (error); 370*b9238976Sth } 371*b9238976Sth 372*b9238976Sth static int 373*b9238976Sth nfs4_trigger_access(vnode_t *vp, int mode, int flags, cred_t *cr) 374*b9238976Sth { 375*b9238976Sth int error; 376*b9238976Sth vnode_t *newvp; 377*b9238976Sth 378*b9238976Sth error = nfs4_trigger_mount(vp, &newvp); 379*b9238976Sth if (error) 380*b9238976Sth return (error); 381*b9238976Sth 382*b9238976Sth error = VOP_ACCESS(newvp, mode, flags, cr); 383*b9238976Sth VN_RELE(newvp); 384*b9238976Sth 385*b9238976Sth return (error); 386*b9238976Sth } 387*b9238976Sth 388*b9238976Sth static int 389*b9238976Sth nfs4_trigger_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp, 390*b9238976Sth int flags, vnode_t *rdir, cred_t *cr) 391*b9238976Sth { 392*b9238976Sth int error; 
393*b9238976Sth vnode_t *newdvp; 394*b9238976Sth rnode4_t *drp = VTOR4(dvp); 395*b9238976Sth 396*b9238976Sth ASSERT(RP_ISSTUB(drp)); 397*b9238976Sth 398*b9238976Sth /* for now, we only support mirror-mounts */ 399*b9238976Sth ASSERT(RP_ISSTUB_MIRRORMOUNT(drp)); 400*b9238976Sth 401*b9238976Sth /* 402*b9238976Sth * It's not legal to lookup ".." for an fs root, so we mustn't pass 403*b9238976Sth * that up. Instead, pass onto the regular op, regardless of whether 404*b9238976Sth * we've triggered a mount. 405*b9238976Sth */ 406*b9238976Sth if (strcmp(nm, "..") == 0) 407*b9238976Sth return (nfs4_lookup(dvp, nm, vpp, pnp, flags, rdir, cr)); 408*b9238976Sth 409*b9238976Sth error = nfs4_trigger_mount(dvp, &newdvp); 410*b9238976Sth if (error) 411*b9238976Sth return (error); 412*b9238976Sth 413*b9238976Sth error = VOP_LOOKUP(newdvp, nm, vpp, pnp, flags, rdir, cr); 414*b9238976Sth VN_RELE(newdvp); 415*b9238976Sth 416*b9238976Sth return (error); 417*b9238976Sth } 418*b9238976Sth 419*b9238976Sth static int 420*b9238976Sth nfs4_trigger_create(vnode_t *dvp, char *nm, struct vattr *va, 421*b9238976Sth enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr, 422*b9238976Sth int flags) 423*b9238976Sth { 424*b9238976Sth int error; 425*b9238976Sth vnode_t *newdvp; 426*b9238976Sth 427*b9238976Sth error = nfs4_trigger_mount(dvp, &newdvp); 428*b9238976Sth if (error) 429*b9238976Sth return (error); 430*b9238976Sth 431*b9238976Sth error = VOP_CREATE(newdvp, nm, va, exclusive, mode, vpp, cr, flags); 432*b9238976Sth VN_RELE(newdvp); 433*b9238976Sth 434*b9238976Sth return (error); 435*b9238976Sth } 436*b9238976Sth 437*b9238976Sth static int 438*b9238976Sth nfs4_trigger_remove(vnode_t *dvp, char *nm, cred_t *cr) 439*b9238976Sth { 440*b9238976Sth int error; 441*b9238976Sth vnode_t *newdvp; 442*b9238976Sth 443*b9238976Sth error = nfs4_trigger_mount(dvp, &newdvp); 444*b9238976Sth if (error) 445*b9238976Sth return (error); 446*b9238976Sth 447*b9238976Sth error = VOP_REMOVE(newdvp, nm, cr); 
448*b9238976Sth VN_RELE(newdvp); 449*b9238976Sth 450*b9238976Sth return (error); 451*b9238976Sth } 452*b9238976Sth 453*b9238976Sth static int 454*b9238976Sth nfs4_trigger_link(vnode_t *tdvp, vnode_t *svp, char *tnm, cred_t *cr) 455*b9238976Sth { 456*b9238976Sth int error; 457*b9238976Sth vnode_t *newtdvp; 458*b9238976Sth 459*b9238976Sth error = nfs4_trigger_mount(tdvp, &newtdvp); 460*b9238976Sth if (error) 461*b9238976Sth return (error); 462*b9238976Sth 463*b9238976Sth /* 464*b9238976Sth * We don't check whether svp is a stub. Let the NFSv4 code 465*b9238976Sth * detect that error, and return accordingly. 466*b9238976Sth */ 467*b9238976Sth error = VOP_LINK(newtdvp, svp, tnm, cr); 468*b9238976Sth VN_RELE(newtdvp); 469*b9238976Sth 470*b9238976Sth return (error); 471*b9238976Sth } 472*b9238976Sth 473*b9238976Sth static int 474*b9238976Sth nfs4_trigger_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, 475*b9238976Sth cred_t *cr) 476*b9238976Sth { 477*b9238976Sth int error; 478*b9238976Sth vnode_t *newsdvp; 479*b9238976Sth rnode4_t *tdrp = VTOR4(tdvp); 480*b9238976Sth 481*b9238976Sth /* 482*b9238976Sth * We know that sdvp is a stub, otherwise we would not be here. 483*b9238976Sth * 484*b9238976Sth * If tdvp is also be a stub, there are two possibilities: it 485*b9238976Sth * is either the same stub as sdvp [i.e. VN_CMP(sdvp, tdvp)] 486*b9238976Sth * or it is a different stub [!VN_CMP(sdvp, tdvp)]. 487*b9238976Sth * 488*b9238976Sth * In the former case, just trigger sdvp, and treat tdvp as 489*b9238976Sth * though it were not a stub. 490*b9238976Sth * 491*b9238976Sth * In the latter case, it might be a different stub for the 492*b9238976Sth * same server fs as sdvp, or for a different server fs. 493*b9238976Sth * Regardless, from the client perspective this would still 494*b9238976Sth * be a cross-filesystem rename, and should not be allowed, 495*b9238976Sth * so return EXDEV, without triggering either mount. 
496*b9238976Sth */ 497*b9238976Sth if (RP_ISSTUB(tdrp) && !VN_CMP(sdvp, tdvp)) 498*b9238976Sth return (EXDEV); 499*b9238976Sth 500*b9238976Sth error = nfs4_trigger_mount(sdvp, &newsdvp); 501*b9238976Sth if (error) 502*b9238976Sth return (error); 503*b9238976Sth 504*b9238976Sth error = VOP_RENAME(newsdvp, snm, tdvp, tnm, cr); 505*b9238976Sth 506*b9238976Sth VN_RELE(newsdvp); 507*b9238976Sth 508*b9238976Sth return (error); 509*b9238976Sth } 510*b9238976Sth 511*b9238976Sth static int 512*b9238976Sth nfs4_trigger_mkdir(vnode_t *dvp, char *nm, struct vattr *va, vnode_t **vpp, 513*b9238976Sth cred_t *cr) 514*b9238976Sth { 515*b9238976Sth int error; 516*b9238976Sth vnode_t *newdvp; 517*b9238976Sth 518*b9238976Sth error = nfs4_trigger_mount(dvp, &newdvp); 519*b9238976Sth if (error) 520*b9238976Sth return (error); 521*b9238976Sth 522*b9238976Sth error = VOP_MKDIR(newdvp, nm, va, vpp, cr); 523*b9238976Sth VN_RELE(newdvp); 524*b9238976Sth 525*b9238976Sth return (error); 526*b9238976Sth } 527*b9238976Sth 528*b9238976Sth static int 529*b9238976Sth nfs4_trigger_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr) 530*b9238976Sth { 531*b9238976Sth int error; 532*b9238976Sth vnode_t *newdvp; 533*b9238976Sth 534*b9238976Sth error = nfs4_trigger_mount(dvp, &newdvp); 535*b9238976Sth if (error) 536*b9238976Sth return (error); 537*b9238976Sth 538*b9238976Sth error = VOP_RMDIR(newdvp, nm, cdir, cr); 539*b9238976Sth VN_RELE(newdvp); 540*b9238976Sth 541*b9238976Sth return (error); 542*b9238976Sth } 543*b9238976Sth 544*b9238976Sth static int 545*b9238976Sth nfs4_trigger_symlink(vnode_t *dvp, char *lnm, struct vattr *tva, char *tnm, 546*b9238976Sth cred_t *cr) 547*b9238976Sth { 548*b9238976Sth int error; 549*b9238976Sth vnode_t *newdvp; 550*b9238976Sth 551*b9238976Sth error = nfs4_trigger_mount(dvp, &newdvp); 552*b9238976Sth if (error) 553*b9238976Sth return (error); 554*b9238976Sth 555*b9238976Sth error = VOP_SYMLINK(newdvp, lnm, tva, tnm, cr); 556*b9238976Sth VN_RELE(newdvp); 
557*b9238976Sth 558*b9238976Sth return (error); 559*b9238976Sth } 560*b9238976Sth 561*b9238976Sth static int 562*b9238976Sth nfs4_trigger_readlink(vnode_t *vp, struct uio *uiop, cred_t *cr) 563*b9238976Sth { 564*b9238976Sth int error; 565*b9238976Sth vnode_t *newvp; 566*b9238976Sth 567*b9238976Sth error = nfs4_trigger_mount(vp, &newvp); 568*b9238976Sth if (error) 569*b9238976Sth return (error); 570*b9238976Sth 571*b9238976Sth error = VOP_READLINK(newvp, uiop, cr); 572*b9238976Sth VN_RELE(newvp); 573*b9238976Sth 574*b9238976Sth return (error); 575*b9238976Sth } 576*b9238976Sth 577*b9238976Sth /* end of trigger vnode ops */ 578*b9238976Sth 579*b9238976Sth 580*b9238976Sth /* 581*b9238976Sth * Mount upon a trigger vnode; for mirror-mounts, etc. 582*b9238976Sth * 583*b9238976Sth * The mount may have already occurred, via another thread. If not, 584*b9238976Sth * assemble the location information - which may require fetching - and 585*b9238976Sth * perform the mount. 586*b9238976Sth * 587*b9238976Sth * Sets newvp to be the root of the fs that is now covering vp. Note 588*b9238976Sth * that we return with VN_HELD(*newvp). 589*b9238976Sth * 590*b9238976Sth * The caller is responsible for passing the VOP onto the covering fs. 
591*b9238976Sth */ 592*b9238976Sth static int 593*b9238976Sth nfs4_trigger_mount(vnode_t *vp, vnode_t **newvpp) 594*b9238976Sth { 595*b9238976Sth int error; 596*b9238976Sth vfs_t *vfsp; 597*b9238976Sth rnode4_t *rp = VTOR4(vp); 598*b9238976Sth mntinfo4_t *mi = VTOMI4(vp); 599*b9238976Sth domount_args_t *dma; 600*b9238976Sth 601*b9238976Sth nfs4_ephemeral_tree_t *net; 602*b9238976Sth 603*b9238976Sth bool_t must_unlock = FALSE; 604*b9238976Sth bool_t is_building = FALSE; 605*b9238976Sth 606*b9238976Sth cred_t *zcred; 607*b9238976Sth 608*b9238976Sth nfs4_trigger_globals_t *ntg; 609*b9238976Sth 610*b9238976Sth zone_t *zone = curproc->p_zone; 611*b9238976Sth 612*b9238976Sth ASSERT(RP_ISSTUB(rp)); 613*b9238976Sth 614*b9238976Sth /* for now, we only support mirror-mounts */ 615*b9238976Sth ASSERT(RP_ISSTUB_MIRRORMOUNT(rp)); 616*b9238976Sth 617*b9238976Sth *newvpp = NULL; 618*b9238976Sth 619*b9238976Sth /* 620*b9238976Sth * Has the mount already occurred? 621*b9238976Sth */ 622*b9238976Sth error = vn_vfsrlock_wait(vp); 623*b9238976Sth if (error) 624*b9238976Sth goto done; 625*b9238976Sth vfsp = vn_mountedvfs(vp); 626*b9238976Sth if (vfsp != NULL) { 627*b9238976Sth /* the mount has already occurred */ 628*b9238976Sth error = VFS_ROOT(vfsp, newvpp); 629*b9238976Sth if (!error) { 630*b9238976Sth /* need to update the reference time */ 631*b9238976Sth mutex_enter(&mi->mi_lock); 632*b9238976Sth if (mi->mi_ephemeral) 633*b9238976Sth mi->mi_ephemeral->ne_ref_time = 634*b9238976Sth gethrestime_sec(); 635*b9238976Sth mutex_exit(&mi->mi_lock); 636*b9238976Sth } 637*b9238976Sth 638*b9238976Sth vn_vfsunlock(vp); 639*b9238976Sth goto done; 640*b9238976Sth } 641*b9238976Sth vn_vfsunlock(vp); 642*b9238976Sth 643*b9238976Sth ntg = zone_getspecific(nfs4_ephemeral_key, zone); 644*b9238976Sth ASSERT(ntg != NULL); 645*b9238976Sth 646*b9238976Sth mutex_enter(&mi->mi_lock); 647*b9238976Sth 648*b9238976Sth /* 649*b9238976Sth * We need to lock down the ephemeral tree. 
650*b9238976Sth */ 651*b9238976Sth if (mi->mi_ephemeral_tree == NULL) { 652*b9238976Sth net = kmem_zalloc(sizeof (*net), KM_SLEEP); 653*b9238976Sth mutex_init(&net->net_tree_lock, NULL, MUTEX_DEFAULT, NULL); 654*b9238976Sth mutex_init(&net->net_cnt_lock, NULL, MUTEX_DEFAULT, NULL); 655*b9238976Sth net->net_refcnt = 1; 656*b9238976Sth net->net_status = NFS4_EPHEMERAL_TREE_BUILDING; 657*b9238976Sth is_building = TRUE; 658*b9238976Sth 659*b9238976Sth /* 660*b9238976Sth * We need to add it to the zone specific list for 661*b9238976Sth * automatic unmounting and harvesting of deadwood. 662*b9238976Sth */ 663*b9238976Sth mutex_enter(&ntg->ntg_forest_lock); 664*b9238976Sth if (ntg->ntg_forest != NULL) 665*b9238976Sth net->net_next = ntg->ntg_forest; 666*b9238976Sth ntg->ntg_forest = net; 667*b9238976Sth mutex_exit(&ntg->ntg_forest_lock); 668*b9238976Sth 669*b9238976Sth /* 670*b9238976Sth * No lock order confusion with mi_lock because no 671*b9238976Sth * other node could have grabbed net_tree_lock. 672*b9238976Sth */ 673*b9238976Sth mutex_enter(&net->net_tree_lock); 674*b9238976Sth mi->mi_ephemeral_tree = net; 675*b9238976Sth net->net_mount = mi; 676*b9238976Sth mutex_exit(&mi->mi_lock); 677*b9238976Sth } else { 678*b9238976Sth net = mi->mi_ephemeral_tree; 679*b9238976Sth mutex_exit(&mi->mi_lock); 680*b9238976Sth 681*b9238976Sth mutex_enter(&net->net_cnt_lock); 682*b9238976Sth net->net_refcnt++; 683*b9238976Sth mutex_exit(&net->net_cnt_lock); 684*b9238976Sth 685*b9238976Sth /* 686*b9238976Sth * Note that we do not do any checks to 687*b9238976Sth * see if the parent has been nuked. 688*b9238976Sth * We count on the vfs layer having protected 689*b9238976Sth * us from feet shooters. 
690*b9238976Sth */ 691*b9238976Sth mutex_enter(&net->net_tree_lock); 692*b9238976Sth } 693*b9238976Sth 694*b9238976Sth mutex_enter(&net->net_cnt_lock); 695*b9238976Sth net->net_status |= NFS4_EPHEMERAL_TREE_MOUNTING; 696*b9238976Sth mutex_exit(&net->net_cnt_lock); 697*b9238976Sth 698*b9238976Sth must_unlock = TRUE; 699*b9238976Sth 700*b9238976Sth dma = nfs4_trigger_domount_args_create(vp); 701*b9238976Sth if (dma == NULL) { 702*b9238976Sth error = EINVAL; 703*b9238976Sth goto done; 704*b9238976Sth } 705*b9238976Sth 706*b9238976Sth /* 707*b9238976Sth * Need to be root for this call to make mount work. 708*b9238976Sth * Note that since we define mirror mounts to work 709*b9238976Sth * for any user, we allow the mount to proceed. And 710*b9238976Sth * we realize that the server will perform security 711*b9238976Sth * checks to make sure that the client is allowed 712*b9238976Sth * access. Finally, once the mount takes place, 713*b9238976Sth * directory permissions will ensure that the 714*b9238976Sth * content is secure. 
715*b9238976Sth */ 716*b9238976Sth zcred = zone_get_kcred(getzoneid()); 717*b9238976Sth ASSERT(zcred != NULL); 718*b9238976Sth 719*b9238976Sth error = nfs4_trigger_domount(vp, dma, &vfsp, zcred); 720*b9238976Sth nfs4_trigger_domount_args_destroy(dma, vp); 721*b9238976Sth 722*b9238976Sth crfree(zcred); 723*b9238976Sth 724*b9238976Sth if (!error) 725*b9238976Sth error = VFS_ROOT(vfsp, newvpp); 726*b9238976Sth done: 727*b9238976Sth if (must_unlock) { 728*b9238976Sth mutex_enter(&net->net_cnt_lock); 729*b9238976Sth net->net_status &= ~NFS4_EPHEMERAL_TREE_MOUNTING; 730*b9238976Sth if (is_building) 731*b9238976Sth net->net_status &= ~NFS4_EPHEMERAL_TREE_BUILDING; 732*b9238976Sth net->net_refcnt--; 733*b9238976Sth mutex_exit(&net->net_cnt_lock); 734*b9238976Sth 735*b9238976Sth mutex_exit(&net->net_tree_lock); 736*b9238976Sth } 737*b9238976Sth 738*b9238976Sth if (!error && (newvpp == NULL || *newvpp == NULL)) 739*b9238976Sth error = ENOSYS; 740*b9238976Sth 741*b9238976Sth return (error); 742*b9238976Sth } 743*b9238976Sth 744*b9238976Sth /* 745*b9238976Sth * Collect together both the generic & mount-type specific args. 
 */
static domount_args_t *
nfs4_trigger_domount_args_create(vnode_t *vp)
{
	int nointr;
	char *hostlist;
	servinfo4_t *svp;
	struct nfs_args *nargs, *nargs_head;
	enum clnt_stat status;
	ephemeral_servinfo_t *esi, *esi_first;
	domount_args_t *dma;
	mntinfo4_t *mi = VTOMI4(vp);

	nointr = !(mi->mi_flags & MI4_INT);

	/* comma-separated list of all responding servers' hostnames */
	hostlist = kmem_zalloc(MAXPATHLEN, KM_SLEEP);

	svp = mi->mi_curr_serv;
	/* check if the current server is responding */
	status = nfs4_trigger_ping_server(svp, nointr);
	if (status == RPC_SUCCESS) {
		esi_first = nfs4_trigger_esi_create(vp, svp);
		if (esi_first == NULL) {
			kmem_free(hostlist, MAXPATHLEN);
			return (NULL);
		}

		(void) strlcpy(hostlist, esi_first->esi_hostname, MAXPATHLEN);

		nargs_head = nfs4_trigger_nargs_create(mi, svp, esi_first);
	} else {
		/* current server did not respond */
		esi_first = NULL;
		nargs_head = NULL;
	}
	nargs = nargs_head;

	/*
	 * NFS RO failover.
	 *
	 * If we have multiple servinfo4 structures, linked via sv_next,
	 * we must create one nfs_args for each, linking the nfs_args via
	 * nfs_ext_u.nfs_extB.next.
	 *
	 * We need to build a corresponding esi for each, too, but that is
	 * used solely for building nfs_args, and may be immediately
	 * discarded, as domount() requires the info from just one esi,
	 * but all the nfs_args.
	 *
	 * Currently, the NFS mount code will hang if not all servers
	 * requested are available. To avoid that, we need to ping each
	 * server, here, and remove it from the list if it is not
	 * responding. This has the side-effect of that server then
	 * being permanently unavailable for this failover mount, even if
	 * it recovers. That's unfortunate, but the best we can do until
	 * the mount code path is fixed.
	 */

	/*
	 * If the current server was down, loop indefinitely until we find
	 * at least one responsive server.
	 */
	do {
		/* no locking needed for sv_next; it is only set at fs mount */
		for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) {
			struct nfs_args *next;

			/*
			 * nargs_head: the head of the nfs_args list
			 * nargs: the current tail of the list
			 * next: the newly-created element to be added
			 */

			/*
			 * We've already tried the current server, above;
			 * if it was responding, we have already included it
			 * and it may now be ignored.
			 *
			 * Otherwise, try it again, since it may now have
			 * recovered.
			 */
			if (svp == mi->mi_curr_serv && esi_first != NULL)
				continue;

			(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
			if (svp->sv_flags & SV4_NOTINUSE) {
				nfs_rw_exit(&svp->sv_lock);
				continue;
			}
			nfs_rw_exit(&svp->sv_lock);

			/* check if the server is responding */
			status = nfs4_trigger_ping_server(svp, nointr);
			/* if the server did not respond, ignore it */
			if (status != RPC_SUCCESS)
				continue;

			esi = nfs4_trigger_esi_create(vp, svp);
			if (esi == NULL)
				continue;

			/*
			 * If the original current server (mi_curr_serv)
			 * was down when we first tried it,
			 * (i.e. esi_first == NULL),
			 * we select this new server (svp) to be the server
			 * that we will actually contact (esi_first).
			 *
			 * Note that it's possible that mi_curr_serv == svp,
			 * if that mi_curr_serv was down but has now recovered.
			 */
			next = nfs4_trigger_nargs_create(mi, svp, esi);
			if (esi_first == NULL) {
				ASSERT(nargs == NULL);
				ASSERT(nargs_head == NULL);
				nargs_head = next;
				esi_first = esi;
				(void) strlcpy(hostlist,
				    esi_first->esi_hostname, MAXPATHLEN);
			} else {
				ASSERT(nargs_head != NULL);
				nargs->nfs_ext_u.nfs_extB.next = next;
				(void) strlcat(hostlist, ",", MAXPATHLEN);
				(void) strlcat(hostlist, esi->esi_hostname,
				    MAXPATHLEN);
				/* esi was only needed for hostname & nargs */
				nfs4_trigger_esi_destroy(esi, vp);
			}

			nargs = next;
		}

		/* if we've had no response at all, wait a second */
		if (esi_first == NULL)
			delay(drv_usectohz(1000000));

	} while (esi_first == NULL);
	ASSERT(nargs_head != NULL);

	dma = kmem_zalloc(sizeof (domount_args_t), KM_SLEEP);
	dma->dma_esi = esi_first;
	dma->dma_hostlist = hostlist;
	dma->dma_nargs = nargs_head;

	return (dma);
}

/*
 * Free a domount_args_t and everything it still owns: the selected
 * ephemeral_servinfo_t (via the type-specific destroy, which needs vp),
 * the hostname list, and the whole chain of nfs_args structures.
 */
static void
nfs4_trigger_domount_args_destroy(domount_args_t *dma, vnode_t *vp)
{
	if (dma != NULL) {
		if (dma->dma_esi != NULL && vp != NULL)
			nfs4_trigger_esi_destroy(dma->dma_esi, vp);

		if (dma->dma_hostlist != NULL)
			kmem_free(dma->dma_hostlist, MAXPATHLEN);

		if (dma->dma_nargs != NULL) {
			struct nfs_args *nargs = dma->dma_nargs;

			/* walk the failover chain, freeing each element */
			do {
				struct nfs_args *next =
				    nargs->nfs_ext_u.nfs_extB.next;

				nfs4_trigger_nargs_destroy(nargs);
				nargs = next;
			} while (nargs != NULL);
		}

		kmem_free(dma, sizeof (domount_args_t));
	}
}

/*
 * The ephemeral_servinfo_t struct contains basic information we will need to
 * perform the mount. Whilst the structure is generic across different
 * types of ephemeral mount, the way we gather its contents differs.
 */
static ephemeral_servinfo_t *
nfs4_trigger_esi_create(vnode_t *vp, servinfo4_t *svp)
{
	ephemeral_servinfo_t *esi;
	rnode4_t *rp = VTOR4(vp);

	ASSERT(RP_ISSTUB(rp));

	/* Call the ephemeral type-specific routine */
	if (RP_ISSTUB_MIRRORMOUNT(rp))
		esi = nfs4_trigger_esi_create_mirrormount(vp, svp);
	else
		esi = NULL;

	/* for now, we only support mirror-mounts */
	ASSERT(esi != NULL);

	return (esi);
}

static void
nfs4_trigger_esi_destroy(ephemeral_servinfo_t *esi, vnode_t *vp)
{
	rnode4_t *rp = VTOR4(vp);

	ASSERT(RP_ISSTUB(rp));

	/* for now, we only support mirror-mounts */
	ASSERT(RP_ISSTUB_MIRRORMOUNT(rp));

	/* Currently, no need for an ephemeral type-specific routine */

	/*
	 * The contents of ephemeral_servinfo_t goes into nfs_args,
	 * and will be handled by nfs4_trigger_nargs_destroy().
	 * We need only free the structure itself.
	 */
	if (esi != NULL)
		kmem_free(esi, sizeof (ephemeral_servinfo_t));
}

/*
 * Some of this may turn out to be common with other ephemeral types,
 * in which case it should be moved to nfs4_trigger_esi_create(), or a
 * common function called.
 */
static ephemeral_servinfo_t *
nfs4_trigger_esi_create_mirrormount(vnode_t *vp, servinfo4_t *svp)
{
	char *stubpath;
	struct knetconfig *sikncp, *svkncp;
	struct netbuf *bufp;
	ephemeral_servinfo_t *esi;

	esi = kmem_zalloc(sizeof (ephemeral_servinfo_t), KM_SLEEP);

	/* initially set to be our type of ephemeral mount; may be added to */
	esi->esi_mount_flags = NFSMNT_MIRRORMOUNT;

	/*
	 * We're copying info from the stub rnode's servinfo4, but
	 * we must create new copies, not pointers, since this information
	 * is to be associated with the new mount, which will be
	 * unmounted (and its structures freed) separately
	 */

	/*
	 * Sizes passed to kmem_[z]alloc here must match those freed
	 * in nfs4_free_args()
	 */

	/*
	 * We hold sv_lock across kmem_zalloc() calls that may sleep, but this
	 * is difficult to avoid: as we need to read svp to calculate the
	 * sizes to be allocated.
	 */
	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);

	/*
	 * kmem_zalloc() returns zeroed memory, so the strcat() calls
	 * below append to an empty string, i.e. act as plain copies.
	 */
	esi->esi_hostname = kmem_zalloc(strlen(svp->sv_hostname) + 1, KM_SLEEP);
	(void) strcat(esi->esi_hostname, svp->sv_hostname);

	esi->esi_addr = kmem_zalloc(sizeof (struct netbuf), KM_SLEEP);
	bufp = esi->esi_addr;
	bufp->len = svp->sv_addr.len;
	bufp->maxlen = svp->sv_addr.maxlen;
	bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP);
	bcopy(svp->sv_addr.buf, bufp->buf, bufp->len);

	esi->esi_knconf = kmem_zalloc(sizeof (*esi->esi_knconf), KM_SLEEP);
	sikncp = esi->esi_knconf;
	svkncp = svp->sv_knconf;
	sikncp->knc_semantics = svkncp->knc_semantics;
	sikncp->knc_protofmly = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
	(void) strcat((char *)sikncp->knc_protofmly,
	    (char *)svkncp->knc_protofmly);
	sikncp->knc_proto = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
	(void) strcat((char *)sikncp->knc_proto, (char *)svkncp->knc_proto);
	sikncp->knc_rdev = svkncp->knc_rdev;

	/*
	 * Used when AUTH_DH is negotiated.
	 *
	 * This is ephemeral mount-type specific, since it contains the
	 * server's time-sync syncaddr.
	 */
	if (svp->sv_dhsec) {
		struct netbuf *bufp;
		sec_data_t *sdata;
		dh_k4_clntdata_t *data;

		sdata = svp->sv_dhsec;
		data = (dh_k4_clntdata_t *)sdata->data;
		ASSERT(sdata->rpcflavor == AUTH_DH);

		bufp = kmem_zalloc(sizeof (struct netbuf), KM_SLEEP);
		bufp->len = data->syncaddr.len;
		bufp->maxlen = data->syncaddr.maxlen;
		bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP);
		bcopy(data->syncaddr.buf, bufp->buf, bufp->len);
		esi->esi_syncaddr = bufp;

		if (data->netname != NULL) {
			int nmlen = data->netnamelen;

			/*
			 * We need to copy from a dh_k4_clntdata_t
			 * netname/netnamelen pair to a NUL-terminated
			 * netname string suitable for putting in nfs_args,
			 * where the latter has no netnamelen field.
			 */
			esi->esi_netname = kmem_zalloc(nmlen + 1, KM_SLEEP);
			bcopy(data->netname, esi->esi_netname, nmlen);
		}
	} else {
		esi->esi_syncaddr = NULL;
		esi->esi_netname = NULL;
	}

	stubpath = fn_path(VTOSV(vp)->sv_name);
	/* step over initial '.', to avoid e.g. sv_path: "/tank./ws" */
	ASSERT(*stubpath == '.');
	stubpath += 1;

	/* for nfs_args->fh */
	esi->esi_path_len = strlen(svp->sv_path) + strlen(stubpath) + 1;
	esi->esi_path = kmem_zalloc(esi->esi_path_len, KM_SLEEP);
	(void) strcat(esi->esi_path, svp->sv_path);
	(void) strcat(esi->esi_path, stubpath);

	/* restore the original pointer before freeing */
	stubpath -= 1;
	/* stubpath allocated by fn_path() */
	kmem_free(stubpath, strlen(stubpath) + 1);

	nfs_rw_exit(&svp->sv_lock);

	return (esi);
}

/*
 * Assemble the args, and call the generic VFS mount function to
 * finally perform the ephemeral mount.
 */
static int
nfs4_trigger_domount(vnode_t *stubvp, domount_args_t *dma, vfs_t **vfsp,
    cred_t *cr)
{
	struct mounta *uap;
	char *mntpt, *orig_path, *path;
	const char *orig_mntpt;
	int retval;
	int mntpt_len;
	int spec_len;
	zone_t *zone = curproc->p_zone;
	bool_t has_leading_slash;

	vfs_t *stubvfsp = stubvp->v_vfsp;
	ephemeral_servinfo_t *esi = dma->dma_esi;
	struct nfs_args *nargs = dma->dma_nargs;

	/* first, construct the mount point for the ephemeral mount */
	orig_path = path = fn_path(VTOSV(stubvp)->sv_name);
	orig_mntpt = (char *)refstr_value(stubvfsp->vfs_mntpt);

	if (*orig_path == '.')
		orig_path++;

	/*
	 * Get rid of zone's root path
	 */
	if (zone != global_zone) {
		/*
		 * -1 for trailing '/' and -1 for EOS.
		 */
		if (strncmp(zone->zone_rootpath, orig_mntpt,
		    zone->zone_rootpathlen - 1) == 0) {
			orig_mntpt += (zone->zone_rootpathlen - 2);
		}
	}

	/* mntpt = parent's mount point + stub's path within the parent */
	mntpt_len = strlen(orig_mntpt) + strlen(orig_path);
	mntpt = kmem_zalloc(mntpt_len + 1, KM_SLEEP);
	(void) strcat(mntpt, orig_mntpt);
	(void) strcat(mntpt, orig_path);

	kmem_free(path, strlen(path) + 1);
	path = esi->esi_path;
	if (*path == '.')
		path++;
	if (path[0] == '/' && path[1] == '/')
		path++;
	has_leading_slash = (*path == '/');

	spec_len = strlen(dma->dma_hostlist);
	spec_len += strlen(path);

	/* We are going to have to add this in */
	if (!has_leading_slash)
		spec_len++;

	/* We need to get the ':' for dma_hostlist:esi_path */
	spec_len++;

	uap = kmem_zalloc(sizeof (struct mounta), KM_SLEEP);
	uap->spec = kmem_zalloc(spec_len + 1, KM_SLEEP);
	(void) snprintf(uap->spec, spec_len + 1, "%s:%s%s", dma->dma_hostlist,
	    has_leading_slash ? "" : "/", path);

	uap->dir = mntpt;

	uap->flags = MS_SYSSPACE | MS_DATA;
	/* fstype-independent mount options not covered elsewhere */
	/* copy parent's mount(1M) "-m" flag */
	if (stubvfsp->vfs_flag & VFS_NOMNTTAB)
		uap->flags |= MS_NOMNTTAB;

	uap->fstype = MNTTYPE_NFS4;
	uap->dataptr = (char *)nargs;
	/* not needed for MS_SYSSPACE */
	uap->datalen = 0;

	/* use optptr to pass in extra mount options */
	uap->flags |= MS_OPTIONSTR;
	uap->optptr = nfs4_trigger_create_mntopts(stubvfsp);
	if (uap->optptr == NULL) {
		retval = EINVAL;
		goto done;
	}
	/* domount() expects us to count the trailing NUL */
	uap->optlen = strlen(uap->optptr) + 1;

	retval = domount(NULL, uap, stubvp, cr, vfsp);
	/*
	 * NOTE(review): domount() appears to return with the new vfs
	 * held; the hold is dropped here since the caller takes its own
	 * reference via VFS_ROOT() — confirm against domount().
	 */
	if (retval == 0)
		VFS_RELE(*vfsp);
done:
	if (uap->optptr)
		nfs4_trigger_destroy_mntopts(uap->optptr);

	kmem_free(uap->spec, spec_len + 1);
	kmem_free(uap, sizeof (struct mounta));
	kmem_free(mntpt, mntpt_len + 1);

	return (retval);
}

/*
 * Build an nfs_args structure for passing to domount().
 *
 * Ephemeral mount-type specific data comes from the ephemeral_servinfo_t;
 * generic data - common to all ephemeral mount types - is read directly
 * from the parent mount's servinfo4_t and mntinfo4_t, via the stub vnode.
 */
static struct nfs_args *
nfs4_trigger_nargs_create(mntinfo4_t *mi, servinfo4_t *svp,
    ephemeral_servinfo_t *esi)
{
	sec_data_t *secdata;
	struct nfs_args *nargs;

	/* setup the nfs args */
	nargs = kmem_zalloc(sizeof (struct nfs_args), KM_SLEEP);

	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);

	/*
	 * The esi's dynamically-allocated members (addr, knconf, hostname,
	 * path, syncaddr, netname) are handed to the new nfs_args by
	 * reference, not copied; they will eventually be freed via
	 * nfs4_trigger_nargs_destroy() -> nfs4_free_args().
	 */
	nargs->addr = esi->esi_addr;

	/* for AUTH_DH by negotiation */
	if (esi->esi_syncaddr || esi->esi_netname) {
		nargs->flags |= NFSMNT_SECURE;
		nargs->syncaddr = esi->esi_syncaddr;
		nargs->netname = esi->esi_netname;
	}

	nargs->flags |= NFSMNT_KNCONF;
	nargs->knconf = esi->esi_knconf;
	nargs->flags |= NFSMNT_HOSTNAME;
	nargs->hostname = esi->esi_hostname;
	nargs->fh = esi->esi_path;

	/* general mount settings, all copied from parent mount */
	mutex_enter(&mi->mi_lock);

	if (!(mi->mi_flags & MI4_HARD))
		nargs->flags |= NFSMNT_SOFT;

	nargs->flags |= NFSMNT_WSIZE | NFSMNT_RSIZE | NFSMNT_TIMEO |
	    NFSMNT_RETRANS;
	nargs->wsize = mi->mi_stsize;
	nargs->rsize = mi->mi_tsize;
	nargs->timeo = mi->mi_timeo;
	nargs->retrans = mi->mi_retrans;

	if (mi->mi_flags & MI4_INT)
		nargs->flags |= NFSMNT_INT;
	if (mi->mi_flags & MI4_NOAC)
		nargs->flags |= NFSMNT_NOAC;

	nargs->flags |= NFSMNT_ACREGMIN | NFSMNT_ACREGMAX | NFSMNT_ACDIRMIN |
	    NFSMNT_ACDIRMAX;
	nargs->acregmin = HR2SEC(mi->mi_acregmin);
	nargs->acregmax = HR2SEC(mi->mi_acregmax);
	nargs->acdirmin = HR2SEC(mi->mi_acdirmin);
	nargs->acdirmax = HR2SEC(mi->mi_acdirmax);

	if (mi->mi_flags & MI4_NOCTO)
		nargs->flags |= NFSMNT_NOCTO;
	if (mi->mi_flags & MI4_GRPID)
		nargs->flags |= NFSMNT_GRPID;
	if (mi->mi_flags & MI4_LLOCK)
		nargs->flags |= NFSMNT_LLOCK;
	if (mi->mi_flags & MI4_NOPRINT)
		nargs->flags |= NFSMNT_NOPRINT;
	if (mi->mi_flags & MI4_DIRECTIO)
		nargs->flags |= NFSMNT_DIRECTIO;
	if (mi->mi_flags & MI4_PUBLIC)
		nargs->flags |= NFSMNT_PUBLIC;

	mutex_exit(&mi->mi_lock);

	/* add any specific flags for this type of ephemeral mount */
	nargs->flags |= esi->esi_mount_flags;

	/*
	 * Security data & negotiation policy.
	 *
	 * We need to preserve the parent mount's preference for security
	 * negotiation, translating SV4_TRYSECDEFAULT -> NFSMNT_SECDEFAULT.
	 *
	 * If SV4_TRYSECDEFAULT is not set, that indicates that a specific
	 * security flavour was requested, with data in sv_secdata, and that
	 * no negotiation should occur. If this specified flavour fails, that's
	 * it. We will copy sv_secdata, and not set NFSMNT_SECDEFAULT.
	 *
	 * If SV4_TRYSECDEFAULT is set, then we start with a passed-in
	 * default flavour, in sv_secdata, but then negotiate a new flavour.
	 * Possible flavours are recorded in an array in sv_secinfo, with
	 * currently in-use flavour pointed to by sv_currsec.
	 *
	 * If sv_currsec is set, i.e. if negotiation has already occurred,
	 * we will copy sv_currsec. Otherwise, copy sv_secdata. Regardless,
	 * we will set NFSMNT_SECDEFAULT, to enable negotiation.
	 */
	if (svp->sv_flags & SV4_TRYSECDEFAULT) {
		/* enable negotiation for ephemeral mount */
		nargs->flags |= NFSMNT_SECDEFAULT;

		/*
		 * As a starting point for negotiation, copy parent
		 * mount's negotiated flavour (sv_currsec) if available,
		 * or its passed-in flavour (sv_secdata) if not.
		 */
		if (svp->sv_currsec != NULL)
			secdata = copy_sec_data(svp->sv_currsec);
		else if (svp->sv_secdata != NULL)
			secdata = copy_sec_data(svp->sv_secdata);
		else
			secdata = NULL;
	} else {
		/* do not enable negotiation; copy parent's passed-in flavour */
		if (svp->sv_secdata != NULL)
			secdata = copy_sec_data(svp->sv_secdata);
		else
			secdata = NULL;
	}

	nfs_rw_exit(&svp->sv_lock);

	nargs->flags |= NFSMNT_NEWARGS;
	nargs->nfs_args_ext = NFS_ARGS_EXTB;
	nargs->nfs_ext_u.nfs_extB.secdata = secdata;

	/* for NFS RO failover; caller will set if necessary */
	nargs->nfs_ext_u.nfs_extB.next = NULL;

	return (nargs);
}

static void
nfs4_trigger_nargs_destroy(struct nfs_args *nargs)
{
	/*
	 * Either the mount failed, in which case the data is not needed, or
	 * nfs4_mount() has either taken copies of what it needs or,
	 * where it has merely copied the ptr, it has set *our* ptr to NULL,
	 * whereby nfs4_free_args() will ignore it.
	 */
	nfs4_free_args(nargs);
	kmem_free(nargs, sizeof (struct nfs_args));
}

/*
 * When we finally get into the mounting, we need to add this
 * node to the ephemeral tree.
 *
 * This is called from nfs4_mount().
 */
void
nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp)
{
	mntinfo4_t *mi_parent;
	nfs4_ephemeral_t *eph;
	nfs4_ephemeral_tree_t *net;

	nfs4_ephemeral_t *prior;
	nfs4_ephemeral_t *child;

	nfs4_ephemeral_t *peer;

	nfs4_trigger_globals_t *ntg;
	zone_t *zone = curproc->p_zone;

	mi_parent = VTOMI4(mvp);

	/*
	 * Get this before grabbing anything else!
	 */
	ntg = zone_getspecific(nfs4_ephemeral_key, zone);
	if (!ntg->ntg_thread_started) {
		nfs4_ephemeral_start_harvester(ntg);
	}

	/* lock order: parent's mi_lock before the new mount's mi_lock */
	mutex_enter(&mi_parent->mi_lock);
	mutex_enter(&mi->mi_lock);

	/*
	 * We need to tack together the ephemeral mount
	 * with this new mntinfo.
	 */
	eph = kmem_zalloc(sizeof (*eph), KM_SLEEP);
	eph->ne_mount = mi;
	eph->ne_ref_time = gethrestime_sec();

	/*
	 * We need to tell the ephemeral mount when
	 * to time out.
	 */
	eph->ne_mount_to = ntg->ntg_mount_to;

	mi->mi_flags |= MI4_EPHEMERAL;
	mi->mi_ephemeral = eph;

	/* the new mount joins the parent's ephemeral tree */
	net = mi->mi_ephemeral_tree =
	    mi_parent->mi_ephemeral_tree;
	ASSERT(net != NULL);

	/*
	 * If the enclosing mntinfo4 is also ephemeral,
	 * then we need to point to its enclosing parent.
	 * Else the enclosing mntinfo4 is the enclosing parent.
	 *
	 * We also need to weave this ephemeral node
	 * into the tree.
	 */
	if (mi_parent->mi_flags & MI4_EPHEMERAL) {
		/*
		 * We need to decide if we are
		 * the root node of this branch
		 * or if we are a sibling of this
		 * branch.
		 */
		prior = mi_parent->mi_ephemeral;
		ASSERT(prior != NULL);
		if (prior->ne_child == NULL) {
			prior->ne_child = eph;
		} else {
			/* push onto the head of the child list */
			child = prior->ne_child;

			prior->ne_child = eph;
			eph->ne_peer = child;

			child->ne_prior = eph;
		}

		eph->ne_prior = prior;
	} else {
		/*
		 * The parent mntinfo4 is the non-ephemeral
		 * root of the ephemeral tree. We
		 * need to decide if we are the root
		 * node of that tree or if we are a
		 * sibling of the root node.
		 *
		 * We are the root if there is no
		 * other node.
		 */
		if (net->net_root == NULL) {
			net->net_root = eph;
		} else {
			/* push onto the head of the root's peer list */
			eph->ne_peer = peer = net->net_root;
			ASSERT(peer != NULL);
			net->net_root = eph;

			peer->ne_prior = eph;
		}

		eph->ne_prior = NULL;
	}

	mutex_exit(&mi->mi_lock);
	mutex_exit(&mi_parent->mi_lock);
}

/*
 * Commit the changes to the ephemeral tree for removing this node.
 */
static void
nfs4_ephemeral_umount_cleanup(nfs4_ephemeral_t *eph)
{
	nfs4_ephemeral_t *e = eph;
	nfs4_ephemeral_t *peer;
	nfs4_ephemeral_t *prior;

	peer = eph->ne_peer;
	prior = e->ne_prior;

	/*
	 * If this branch root was not the
	 * tree root, then we need to fix back pointers.
	 */
	if (prior) {
		/* unlink e; prior adopts e's peer in its place */
		if (prior->ne_child == e) {
			prior->ne_child = peer;
		} else {
			prior->ne_peer = peer;
		}

		if (peer)
			peer->ne_prior = prior;
	} else if (peer) {
		/* e was the tree root; promote its peer */
		peer->ne_mount->mi_ephemeral_tree->net_root = peer;
		peer->ne_prior = NULL;
	} else {
		/* e was the only node in the tree */
		e->ne_mount->mi_ephemeral_tree->net_root = NULL;
	}
}

/*
 * We want to avoid recursion at all costs. So we need to
 * unroll the tree. We do this by a depth first traversal to
 * leaf nodes. We blast away the leaf and work our way back
 * up and down the tree.
 */
static int
nfs4_ephemeral_unmount_engine(nfs4_ephemeral_t *eph,
    int isTreeRoot, int flag, cred_t *cr)
{
	nfs4_ephemeral_t *e = eph;
	nfs4_ephemeral_t *prior;
	mntinfo4_t *mi;
	vfs_t *vfsp;
	int error;

	/*
	 * We use the loop while unrolling the ephemeral tree.
	 * e is the node under consideration; prior is the node
	 * we arrived from.
	 */
	for (;;) {
		/*
		 * First we walk down the child.
		 */
		if (e->ne_child) {
			prior = e;
			e = e->ne_child;
			continue;
		}

		/*
		 * If we are the root of the branch we are removing,
		 * we end it here. But if the branch is the root of
		 * the tree, we have to forge on. We do not consider
		 * the peer list for the root because while it may
		 * be okay to remove, it is both extra work and a
		 * potential for a false-positive error to stall the
		 * unmount attempt.
		 */
		if (e == eph && isTreeRoot == FALSE)
			return (0);

		/*
		 * Next we walk down the peer list.
		 */
		if (e->ne_peer) {
			prior = e;
			e = e->ne_peer;
			continue;
		}

		/*
		 * We can only remove the node passed in by the
		 * caller if it is the root of the ephemeral tree.
		 * Otherwise, the caller will remove it.
		 */
		if (e == eph && isTreeRoot == FALSE)
			return (0);

		/*
		 * Okay, we have a leaf node, time
		 * to prune it!
		 *
		 * Note that prior can only be NULL if
		 * and only if it is the root of the
		 * ephemeral tree.
		 */
		prior = e->ne_prior;

		mi = e->ne_mount;
		mutex_enter(&mi->mi_lock);
		vfsp = mi->mi_vfsp;

		/*
		 * Cleared by umount2_engine.
		 */
		VFS_HOLD(vfsp);

		/*
		 * Inform nfs4_unmount to not recursively
		 * descend into this node's children when it
		 * gets processed.
		 */
		mi->mi_flags |= MI4_EPHEMERAL_RECURSED;
		mutex_exit(&mi->mi_lock);

		error = umount2_engine(vfsp, flag, cr, FALSE);
		if (error) {
			/*
			 * We need to reenable nfs4_unmount's ability
			 * to recursively descend on this node.
			 */
			mutex_enter(&mi->mi_lock);
			mi->mi_flags &= ~MI4_EPHEMERAL_RECURSED;
			mutex_exit(&mi->mi_lock);

			return (error);
		}

		/*
		 * If we are the current node, we do not want to
		 * touch anything else. At this point, the only
		 * way the current node can have survived to here
		 * is if it is the root of the ephemeral tree and
		 * we are unmounting the enclosing mntinfo4.
		 */
		if (e == eph) {
			ASSERT(prior == NULL);
			return (0);
		}

		/*
		 * Stitch up the prior node. Note that since
		 * we have handled the root of the tree, prior
		 * must be non-NULL.
		 */
		ASSERT(prior != NULL);
		if (prior->ne_child == e) {
			prior->ne_child = NULL;
		} else {
			ASSERT(prior->ne_peer == e);

			prior->ne_peer = NULL;
		}

		/* back up and continue pruning from the parent */
		e = prior;
	}

	/* NOTREACHED */
}

/*
 * Common code to safely release net_cnt_lock and net_tree_lock
 */
void
nfs4_ephemeral_umount_unlock(bool_t *pmust_unlock,
    nfs4_ephemeral_tree_t **pnet)
{
	nfs4_ephemeral_tree_t *net = *pnet;

	if (*pmust_unlock) {
		mutex_enter(&net->net_cnt_lock);
		net->net_refcnt--;
		net->net_status &= ~NFS4_EPHEMERAL_TREE_UMOUNTING;
		mutex_exit(&net->net_cnt_lock);

		mutex_exit(&net->net_tree_lock);

		*pmust_unlock = FALSE;
	}
}

/*
 * While we may have removed any child or sibling nodes of this
 * ephemeral node, we can not nuke it until we know that there
 * are no active vnodes on it. This will do that final
 * work once we know it is not busy.
 */
void
nfs4_ephemeral_umount_activate(mntinfo4_t *mi, bool_t *pmust_unlock,
    nfs4_ephemeral_tree_t **pnet)
{
	/*
	 * Now we need to get rid of the ephemeral data if it exists.
	 */
	mutex_enter(&mi->mi_lock);
	if (mi->mi_ephemeral) {
		/*
		 * If we are the root node of an ephemeral branch
		 * which is being removed, then we need to fixup
		 * pointers into and out of the node.
		 */
		if (!(mi->mi_flags & MI4_EPHEMERAL_RECURSED))
			nfs4_ephemeral_umount_cleanup(mi->mi_ephemeral);

		ASSERT(mi->mi_ephemeral != NULL);

		kmem_free(mi->mi_ephemeral, sizeof (*mi->mi_ephemeral));
		mi->mi_ephemeral = NULL;
	}
	mutex_exit(&mi->mi_lock);

	nfs4_ephemeral_umount_unlock(pmust_unlock, pnet);
}

/*
 * Unmount an ephemeral node.
 *
 * Returns 0 on success or an errno (e.g. EBUSY) if the node or one
 * of its ephemeral children can not be unmounted. On success with
 * *pmust_unlock set to TRUE, the caller is responsible for eventually
 * calling nfs4_ephemeral_umount_unlock() (typically via
 * nfs4_ephemeral_umount_activate()).
 */
int
nfs4_ephemeral_umount(mntinfo4_t *mi, int flag, cred_t *cr,
    bool_t *pmust_unlock, nfs4_ephemeral_tree_t **pnet)
{
	int			error = 0;
	nfs4_ephemeral_t	*eph;
	nfs4_ephemeral_tree_t	*net;
	int			is_derooting = FALSE;
	int			is_recursed = FALSE;
	int			was_locked = FALSE;

	/*
	 * The active vnodes on this file system may be ephemeral
	 * children. We need to check for and try to unmount them
	 * here. If any can not be unmounted, we are going
	 * to return EBUSY.
	 */
	mutex_enter(&mi->mi_lock);

	/*
	 * If an ephemeral tree, we need to check to see if
	 * the lock is already held. If it is, then we need
	 * to see if we are being called as a result of
	 * the recursive removal of some node of the tree or
	 * if we are another attempt to remove the tree.
	 *
	 * mi_flags & MI4_EPHEMERAL indicates an ephemeral
	 * node. mi_ephemeral being non-NULL also does this.
	 *
	 * mi_ephemeral_tree being non-NULL is sufficient
	 * to also indicate either it is an ephemeral node
	 * or the enclosing mntinfo4.
	 *
	 * Do we need MI4_EPHEMERAL? Yes, it is useful for
	 * when we delete the ephemeral node and need to
	 * differentiate from an ephemeral node and the
	 * enclosing root node.
	 */
	*pnet = net = mi->mi_ephemeral_tree;
	eph = mi->mi_ephemeral;
	if (net) {
		/* derooting == unmounting the tree's enclosing mntinfo4 */
		is_recursed = mi->mi_flags & MI4_EPHEMERAL_RECURSED;
		is_derooting = (eph == NULL);
		mutex_exit(&mi->mi_lock);

		/*
		 * If this is not recursion, then we need to
		 * grab a ref count.
		 *
		 * But wait, we also do not want to do that
		 * if a harvester thread has already grabbed
		 * the lock.
		 */
		if (!is_recursed) {
			mutex_enter(&net->net_cnt_lock);
			if (net->net_status &
			    NFS4_EPHEMERAL_TREE_LOCKED)
				was_locked = TRUE;
			else
				net->net_refcnt++;
			mutex_exit(&net->net_cnt_lock);
		}

		/*
		 * If we grab the lock, it means that no other
		 * operation is working on the tree. If we don't
		 * grab it, we need to decide if this is because
		 * we are a recursive call or a new operation.
		 *
		 * If we are a recursive call, we proceed without
		 * the lock.
		 *
		 * Else we have to wait until the lock becomes free.
		 */
		if (was_locked == FALSE &&
		    !mutex_tryenter(&net->net_tree_lock)) {
			if (!is_recursed) {
				mutex_enter(&net->net_cnt_lock);
				if (net->net_status &
				    (NFS4_EPHEMERAL_TREE_DEROOTING
				    | NFS4_EPHEMERAL_TREE_INVALID)) {
					net->net_refcnt--;
					mutex_exit(&net->net_cnt_lock);
					goto is_busy;
				}
				mutex_exit(&net->net_cnt_lock);

				/*
				 * We can't hold any other locks whilst
				 * we wait on this to free up.
				 */
				mutex_enter(&net->net_tree_lock);

				/*
				 * Note that while mi->mi_ephemeral
				 * may change and thus we have to
				 * update eph, it is the case that
				 * we have tied down net and
				 * do not care if mi->mi_ephemeral_tree
				 * has changed.
				 */
				mutex_enter(&mi->mi_lock);
				eph = mi->mi_ephemeral;
				mutex_exit(&mi->mi_lock);

				/*
				 * Okay, we need to see if either the
				 * tree got nuked or the current node
				 * got nuked. Both of which will cause
				 * an error.
				 *
				 * Note that a subsequent retry of the
				 * umount shall work.
				 */
				mutex_enter(&net->net_cnt_lock);
				if (net->net_status &
				    NFS4_EPHEMERAL_TREE_INVALID ||
				    (!is_derooting && eph == NULL)) {
					net->net_refcnt--;
					mutex_exit(&net->net_cnt_lock);
					mutex_exit(&net->net_tree_lock);
					goto is_busy;
				}
				mutex_exit(&net->net_cnt_lock);
				*pmust_unlock = TRUE;
			}
		} else if (was_locked == FALSE) {
			/*
			 * If we grab it right away, everything must
			 * be great!
			 */
			*pmust_unlock = TRUE;
		}

		/*
		 * Only once we have grabbed the lock can we mark what we
		 * are planning on doing to the ephemeral tree.
		 */
		if (*pmust_unlock) {
			mutex_enter(&net->net_cnt_lock);
			net->net_status |= NFS4_EPHEMERAL_TREE_UMOUNTING;

			/*
			 * Check to see if we are nuking the root.
			 */
			if (is_derooting)
				net->net_status |=
				    NFS4_EPHEMERAL_TREE_DEROOTING;
			mutex_exit(&net->net_cnt_lock);
		}

		if (!is_derooting) {
			/*
			 * Only work on children if the caller has not already
			 * done so.
			 */
			if (!is_recursed) {
				ASSERT(eph != NULL);

				error = nfs4_ephemeral_unmount_engine(eph,
				    FALSE, flag, cr);
				if (error)
					goto is_busy;
			}
		} else {
			eph = net->net_root;

			/*
			 * Only work if there is something there.
			 */
			if (eph) {
				error = nfs4_ephemeral_unmount_engine(eph, TRUE,
				    flag, cr);
				if (error) {
					mutex_enter(&net->net_cnt_lock);
					net->net_status &=
					    ~NFS4_EPHEMERAL_TREE_DEROOTING;
					mutex_exit(&net->net_cnt_lock);
					goto is_busy;
				}

				/*
				 * Nothing else which goes wrong will
				 * invalidate the blowing away of the
				 * ephemeral tree.
				 */
				net->net_root = NULL;
			}

			/*
			 * We have derooted and we have caused the tree to be
			 * invalid.
			 */
			mutex_enter(&net->net_cnt_lock);
			net->net_status &= ~NFS4_EPHEMERAL_TREE_DEROOTING;
			net->net_status |= NFS4_EPHEMERAL_TREE_INVALID;
			net->net_refcnt--;
			mutex_exit(&net->net_cnt_lock);

			/*
			 * At this point, the tree should no
			 * longer be associated with the
			 * mntinfo4. We need to pull it off
			 * there and let the harvester take
			 * care of it once the refcnt drops.
			 */
			mutex_enter(&mi->mi_lock);
			mi->mi_ephemeral_tree = NULL;
			mutex_exit(&mi->mi_lock);
		}
	} else {
		mutex_exit(&mi->mi_lock);
	}

	return (0);

is_busy:

	nfs4_ephemeral_umount_unlock(pmust_unlock, pnet);

	return (error);
}

/*
 * Do the umount and record any error in the parent.
 *
 * On failure the error is latched into the prior node's ne_state
 * (CHILD_ERROR if prior is e's parent, PEER_ERROR if a sibling) so
 * the harvester's traversal can propagate it; the error itself is
 * otherwise discarded.
 */
static void
nfs4_ephemeral_record_umount(vfs_t *vfsp, int flag,
    nfs4_ephemeral_t *e, nfs4_ephemeral_t *prior)
{
	int	error;

	error = umount2_engine(vfsp, flag, kcred, FALSE);
	if (error) {
		if (prior) {
			if (prior->ne_child == e)
				prior->ne_state |=
				    NFS4_EPHEMERAL_CHILD_ERROR;
			else
				prior->ne_state |=
				    NFS4_EPHEMERAL_PEER_ERROR;
		}
	}
}

/*
 * For each tree in the forest (where the forest is in
 * effect all of the ephemeral trees for this zone),
 * scan to see if a node can be unmounted. Note that
 * unlike nfs4_ephemeral_unmount_engine(), we do
 * not process the current node before children or
 * siblings. I.e., if a node can be unmounted, we
 * do not recursively check to see if the nodes
 * hanging off of it can also be unmounted.
 *
 * Instead, we delve down deep to try and remove the
 * children first. Then, because we share code with
 * nfs4_ephemeral_unmount_engine(), we will try
 * them again. This could be a performance issue in
 * the future.
 *
 * Also note that unlike nfs4_ephemeral_unmount_engine(),
 * we do not halt on an error. We will not remove the
 * current node, but we will keep on trying to remove
 * the others.
 *
 * force indicates that we want the unmount to occur
 * even if there is something blocking it.
 *
 * time_check indicates that we want to see if the
 * mount has expired past mount_to or not. Typically
 * we want to do this and only on a shutdown of the
 * zone would we want to ignore the check.
 */
static void
nfs4_ephemeral_harvest_forest(nfs4_trigger_globals_t *ntg,
    bool_t force, bool_t time_check)
{
	nfs4_ephemeral_tree_t	*net;
	nfs4_ephemeral_tree_t	*prev = NULL;
	nfs4_ephemeral_tree_t	*next;
	nfs4_ephemeral_t	*e;
	nfs4_ephemeral_t	*prior;
	time_t			now = gethrestime_sec();

	/* trees found fully dead; freed after dropping the forest lock */
	nfs4_ephemeral_tree_t	*harvest = NULL;

	int			flag;

	mntinfo4_t		*mi;
	vfs_t			*vfsp;

	if (force)
		flag = MS_FORCE;
	else
		flag = 0;

	mutex_enter(&ntg->ntg_forest_lock);
	for (net = ntg->ntg_forest; net != NULL; net = next) {
		next = net->net_next;

		mutex_enter(&net->net_cnt_lock);
		net->net_refcnt++;
		mutex_exit(&net->net_cnt_lock);

		mutex_enter(&net->net_tree_lock);

		/*
		 * Let the unmount code know that the
		 * tree is already locked!
		 */
		mutex_enter(&net->net_cnt_lock);
		net->net_status |= NFS4_EPHEMERAL_TREE_LOCKED;
		mutex_exit(&net->net_cnt_lock);

		/*
		 * If the intent is to force all ephemeral nodes to
		 * be unmounted in this zone, we can short circuit a
		 * lot of tree traversal and simply zap the root node.
		 */
		if (force) {
			if (net->net_root) {
				mi = net->net_root->ne_mount;
				vfsp = mi->mi_vfsp;

				/*
				 * Cleared by umount2_engine.
				 */
				VFS_HOLD(vfsp);

				(void) umount2_engine(vfsp, flag,
				    kcred, FALSE);

				goto check_done;
			}
		}

		/*
		 * Iterative post-order walk driven by per-node ne_state:
		 * visit children, then siblings, then process the node.
		 */
		e = net->net_root;
		if (e)
			e->ne_state = NFS4_EPHEMERAL_VISIT_CHILD;

		while (e) {
			if (e->ne_state == NFS4_EPHEMERAL_VISIT_CHILD) {
				e->ne_state = NFS4_EPHEMERAL_VISIT_SIBLING;
				if (e->ne_child) {
					e = e->ne_child;
					e->ne_state =
					    NFS4_EPHEMERAL_VISIT_CHILD;
				}

				continue;
			} else if (e->ne_state ==
			    NFS4_EPHEMERAL_VISIT_SIBLING) {
				e->ne_state = NFS4_EPHEMERAL_PROCESS_ME;
				if (e->ne_peer) {
					e = e->ne_peer;
					e->ne_state =
					    NFS4_EPHEMERAL_VISIT_CHILD;
				}

				continue;
			} else if (e->ne_state ==
			    NFS4_EPHEMERAL_CHILD_ERROR) {
				prior = e->ne_prior;

				/*
				 * If a child reported an error, do
				 * not bother trying to unmount.
				 *
				 * If your prior node is a parent,
				 * pass the error up such that they
				 * also do not try to unmount.
				 *
				 * However, if your prior is a sibling,
				 * let them try to unmount if they can.
				 */
				if (prior) {
					if (prior->ne_child == e)
						prior->ne_state |=
						    NFS4_EPHEMERAL_CHILD_ERROR;
					else
						prior->ne_state |=
						    NFS4_EPHEMERAL_PEER_ERROR;
				}

				/*
				 * Clear the error and if needed, process peers.
				 *
				 * Once we mask out the error, we know whether
				 * or not we have to process another node.
				 */
				e->ne_state &= ~NFS4_EPHEMERAL_CHILD_ERROR;
				if (e->ne_state == NFS4_EPHEMERAL_PROCESS_ME)
					e = prior;

				continue;
			} else if (e->ne_state ==
			    NFS4_EPHEMERAL_PEER_ERROR) {
				prior = e->ne_prior;

				if (prior) {
					if (prior->ne_child == e)
						prior->ne_state =
						    NFS4_EPHEMERAL_CHILD_ERROR;
					else
						prior->ne_state =
						    NFS4_EPHEMERAL_PEER_ERROR;
				}

				/*
				 * Clear the error from this node and do the
				 * correct processing.
				 */
				e->ne_state &= ~NFS4_EPHEMERAL_PEER_ERROR;
				continue;
			}

			prior = e->ne_prior;
			e->ne_state = NFS4_EPHEMERAL_OK;

			/*
			 * It must be the case that we need to process
			 * this node.
			 */
			if (!time_check ||
			    now - e->ne_ref_time > e->ne_mount_to) {
				mi = e->ne_mount;
				vfsp = mi->mi_vfsp;

				/*
				 * Cleared by umount2_engine.
				 */
				VFS_HOLD(vfsp);

				/*
				 * Note that we effectively work down to the
				 * leaf nodes first, try to unmount them,
				 * then work our way back up into the leaf
				 * nodes.
				 *
				 * Also note that we deal with a lot of
				 * complexity by sharing the work with
				 * the manual unmount code.
				 */
				nfs4_ephemeral_record_umount(vfsp, flag,
				    e, prior);
			}

			e = prior;
		}

check_done:

		/*
		 * Are we done with this tree? A tree whose only
		 * remaining reference is ours and which has been
		 * marked INVALID is unlinked from the forest and
		 * queued on the harvest list for destruction below.
		 */
		mutex_enter(&net->net_cnt_lock);
		if (net->net_refcnt == 1 &&
		    net->net_status & NFS4_EPHEMERAL_TREE_INVALID) {
			net->net_refcnt--;
			net->net_status &= ~NFS4_EPHEMERAL_TREE_LOCKED;
			mutex_exit(&net->net_cnt_lock);
			mutex_exit(&net->net_tree_lock);

			if (prev)
				prev->net_next = net->net_next;
			else
				ntg->ntg_forest = net->net_next;

			net->net_next = harvest;
			harvest = net;
			continue;
		}

		net->net_refcnt--;
		net->net_status &= ~NFS4_EPHEMERAL_TREE_LOCKED;
		mutex_exit(&net->net_cnt_lock);
		mutex_exit(&net->net_tree_lock);

		prev = net;
	}
	mutex_exit(&ntg->ntg_forest_lock);

	for (net = harvest; net != NULL; net = next) {
		next = net->net_next;

		mutex_destroy(&net->net_tree_lock);
		mutex_destroy(&net->net_cnt_lock);
		kmem_free(net, sizeof (*net));
	}
}

/*
 * This is the thread which decides when the harvesting
 * can proceed and when to kill it off for this zone.
 */
static void
nfs4_ephemeral_harvester(nfs4_trigger_globals_t *ntg)
{
	clock_t		timeleft;
	zone_t		*zone = curproc->p_zone;

	for (;;) {
		/*
		 * Sleep for the trigger-thread interval, or wake early
		 * if the zone starts shutting down.
		 */
		timeleft = zone_status_timedwait(zone, lbolt +
		    nfs4_trigger_thread_timer * hz, ZONE_IS_SHUTTING_DOWN);

		/*
		 * zone is exiting...
		 */
		if (timeleft != -1) {
			ASSERT(zone_status_get(zone) >= ZONE_IS_SHUTTING_DOWN);
			zthread_exit();
			/* NOTREACHED */
		}

		/*
		 * Only bother scanning if there is potential
		 * work to be done.
		 */
		if (ntg->ntg_forest == NULL)
			continue;

		/*
		 * Now scan the list and get rid of everything which
		 * is old.
		 */
		nfs4_ephemeral_harvest_forest(ntg, FALSE, TRUE);
	}

	/* NOTREACHED */
}

/*
 * The zone specific glue needed to start the unmount harvester.
 *
 * Note that we want to avoid holding the mutex as long as possible,
 * hence the multiple checks.
 *
 * The caller should avoid us getting down here in the first
 * place.
 */
static void
nfs4_ephemeral_start_harvester(nfs4_trigger_globals_t *ntg)
{
	/*
	 * It got started before we got here...
	 * (unlocked fast-path check; rechecked under the lock below)
	 */
	if (ntg->ntg_thread_started)
		return;

	mutex_enter(&nfs4_ephemeral_thread_lock);

	if (ntg->ntg_thread_started) {
		mutex_exit(&nfs4_ephemeral_thread_lock);
		return;
	}

	/*
	 * Start the unmounter harvester thread for this zone.
	 */
	(void) zthread_create(NULL, 0, nfs4_ephemeral_harvester,
	    ntg, 0, minclsyspri);

	ntg->ntg_thread_started = TRUE;
	mutex_exit(&nfs4_ephemeral_thread_lock);
}

/*
 * Zone-specific-data constructor: allocate and initialize the
 * per-zone trigger globals. The harvester thread is started lazily,
 * not here.
 */
/*ARGSUSED*/
static void *
nfs4_ephemeral_zsd_create(zoneid_t zoneid)
{
	nfs4_trigger_globals_t	*ntg;

	ntg = kmem_zalloc(sizeof (*ntg), KM_SLEEP);
	ntg->ntg_thread_started = FALSE;

	/*
	 * This is the default....
	 */
	ntg->ntg_mount_to = nfs4_trigger_thread_timer;

	mutex_init(&ntg->ntg_forest_lock, NULL,
	    MUTEX_DEFAULT, NULL);

	return (ntg);
}

/*
 * Try a nice gentle walk down the forest and convince
 * all of the trees to gracefully give it up.
 */
/*ARGSUSED*/
static void
nfs4_ephemeral_zsd_shutdown(zoneid_t zoneid, void *arg)
{
	nfs4_trigger_globals_t	*ntg = arg;

	if (!ntg)
		return;

	nfs4_ephemeral_harvest_forest(ntg, FALSE, FALSE);
}

/*
 * Race along the forest and rip all of the trees out by
 * their rootballs!
 */
/*ARGSUSED*/
static void
nfs4_ephemeral_zsd_destroy(zoneid_t zoneid, void *arg)
{
	nfs4_trigger_globals_t	*ntg = arg;

	if (!ntg)
		return;

	nfs4_ephemeral_harvest_forest(ntg, TRUE, FALSE);

	mutex_destroy(&ntg->ntg_forest_lock);
	kmem_free(ntg, sizeof (*ntg));
}

/*
 * This is the zone independent cleanup needed for
 * ephemeral mount processing.
 */
void
nfs4_ephemeral_fini(void)
{
	(void) zone_key_delete(nfs4_ephemeral_key);
	mutex_destroy(&nfs4_ephemeral_thread_lock);
}

/*
 * This is the zone independent initialization needed for
 * ephemeral mount processing.
 */
void
nfs4_ephemeral_init(void)
{
	mutex_init(&nfs4_ephemeral_thread_lock, NULL, MUTEX_DEFAULT,
	    NULL);

	zone_key_create(&nfs4_ephemeral_key, nfs4_ephemeral_zsd_create,
	    nfs4_ephemeral_zsd_shutdown, nfs4_ephemeral_zsd_destroy);
}

/*
 * nfssys() calls this function to set the per-zone
 * value of mount_to to drive when an ephemeral mount is
 * timed out. Each mount will grab a copy of this value
 * when mounted.
 */
void
nfs4_ephemeral_set_mount_to(uint_t mount_to)
{
	nfs4_trigger_globals_t	*ntg;
	zone_t			*zone = curproc->p_zone;

	ntg = zone_getspecific(nfs4_ephemeral_key, zone);

	ntg->ntg_mount_to = mount_to;
}

/*
 * Walk the list of v4 mount options; if they are currently set in vfsp,
 * append them to a new comma-separated mount option string, and return it.
 *
 * Caller should free by calling nfs4_trigger_destroy_mntopts().
 */
static char *
nfs4_trigger_create_mntopts(vfs_t *vfsp)
{
	uint_t i;
	char *mntopts;
	struct vfssw *vswp;
	mntopts_t *optproto;

	mntopts = kmem_zalloc(MAX_MNTOPT_STR, KM_SLEEP);

	/* get the list of applicable mount options for v4; locks *vswp */
	vswp = vfs_getvfssw(MNTTYPE_NFS4);
	optproto = &vswp->vsw_optproto;

	for (i = 0; i < optproto->mo_count; i++) {
		struct mntopt *mop = &optproto->mo_list[i];

		if (mop->mo_flags & MO_EMPTY)
			continue;

		if (nfs4_trigger_add_mntopt(mntopts, mop->mo_name, vfsp)) {
			kmem_free(mntopts, MAX_MNTOPT_STR);
			vfs_unrefvfssw(vswp);
			return (NULL);
		}
	}

	vfs_unrefvfssw(vswp);

	/*
	 * MNTOPT_XATTR is not in the v4 mount opt proto list,
	 * and it may only be passed via MS_OPTIONSTR, so we
	 * must handle it here.
	 *
	 * Ideally, it would be in the list, but NFS does not specify its
	 * own opt proto list, it uses instead the default one. Since
	 * not all filesystems support extended attrs, it would not be
	 * appropriate to add it there.
	 */
	if (nfs4_trigger_add_mntopt(mntopts, MNTOPT_XATTR, vfsp) ||
	    nfs4_trigger_add_mntopt(mntopts, MNTOPT_NOXATTR, vfsp)) {
		kmem_free(mntopts, MAX_MNTOPT_STR);
		return (NULL);
	}

	return (mntopts);
}

/*
 * Free a mount option string created by nfs4_trigger_create_mntopts().
 * NULL is tolerated.
 */
static void
nfs4_trigger_destroy_mntopts(char *mntopts)
{
	if (mntopts)
		kmem_free(mntopts, MAX_MNTOPT_STR);
}

/*
 * Check a single mount option (optname). Add to mntopts if it is set in VFS.
 *
 * Returns 0 on success (including the option simply not being set),
 * EINVAL on a NULL argument, or EOVERFLOW if appending would exceed
 * MAX_MNTOPT_STR.
 */
static int
nfs4_trigger_add_mntopt(char *mntopts, char *optname, vfs_t *vfsp)
{
	if (mntopts == NULL || optname == NULL || vfsp == NULL)
		return (EINVAL);

	if (vfs_optionisset(vfsp, optname, NULL)) {
		size_t mntoptslen = strlen(mntopts);
		size_t optnamelen = strlen(optname);

		/* +1 for ',', +1 for NUL */
		if (mntoptslen + optnamelen + 2 > MAX_MNTOPT_STR)
			return (EOVERFLOW);

		/* first or subsequent mount option? */
		if (*mntopts != '\0')
			(void) strcat(mntopts, ",");

		/* safe: total length was bounds-checked above */
		(void) strcat(mntopts, optname);
	}

	return (0);
}

/*
 * Ping the server with a NULL procedure RPC (RFS_NULL) to see if it
 * is responsive before attempting the ephemeral mount. Uses a short
 * (2 second) timeout and a single retry; nointr suppresses signal
 * interruption of the call.
 */
static enum clnt_stat
nfs4_trigger_ping_server(servinfo4_t *svp, int nointr)
{
	int retries, error;
	uint_t max_msgsize;
	enum clnt_stat status;
	CLIENT *cl;
	struct timeval timeout;

	/* as per recov_newserver() */
	max_msgsize = 0;
	retries = 1;
	timeout.tv_sec = 2;
	timeout.tv_usec = 0;

	error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr, NFS_PROGRAM,
	    NFS_V4, max_msgsize, retries, CRED(), &cl);
	if (error)
		return (RPC_FAILED);

	if (nointr)
		cl->cl_nosignal = TRUE;
	status = CLNT_CALL(cl, RFS_NULL, xdr_void, NULL, xdr_void, NULL,
	    timeout);
	if (nointr)
		cl->cl_nosignal = FALSE;

	AUTH_DESTROY(cl->cl_auth);
	CLNT_DESTROY(cl);

	return (status);
}