1b9238976Sth /*
2b9238976Sth  * CDDL HEADER START
3b9238976Sth  *
4b9238976Sth  * The contents of this file are subject to the terms of the
5b9238976Sth  * Common Development and Distribution License (the "License").
6b9238976Sth  * You may not use this file except in compliance with the License.
7b9238976Sth  *
8b9238976Sth  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9b9238976Sth  * or http://www.opensolaris.org/os/licensing.
10b9238976Sth  * See the License for the specific language governing permissions
11b9238976Sth  * and limitations under the License.
12b9238976Sth  *
13b9238976Sth  * When distributing Covered Code, include this CDDL HEADER in each
14b9238976Sth  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15b9238976Sth  * If applicable, add the following below this CDDL HEADER, with the
16b9238976Sth  * fields enclosed by brackets "[]" replaced with your own identifying
17b9238976Sth  * information: Portions Copyright [yyyy] [name of copyright owner]
18b9238976Sth  *
19b9238976Sth  * CDDL HEADER END
20b9238976Sth  */
21b9238976Sth 
22b9238976Sth /*
23546a3997SThomas Haynes  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24b9238976Sth  * Use is subject to license terms.
25b9238976Sth  */
26b9238976Sth 
27b9238976Sth /*
28b9238976Sth  * Support for ephemeral mounts, e.g. mirror-mounts. These mounts are
29b9238976Sth  * triggered from a "stub" rnode via a special set of vnodeops.
30b9238976Sth  */
31b9238976Sth 
32b9238976Sth #include <sys/param.h>
33b9238976Sth #include <sys/types.h>
34b9238976Sth #include <sys/systm.h>
35b9238976Sth #include <sys/cred.h>
36b9238976Sth #include <sys/time.h>
37b9238976Sth #include <sys/vnode.h>
38b9238976Sth #include <sys/vfs.h>
39b9238976Sth #include <sys/vfs_opreg.h>
40b9238976Sth #include <sys/file.h>
41b9238976Sth #include <sys/filio.h>
42b9238976Sth #include <sys/uio.h>
43b9238976Sth #include <sys/buf.h>
44b9238976Sth #include <sys/mman.h>
45b9238976Sth #include <sys/pathname.h>
46b9238976Sth #include <sys/dirent.h>
47b9238976Sth #include <sys/debug.h>
48b9238976Sth #include <sys/vmsystm.h>
49b9238976Sth #include <sys/fcntl.h>
50b9238976Sth #include <sys/flock.h>
51b9238976Sth #include <sys/swap.h>
52b9238976Sth #include <sys/errno.h>
53b9238976Sth #include <sys/strsubr.h>
54b9238976Sth #include <sys/sysmacros.h>
55b9238976Sth #include <sys/kmem.h>
56b9238976Sth #include <sys/mount.h>
57b9238976Sth #include <sys/cmn_err.h>
58b9238976Sth #include <sys/pathconf.h>
59b9238976Sth #include <sys/utsname.h>
60b9238976Sth #include <sys/dnlc.h>
61b9238976Sth #include <sys/acl.h>
62b9238976Sth #include <sys/systeminfo.h>
63b9238976Sth #include <sys/policy.h>
64b9238976Sth #include <sys/sdt.h>
65b9238976Sth #include <sys/list.h>
66b9238976Sth #include <sys/stat.h>
67b9238976Sth #include <sys/mntent.h>
68b9238976Sth 
69b9238976Sth #include <rpc/types.h>
70b9238976Sth #include <rpc/auth.h>
71b9238976Sth #include <rpc/clnt.h>
72b9238976Sth 
73b9238976Sth #include <nfs/nfs.h>
74b9238976Sth #include <nfs/nfs_clnt.h>
75b9238976Sth #include <nfs/nfs_acl.h>
76b9238976Sth #include <nfs/lm.h>
77b9238976Sth #include <nfs/nfs4.h>
78b9238976Sth #include <nfs/nfs4_kprot.h>
79b9238976Sth #include <nfs/rnode4.h>
80b9238976Sth #include <nfs/nfs4_clnt.h>
81b9238976Sth 
82b9238976Sth #include <vm/hat.h>
83b9238976Sth #include <vm/as.h>
84b9238976Sth #include <vm/page.h>
85b9238976Sth #include <vm/pvn.h>
86b9238976Sth #include <vm/seg.h>
87b9238976Sth #include <vm/seg_map.h>
88b9238976Sth #include <vm/seg_kpm.h>
89b9238976Sth #include <vm/seg_vn.h>
90b9238976Sth 
91b9238976Sth #include <fs/fs_subr.h>
92b9238976Sth 
93b9238976Sth #include <sys/ddi.h>
94b9238976Sth #include <sys/int_fmtio.h>
95b9238976Sth 
96f39b8789Sth #include <sys/sunddi.h>
97b9238976Sth 
98546a3997SThomas Haynes #include <sys/priv_names.h>
99546a3997SThomas Haynes 
100b9238976Sth /*
101b9238976Sth  * The automatic unmounter thread stuff!
102b9238976Sth  */
103b9238976Sth static int nfs4_trigger_thread_timer = 20;	/* in seconds */
104b9238976Sth 
105b9238976Sth /*
106b9238976Sth  * Just a default....
107b9238976Sth  */
108b9238976Sth static uint_t nfs4_trigger_mount_to = 240;
109b9238976Sth 
110b9238976Sth typedef struct nfs4_trigger_globals {
111b9238976Sth 	kmutex_t		ntg_forest_lock;
112b9238976Sth 	uint_t			ntg_mount_to;
113b9238976Sth 	int			ntg_thread_started;
114b9238976Sth 	nfs4_ephemeral_tree_t	*ntg_forest;
115b9238976Sth } nfs4_trigger_globals_t;
116b9238976Sth 
117b9238976Sth kmutex_t	nfs4_ephemeral_thread_lock;
118b9238976Sth 
119b9238976Sth zone_key_t	nfs4_ephemeral_key = ZONE_KEY_UNINITIALIZED;
120b9238976Sth 
121b9238976Sth static void	nfs4_ephemeral_start_harvester(nfs4_trigger_globals_t *);
122b9238976Sth 
/*
 * Server information for an ephemeral mount.  Depending on the type of
 * ephemeral mount, the contents are either copied from a servinfo4_t or
 * built by hand.
 *
 * This structure is meant to hold only the data that is specific to the
 * ephemeral mount type, so that it can be handed to
 * nfs4_trigger_nargs_create().
 */
typedef struct ephemeral_servinfo {
	char			*esi_hostname;
	char			*esi_netname;
	char			*esi_path;
	int			esi_path_len;	/* goes with esi_path */
	int			esi_mount_flags;
	struct netbuf		*esi_addr;
	struct netbuf		*esi_syncaddr;
	struct knetconfig	*esi_knconf;
} ephemeral_servinfo_t;
141b9238976Sth 
142b9238976Sth /*
143b9238976Sth  * Collect together the mount-type specific and generic data args.
144b9238976Sth  */
145b9238976Sth typedef struct domount_args {
146b9238976Sth 	ephemeral_servinfo_t	*dma_esi;
147b9238976Sth 	char			*dma_hostlist; /* comma-sep. for RO failover */
148b9238976Sth 	struct nfs_args		*dma_nargs;
149b9238976Sth } domount_args_t;
150b9238976Sth 
151b9238976Sth 
152b9238976Sth /*
153b9238976Sth  * The vnode ops functions for a trigger stub vnode
154b9238976Sth  */
155da6c28aaSamw static int nfs4_trigger_open(vnode_t **, int, cred_t *, caller_context_t *);
156da6c28aaSamw static int nfs4_trigger_getattr(vnode_t *, struct vattr *, int, cred_t *,
157da6c28aaSamw     caller_context_t *);
158da6c28aaSamw static int nfs4_trigger_setattr(vnode_t *, struct vattr *, int, cred_t *,
159da6c28aaSamw     caller_context_t *);
160da6c28aaSamw static int nfs4_trigger_access(vnode_t *, int, int, cred_t *,
161da6c28aaSamw     caller_context_t *);
162da6c28aaSamw static int nfs4_trigger_readlink(vnode_t *, struct uio *, cred_t *,
163da6c28aaSamw     caller_context_t *);
164da6c28aaSamw static int nfs4_trigger_lookup(vnode_t *, char *, vnode_t **,
165da6c28aaSamw     struct pathname *, int, vnode_t *, cred_t *, caller_context_t *,
166da6c28aaSamw     int *, pathname_t *);
167da6c28aaSamw static int nfs4_trigger_create(vnode_t *, char *, struct vattr *,
168da6c28aaSamw     enum vcexcl, int, vnode_t **, cred_t *, int, caller_context_t *,
169da6c28aaSamw     vsecattr_t *);
170da6c28aaSamw static int nfs4_trigger_remove(vnode_t *, char *, cred_t *, caller_context_t *,
171da6c28aaSamw     int);
172da6c28aaSamw static int nfs4_trigger_link(vnode_t *, vnode_t *, char *, cred_t *,
173da6c28aaSamw     caller_context_t *, int);
174da6c28aaSamw static int nfs4_trigger_rename(vnode_t *, char *, vnode_t *, char *,
175da6c28aaSamw     cred_t *, caller_context_t *, int);
176da6c28aaSamw static int nfs4_trigger_mkdir(vnode_t *, char *, struct vattr *,
177da6c28aaSamw     vnode_t **, cred_t *, caller_context_t *, int, vsecattr_t *vsecp);
178da6c28aaSamw static int nfs4_trigger_rmdir(vnode_t *, char *, vnode_t *, cred_t *,
179da6c28aaSamw     caller_context_t *, int);
180da6c28aaSamw static int nfs4_trigger_symlink(vnode_t *, char *, struct vattr *, char *,
181da6c28aaSamw     cred_t *, caller_context_t *, int);
182da6c28aaSamw static int nfs4_trigger_cmp(vnode_t *, vnode_t *, caller_context_t *);
183b9238976Sth 
184b9238976Sth /*
185b9238976Sth  * Regular NFSv4 vnodeops that we need to reference directly
186b9238976Sth  */
187da6c28aaSamw extern int	nfs4_getattr(vnode_t *, struct vattr *, int, cred_t *,
188da6c28aaSamw 		    caller_context_t *);
189da6c28aaSamw extern void	nfs4_inactive(vnode_t *, cred_t *, caller_context_t *);
190b9238976Sth extern int	nfs4_rwlock(vnode_t *, int, caller_context_t *);
191b9238976Sth extern void	nfs4_rwunlock(vnode_t *, int, caller_context_t *);
192b9238976Sth extern int	nfs4_lookup(vnode_t *, char *, vnode_t **,
193da6c28aaSamw 		    struct pathname *, int, vnode_t *, cred_t *,
194da6c28aaSamw 		    caller_context_t *, int *, pathname_t *);
195da6c28aaSamw extern int	nfs4_pathconf(vnode_t *, int, ulong_t *, cred_t *,
196da6c28aaSamw 		    caller_context_t *);
197da6c28aaSamw extern int	nfs4_getsecattr(vnode_t *, vsecattr_t *, int, cred_t *,
198da6c28aaSamw 		    caller_context_t *);
199da6c28aaSamw extern int	nfs4_fid(vnode_t *, fid_t *, caller_context_t *);
200da6c28aaSamw extern int	nfs4_realvp(vnode_t *, vnode_t **, caller_context_t *);
201b9238976Sth 
202546a3997SThomas Haynes static int	nfs4_trigger_mount(vnode_t *, cred_t *, vnode_t **);
203b9238976Sth static int	nfs4_trigger_domount(vnode_t *, domount_args_t *, vfs_t **,
204*6962f5b8SThomas Haynes     cred_t *, vnode_t **);
205b9238976Sth static domount_args_t  *nfs4_trigger_domount_args_create(vnode_t *);
206b9238976Sth static void	nfs4_trigger_domount_args_destroy(domount_args_t *dma,
207b9238976Sth     vnode_t *vp);
208b9238976Sth static ephemeral_servinfo_t *nfs4_trigger_esi_create(vnode_t *, servinfo4_t *);
209b9238976Sth static void	nfs4_trigger_esi_destroy(ephemeral_servinfo_t *, vnode_t *);
210b9238976Sth static ephemeral_servinfo_t *nfs4_trigger_esi_create_mirrormount(vnode_t *,
211b9238976Sth     servinfo4_t *);
212b9238976Sth static struct nfs_args 	*nfs4_trigger_nargs_create(mntinfo4_t *, servinfo4_t *,
213b9238976Sth     ephemeral_servinfo_t *);
214b9238976Sth static void	nfs4_trigger_nargs_destroy(struct nfs_args *);
215b9238976Sth static char	*nfs4_trigger_create_mntopts(vfs_t *);
216b9238976Sth static void	nfs4_trigger_destroy_mntopts(char *);
217b9238976Sth static int 	nfs4_trigger_add_mntopt(char *, char *, vfs_t *);
218b9238976Sth static enum clnt_stat nfs4_trigger_ping_server(servinfo4_t *, int);
219b9238976Sth 
220b9238976Sth extern int	umount2_engine(vfs_t *, int, cred_t *, int);
221b9238976Sth 
222b9238976Sth 
223b9238976Sth vnodeops_t *nfs4_trigger_vnodeops;
224b9238976Sth 
225b9238976Sth /*
226b9238976Sth  * These are the vnodeops that we must define for stub vnodes.
227b9238976Sth  *
228b9238976Sth  *
229b9238976Sth  * Many of the VOPs defined for NFSv4 do not need to be defined here,
230b9238976Sth  * for various reasons. This will result in the VFS default function being
231b9238976Sth  * used:
232b9238976Sth  *
233b9238976Sth  * - These VOPs require a previous VOP_OPEN to have occurred. That will have
234b9238976Sth  *   lost the reference to the stub vnode, meaning these should not be called:
235b9238976Sth  *       close, read, write, ioctl, readdir, seek.
236b9238976Sth  *
237b9238976Sth  * - These VOPs are meaningless for vnodes without data pages. Since the
238b9238976Sth  *   stub vnode is of type VDIR, these should not be called:
239b9238976Sth  *       space, getpage, putpage, map, addmap, delmap, pageio, fsync.
240b9238976Sth  *
241b9238976Sth  * - These VOPs are otherwise not applicable, and should not be called:
242b9238976Sth  *       dump, setsecattr.
243b9238976Sth  *
244b9238976Sth  *
245b9238976Sth  * These VOPs we do not want to define, but nor do we want the VFS default
246b9238976Sth  * action. Instead, we specify the VFS error function, with fs_error(), but
247b9238976Sth  * note that fs_error() is not actually called. Instead it results in the
248b9238976Sth  * use of the error function defined for the particular VOP, in vn_ops_table[]:
249b9238976Sth  *
250b9238976Sth  * -   frlock, dispose, shrlock.
251b9238976Sth  *
252b9238976Sth  *
253b9238976Sth  * These VOPs we define to use the corresponding regular NFSv4 vnodeop.
254b9238976Sth  * NOTE: if any of these ops involve an OTW call with the stub FH, then
255b9238976Sth  * that call must be wrapped with save_mnt_secinfo()/check_mnt_secinfo()
256b9238976Sth  * to protect the security data in the servinfo4_t for the "parent"
257b9238976Sth  * filesystem that contains the stub.
258b9238976Sth  *
259b9238976Sth  * - These VOPs should not trigger a mount, so that "ls -l" does not:
260b9238976Sth  *       pathconf, getsecattr.
261b9238976Sth  *
262b9238976Sth  * - These VOPs would not make sense to trigger:
263b9238976Sth  *       inactive, rwlock, rwunlock, fid, realvp.
264b9238976Sth  */
265b9238976Sth const fs_operation_def_t nfs4_trigger_vnodeops_template[] = {
266b9238976Sth 	VOPNAME_OPEN,		{ .vop_open = nfs4_trigger_open },
267b9238976Sth 	VOPNAME_GETATTR,	{ .vop_getattr = nfs4_trigger_getattr },
268b9238976Sth 	VOPNAME_SETATTR,	{ .vop_setattr = nfs4_trigger_setattr },
269b9238976Sth 	VOPNAME_ACCESS,		{ .vop_access = nfs4_trigger_access },
270b9238976Sth 	VOPNAME_LOOKUP,		{ .vop_lookup = nfs4_trigger_lookup },
271b9238976Sth 	VOPNAME_CREATE,		{ .vop_create = nfs4_trigger_create },
272b9238976Sth 	VOPNAME_REMOVE,		{ .vop_remove = nfs4_trigger_remove },
273b9238976Sth 	VOPNAME_LINK,		{ .vop_link = nfs4_trigger_link },
274b9238976Sth 	VOPNAME_RENAME,		{ .vop_rename = nfs4_trigger_rename },
275b9238976Sth 	VOPNAME_MKDIR,		{ .vop_mkdir = nfs4_trigger_mkdir },
276b9238976Sth 	VOPNAME_RMDIR,		{ .vop_rmdir = nfs4_trigger_rmdir },
277b9238976Sth 	VOPNAME_SYMLINK,	{ .vop_symlink = nfs4_trigger_symlink },
278b9238976Sth 	VOPNAME_READLINK,	{ .vop_readlink = nfs4_trigger_readlink },
279b9238976Sth 	VOPNAME_INACTIVE, 	{ .vop_inactive = nfs4_inactive },
280b9238976Sth 	VOPNAME_FID,		{ .vop_fid = nfs4_fid },
281b9238976Sth 	VOPNAME_RWLOCK,		{ .vop_rwlock = nfs4_rwlock },
282b9238976Sth 	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = nfs4_rwunlock },
283b9238976Sth 	VOPNAME_REALVP,		{ .vop_realvp = nfs4_realvp },
284b9238976Sth 	VOPNAME_GETSECATTR,	{ .vop_getsecattr = nfs4_getsecattr },
285b9238976Sth 	VOPNAME_PATHCONF,	{ .vop_pathconf = nfs4_pathconf },
286b9238976Sth 	VOPNAME_FRLOCK,		{ .error = fs_error },
287b9238976Sth 	VOPNAME_DISPOSE,	{ .error = fs_error },
288b9238976Sth 	VOPNAME_SHRLOCK,	{ .error = fs_error },
289b9238976Sth 	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
290b9238976Sth 	NULL, NULL
291b9238976Sth };
292b9238976Sth 
293d3a14591SThomas Haynes static void
294d708af74SThomas Haynes nfs4_ephemeral_tree_incr(nfs4_ephemeral_tree_t *net)
295d3a14591SThomas Haynes {
296d708af74SThomas Haynes 	ASSERT(mutex_owned(&net->net_cnt_lock));
297d3a14591SThomas Haynes 	net->net_refcnt++;
298d3a14591SThomas Haynes 	ASSERT(net->net_refcnt != 0);
299d708af74SThomas Haynes }
300d708af74SThomas Haynes 
301d708af74SThomas Haynes static void
302d708af74SThomas Haynes nfs4_ephemeral_tree_hold(nfs4_ephemeral_tree_t *net)
303d708af74SThomas Haynes {
304d708af74SThomas Haynes 	mutex_enter(&net->net_cnt_lock);
305d708af74SThomas Haynes 	nfs4_ephemeral_tree_incr(net);
306d3a14591SThomas Haynes 	mutex_exit(&net->net_cnt_lock);
307d3a14591SThomas Haynes }
308d3a14591SThomas Haynes 
309d3a14591SThomas Haynes /*
310d3a14591SThomas Haynes  * We need a safe way to decrement the refcnt whilst the
311d3a14591SThomas Haynes  * lock is being held.
312d3a14591SThomas Haynes  */
313d3a14591SThomas Haynes static void
314d3a14591SThomas Haynes nfs4_ephemeral_tree_decr(nfs4_ephemeral_tree_t *net)
315d3a14591SThomas Haynes {
316d3a14591SThomas Haynes 	ASSERT(mutex_owned(&net->net_cnt_lock));
317d3a14591SThomas Haynes 	ASSERT(net->net_refcnt != 0);
318d3a14591SThomas Haynes 	net->net_refcnt--;
319d3a14591SThomas Haynes }
320d3a14591SThomas Haynes 
321d3a14591SThomas Haynes static void
322d3a14591SThomas Haynes nfs4_ephemeral_tree_rele(nfs4_ephemeral_tree_t *net)
323d3a14591SThomas Haynes {
324d3a14591SThomas Haynes 	mutex_enter(&net->net_cnt_lock);
325d3a14591SThomas Haynes 	nfs4_ephemeral_tree_decr(net);
326d3a14591SThomas Haynes 	mutex_exit(&net->net_cnt_lock);
327d3a14591SThomas Haynes }
328d3a14591SThomas Haynes 
329b9238976Sth /*
330b9238976Sth  * Trigger ops for stub vnodes; for mirror mounts, etc.
331b9238976Sth  *
332b9238976Sth  * The general idea is that a "triggering" op will first call
333b9238976Sth  * nfs4_trigger_mount(), which will find out whether a mount has already
334b9238976Sth  * been triggered.
335b9238976Sth  *
336b9238976Sth  * If it has, then nfs4_trigger_mount() sets newvp to the root vnode
337b9238976Sth  * of the covering vfs.
338b9238976Sth  *
339b9238976Sth  * If a mount has not yet been triggered, nfs4_trigger_mount() will do so,
340b9238976Sth  * and again set newvp, as above.
341b9238976Sth  *
342b9238976Sth  * The triggering op may then re-issue the VOP by calling it on newvp.
343b9238976Sth  *
344b9238976Sth  * Note that some ops may perform custom action, and may or may not need
345b9238976Sth  * to trigger a mount.
346b9238976Sth  *
347b9238976Sth  * Some ops need to call the regular NFSv4 vnodeop for a stub vnode. We
348b9238976Sth  * obviously can't do this with VOP_<whatever>, since it's a stub vnode
349b9238976Sth  * and that would just recurse. Instead, we call the v4 op directly,
350b9238976Sth  * by name.  This is OK, since we know that the vnode is for NFSv4,
351b9238976Sth  * otherwise it couldn't be a stub.
352b9238976Sth  *
353b9238976Sth  */
354b9238976Sth 
355b9238976Sth static int
356da6c28aaSamw nfs4_trigger_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
357b9238976Sth {
358b9238976Sth 	int error;
359b9238976Sth 	vnode_t *newvp;
360b9238976Sth 
361546a3997SThomas Haynes 	error = nfs4_trigger_mount(*vpp, cr, &newvp);
362b9238976Sth 	if (error)
363b9238976Sth 		return (error);
364b9238976Sth 
365b9238976Sth 	/* Release the stub vnode, as we're losing the reference to it */
366b9238976Sth 	VN_RELE(*vpp);
367b9238976Sth 
368b9238976Sth 	/* Give the caller the root vnode of the newly-mounted fs */
369b9238976Sth 	*vpp = newvp;
370b9238976Sth 
371b9238976Sth 	/* return with VN_HELD(newvp) */
372da6c28aaSamw 	return (VOP_OPEN(vpp, flag, cr, ct));
373b9238976Sth }
374b9238976Sth 
375b9238976Sth /*
376b9238976Sth  * For the majority of cases, nfs4_trigger_getattr() will not trigger
377b9238976Sth  * a mount. However, if ATTR_TRIGGER is set, we are being informed
378b9238976Sth  * that we need to force the mount before we attempt to determine
379b9238976Sth  * the attributes. The intent is an atomic operation for security
380b9238976Sth  * testing.
381b9238976Sth  */
382b9238976Sth static int
383da6c28aaSamw nfs4_trigger_getattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
384da6c28aaSamw     caller_context_t *ct)
385b9238976Sth {
386b9238976Sth 	int error;
387b9238976Sth 
388b9238976Sth 	if (flags & ATTR_TRIGGER) {
389b9238976Sth 		vnode_t	*newvp;
390b9238976Sth 
391546a3997SThomas Haynes 		error = nfs4_trigger_mount(vp, cr, &newvp);
392b9238976Sth 		if (error)
393b9238976Sth 			return (error);
394b9238976Sth 
395da6c28aaSamw 		error = VOP_GETATTR(newvp, vap, flags, cr, ct);
396b9238976Sth 		VN_RELE(newvp);
397b9238976Sth 	} else {
398da6c28aaSamw 		error = nfs4_getattr(vp, vap, flags, cr, ct);
399b9238976Sth 	}
400b9238976Sth 
401b9238976Sth 	return (error);
402b9238976Sth }
403b9238976Sth 
404b9238976Sth static int
405b9238976Sth nfs4_trigger_setattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
406b9238976Sth 		caller_context_t *ct)
407b9238976Sth {
408b9238976Sth 	int error;
409b9238976Sth 	vnode_t *newvp;
410b9238976Sth 
411546a3997SThomas Haynes 	error = nfs4_trigger_mount(vp, cr, &newvp);
412b9238976Sth 	if (error)
413b9238976Sth 		return (error);
414b9238976Sth 
415b9238976Sth 	error = VOP_SETATTR(newvp, vap, flags, cr, ct);
416b9238976Sth 	VN_RELE(newvp);
417b9238976Sth 
418b9238976Sth 	return (error);
419b9238976Sth }
420b9238976Sth 
421b9238976Sth static int
422da6c28aaSamw nfs4_trigger_access(vnode_t *vp, int mode, int flags, cred_t *cr,
423da6c28aaSamw     caller_context_t *ct)
424b9238976Sth {
425b9238976Sth 	int error;
426b9238976Sth 	vnode_t *newvp;
427b9238976Sth 
428546a3997SThomas Haynes 	error = nfs4_trigger_mount(vp, cr, &newvp);
429b9238976Sth 	if (error)
430b9238976Sth 		return (error);
431b9238976Sth 
432da6c28aaSamw 	error = VOP_ACCESS(newvp, mode, flags, cr, ct);
433b9238976Sth 	VN_RELE(newvp);
434b9238976Sth 
435b9238976Sth 	return (error);
436b9238976Sth }
437b9238976Sth 
438b9238976Sth static int
439da6c28aaSamw nfs4_trigger_lookup(vnode_t *dvp, char *nm, vnode_t **vpp,
440da6c28aaSamw     struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr,
441da6c28aaSamw     caller_context_t *ct, int *deflags, pathname_t *rpnp)
442b9238976Sth {
443b9238976Sth 	int error;
444b9238976Sth 	vnode_t *newdvp;
445b9238976Sth 	rnode4_t *drp = VTOR4(dvp);
446b9238976Sth 
447b9238976Sth 	ASSERT(RP_ISSTUB(drp));
448b9238976Sth 
449b9238976Sth 	/* for now, we only support mirror-mounts */
450b9238976Sth 	ASSERT(RP_ISSTUB_MIRRORMOUNT(drp));
451b9238976Sth 
452b9238976Sth 	/*
453b9238976Sth 	 * It's not legal to lookup ".." for an fs root, so we mustn't pass
454b9238976Sth 	 * that up. Instead, pass onto the regular op, regardless of whether
455b9238976Sth 	 * we've triggered a mount.
456b9238976Sth 	 */
457b9238976Sth 	if (strcmp(nm, "..") == 0)
458da6c28aaSamw 		return (nfs4_lookup(dvp, nm, vpp, pnp, flags, rdir, cr,
459da6c28aaSamw 		    ct, deflags, rpnp));
460b9238976Sth 
461546a3997SThomas Haynes 	error = nfs4_trigger_mount(dvp, cr, &newdvp);
462b9238976Sth 	if (error)
463b9238976Sth 		return (error);
464b9238976Sth 
465da6c28aaSamw 	error = VOP_LOOKUP(newdvp, nm, vpp, pnp, flags, rdir, cr, ct,
466da6c28aaSamw 	    deflags, rpnp);
467b9238976Sth 	VN_RELE(newdvp);
468b9238976Sth 
469b9238976Sth 	return (error);
470b9238976Sth }
471b9238976Sth 
472b9238976Sth static int
473b9238976Sth nfs4_trigger_create(vnode_t *dvp, char *nm, struct vattr *va,
474da6c28aaSamw     enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr,
475da6c28aaSamw     int flags, caller_context_t *ct, vsecattr_t *vsecp)
476b9238976Sth {
477b9238976Sth 	int error;
478b9238976Sth 	vnode_t *newdvp;
479b9238976Sth 
480546a3997SThomas Haynes 	error = nfs4_trigger_mount(dvp, cr, &newdvp);
481b9238976Sth 	if (error)
482b9238976Sth 		return (error);
483b9238976Sth 
484da6c28aaSamw 	error = VOP_CREATE(newdvp, nm, va, exclusive, mode, vpp, cr,
485da6c28aaSamw 	    flags, ct, vsecp);
486b9238976Sth 	VN_RELE(newdvp);
487b9238976Sth 
488b9238976Sth 	return (error);
489b9238976Sth }
490b9238976Sth 
491b9238976Sth static int
492da6c28aaSamw nfs4_trigger_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
493da6c28aaSamw     int flags)
494b9238976Sth {
495b9238976Sth 	int error;
496b9238976Sth 	vnode_t *newdvp;
497b9238976Sth 
498546a3997SThomas Haynes 	error = nfs4_trigger_mount(dvp, cr, &newdvp);
499b9238976Sth 	if (error)
500b9238976Sth 		return (error);
501b9238976Sth 
502da6c28aaSamw 	error = VOP_REMOVE(newdvp, nm, cr, ct, flags);
503b9238976Sth 	VN_RELE(newdvp);
504b9238976Sth 
505b9238976Sth 	return (error);
506b9238976Sth }
507b9238976Sth 
508b9238976Sth static int
509da6c28aaSamw nfs4_trigger_link(vnode_t *tdvp, vnode_t *svp, char *tnm, cred_t *cr,
510da6c28aaSamw     caller_context_t *ct, int flags)
511b9238976Sth {
512b9238976Sth 	int error;
513b9238976Sth 	vnode_t *newtdvp;
514b9238976Sth 
515546a3997SThomas Haynes 	error = nfs4_trigger_mount(tdvp, cr, &newtdvp);
516b9238976Sth 	if (error)
517b9238976Sth 		return (error);
518b9238976Sth 
519b9238976Sth 	/*
520b9238976Sth 	 * We don't check whether svp is a stub. Let the NFSv4 code
521b9238976Sth 	 * detect that error, and return accordingly.
522b9238976Sth 	 */
523da6c28aaSamw 	error = VOP_LINK(newtdvp, svp, tnm, cr, ct, flags);
524b9238976Sth 	VN_RELE(newtdvp);
525b9238976Sth 
526b9238976Sth 	return (error);
527b9238976Sth }
528b9238976Sth 
529b9238976Sth static int
530b9238976Sth nfs4_trigger_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
531da6c28aaSamw     cred_t *cr, caller_context_t *ct, int flags)
532b9238976Sth {
533b9238976Sth 	int error;
534b9238976Sth 	vnode_t *newsdvp;
535b9238976Sth 	rnode4_t *tdrp = VTOR4(tdvp);
536b9238976Sth 
537b9238976Sth 	/*
538b9238976Sth 	 * We know that sdvp is a stub, otherwise we would not be here.
539b9238976Sth 	 *
540b9238976Sth 	 * If tdvp is also be a stub, there are two possibilities: it
541b9238976Sth 	 * is either the same stub as sdvp [i.e. VN_CMP(sdvp, tdvp)]
542b9238976Sth 	 * or it is a different stub [!VN_CMP(sdvp, tdvp)].
543b9238976Sth 	 *
544b9238976Sth 	 * In the former case, just trigger sdvp, and treat tdvp as
545b9238976Sth 	 * though it were not a stub.
546b9238976Sth 	 *
547b9238976Sth 	 * In the latter case, it might be a different stub for the
548b9238976Sth 	 * same server fs as sdvp, or for a different server fs.
549b9238976Sth 	 * Regardless, from the client perspective this would still
550b9238976Sth 	 * be a cross-filesystem rename, and should not be allowed,
551b9238976Sth 	 * so return EXDEV, without triggering either mount.
552b9238976Sth 	 */
553b9238976Sth 	if (RP_ISSTUB(tdrp) && !VN_CMP(sdvp, tdvp))
554b9238976Sth 		return (EXDEV);
555b9238976Sth 
556546a3997SThomas Haynes 	error = nfs4_trigger_mount(sdvp, cr, &newsdvp);
557b9238976Sth 	if (error)
558b9238976Sth 		return (error);
559b9238976Sth 
560da6c28aaSamw 	error = VOP_RENAME(newsdvp, snm, tdvp, tnm, cr, ct, flags);
561b9238976Sth 
562b9238976Sth 	VN_RELE(newsdvp);
563b9238976Sth 
564b9238976Sth 	return (error);
565b9238976Sth }
566b9238976Sth 
567da6c28aaSamw /* ARGSUSED */
568b9238976Sth static int
569b9238976Sth nfs4_trigger_mkdir(vnode_t *dvp, char *nm, struct vattr *va, vnode_t **vpp,
570da6c28aaSamw     cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
571b9238976Sth {
572b9238976Sth 	int error;
573b9238976Sth 	vnode_t *newdvp;
574b9238976Sth 
575546a3997SThomas Haynes 	error = nfs4_trigger_mount(dvp, cr, &newdvp);
576b9238976Sth 	if (error)
577b9238976Sth 		return (error);
578b9238976Sth 
579da6c28aaSamw 	error = VOP_MKDIR(newdvp, nm, va, vpp, cr, ct, flags, vsecp);
580b9238976Sth 	VN_RELE(newdvp);
581b9238976Sth 
582b9238976Sth 	return (error);
583b9238976Sth }
584b9238976Sth 
585b9238976Sth static int
586da6c28aaSamw nfs4_trigger_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
587da6c28aaSamw     caller_context_t *ct, int flags)
588b9238976Sth {
589b9238976Sth 	int error;
590b9238976Sth 	vnode_t *newdvp;
591b9238976Sth 
592546a3997SThomas Haynes 	error = nfs4_trigger_mount(dvp, cr, &newdvp);
593b9238976Sth 	if (error)
594b9238976Sth 		return (error);
595b9238976Sth 
596da6c28aaSamw 	error = VOP_RMDIR(newdvp, nm, cdir, cr, ct, flags);
597b9238976Sth 	VN_RELE(newdvp);
598b9238976Sth 
599b9238976Sth 	return (error);
600b9238976Sth }
601b9238976Sth 
602b9238976Sth static int
603b9238976Sth nfs4_trigger_symlink(vnode_t *dvp, char *lnm, struct vattr *tva, char *tnm,
604da6c28aaSamw     cred_t *cr, caller_context_t *ct, int flags)
605b9238976Sth {
606b9238976Sth 	int error;
607b9238976Sth 	vnode_t *newdvp;
608b9238976Sth 
609546a3997SThomas Haynes 	error = nfs4_trigger_mount(dvp, cr, &newdvp);
610b9238976Sth 	if (error)
611b9238976Sth 		return (error);
612b9238976Sth 
613da6c28aaSamw 	error = VOP_SYMLINK(newdvp, lnm, tva, tnm, cr, ct, flags);
614b9238976Sth 	VN_RELE(newdvp);
615b9238976Sth 
616b9238976Sth 	return (error);
617b9238976Sth }
618b9238976Sth 
619b9238976Sth static int
620da6c28aaSamw nfs4_trigger_readlink(vnode_t *vp, struct uio *uiop, cred_t *cr,
621da6c28aaSamw     caller_context_t *ct)
622b9238976Sth {
623b9238976Sth 	int error;
624b9238976Sth 	vnode_t *newvp;
625b9238976Sth 
626546a3997SThomas Haynes 	error = nfs4_trigger_mount(vp, cr, &newvp);
627b9238976Sth 	if (error)
628b9238976Sth 		return (error);
629b9238976Sth 
630da6c28aaSamw 	error = VOP_READLINK(newvp, uiop, cr, ct);
631b9238976Sth 	VN_RELE(newvp);
632b9238976Sth 
633b9238976Sth 	return (error);
634b9238976Sth }
635b9238976Sth 
636b9238976Sth /* end of trigger vnode ops */
637b9238976Sth 
638*6962f5b8SThomas Haynes /*
639*6962f5b8SThomas Haynes  * See if the mount has already been done by another caller.
640*6962f5b8SThomas Haynes  */
641*6962f5b8SThomas Haynes static int
642*6962f5b8SThomas Haynes nfs4_trigger_mounted_already(vnode_t *vp, vnode_t **newvpp,
643*6962f5b8SThomas Haynes     bool_t *was_mounted, vfs_t **vfsp)
644*6962f5b8SThomas Haynes {
645*6962f5b8SThomas Haynes 	int		error;
646*6962f5b8SThomas Haynes 	mntinfo4_t	*mi = VTOMI4(vp);
647*6962f5b8SThomas Haynes 
648*6962f5b8SThomas Haynes 	*was_mounted = FALSE;
649*6962f5b8SThomas Haynes 
650*6962f5b8SThomas Haynes 	error = vn_vfsrlock_wait(vp);
651*6962f5b8SThomas Haynes 	if (error)
652*6962f5b8SThomas Haynes 		return (error);
653*6962f5b8SThomas Haynes 
654*6962f5b8SThomas Haynes 	*vfsp = vn_mountedvfs(vp);
655*6962f5b8SThomas Haynes 	if (*vfsp != NULL) {
656*6962f5b8SThomas Haynes 		/* the mount has already occurred */
657*6962f5b8SThomas Haynes 		error = VFS_ROOT(*vfsp, newvpp);
658*6962f5b8SThomas Haynes 		if (!error) {
659*6962f5b8SThomas Haynes 			/* need to update the reference time  */
660*6962f5b8SThomas Haynes 			mutex_enter(&mi->mi_lock);
661*6962f5b8SThomas Haynes 			if (mi->mi_ephemeral)
662*6962f5b8SThomas Haynes 				mi->mi_ephemeral->ne_ref_time =
663*6962f5b8SThomas Haynes 				    gethrestime_sec();
664*6962f5b8SThomas Haynes 			mutex_exit(&mi->mi_lock);
665*6962f5b8SThomas Haynes 
666*6962f5b8SThomas Haynes 			*was_mounted = TRUE;
667*6962f5b8SThomas Haynes 		}
668*6962f5b8SThomas Haynes 	}
669*6962f5b8SThomas Haynes 
670*6962f5b8SThomas Haynes 	vn_vfsunlock(vp);
671*6962f5b8SThomas Haynes 	return (0);
672*6962f5b8SThomas Haynes }
673*6962f5b8SThomas Haynes 
674b9238976Sth /*
675b9238976Sth  * Mount upon a trigger vnode; for mirror-mounts, etc.
676b9238976Sth  *
677b9238976Sth  * The mount may have already occurred, via another thread. If not,
678b9238976Sth  * assemble the location information - which may require fetching - and
679b9238976Sth  * perform the mount.
680b9238976Sth  *
681b9238976Sth  * Sets newvp to be the root of the fs that is now covering vp. Note
682b9238976Sth  * that we return with VN_HELD(*newvp).
683b9238976Sth  *
684b9238976Sth  * The caller is responsible for passing the VOP onto the covering fs.
685b9238976Sth  */
686b9238976Sth static int
687546a3997SThomas Haynes nfs4_trigger_mount(vnode_t *vp, cred_t *cr, vnode_t **newvpp)
688b9238976Sth {
689b9238976Sth 	int			 error;
690b9238976Sth 	vfs_t			*vfsp;
691b9238976Sth 	rnode4_t		*rp = VTOR4(vp);
692b9238976Sth 	mntinfo4_t		*mi = VTOMI4(vp);
693b9238976Sth 	domount_args_t		*dma;
694b9238976Sth 
695b9238976Sth 	nfs4_ephemeral_tree_t	*net;
696b9238976Sth 
697b9238976Sth 	bool_t			must_unlock = FALSE;
698b9238976Sth 	bool_t			is_building = FALSE;
699*6962f5b8SThomas Haynes 	bool_t			was_mounted = FALSE;
700b9238976Sth 
701546a3997SThomas Haynes 	cred_t			*mcred = NULL;
702b9238976Sth 
703b9238976Sth 	nfs4_trigger_globals_t	*ntg;
704b9238976Sth 
705b9238976Sth 	zone_t			*zone = curproc->p_zone;
706b9238976Sth 
707b9238976Sth 	ASSERT(RP_ISSTUB(rp));
708b9238976Sth 
709b9238976Sth 	/* for now, we only support mirror-mounts */
710b9238976Sth 	ASSERT(RP_ISSTUB_MIRRORMOUNT(rp));
711b9238976Sth 
712b9238976Sth 	*newvpp = NULL;
713b9238976Sth 
714b9238976Sth 	/*
715b9238976Sth 	 * Has the mount already occurred?
716b9238976Sth 	 */
717*6962f5b8SThomas Haynes 	error = nfs4_trigger_mounted_already(vp, newvpp,
718*6962f5b8SThomas Haynes 	    &was_mounted, &vfsp);
719*6962f5b8SThomas Haynes 	if (error || was_mounted)
720b9238976Sth 		goto done;
721b9238976Sth 
722b9238976Sth 	ntg = zone_getspecific(nfs4_ephemeral_key, zone);
723b9238976Sth 	ASSERT(ntg != NULL);
724b9238976Sth 
725b9238976Sth 	mutex_enter(&mi->mi_lock);
726b9238976Sth 
727b9238976Sth 	/*
728b9238976Sth 	 * We need to lock down the ephemeral tree.
729b9238976Sth 	 */
730b9238976Sth 	if (mi->mi_ephemeral_tree == NULL) {
731b9238976Sth 		net = kmem_zalloc(sizeof (*net), KM_SLEEP);
732b9238976Sth 		mutex_init(&net->net_tree_lock, NULL, MUTEX_DEFAULT, NULL);
733b9238976Sth 		mutex_init(&net->net_cnt_lock, NULL, MUTEX_DEFAULT, NULL);
734b9238976Sth 		net->net_refcnt = 1;
735b9238976Sth 		net->net_status = NFS4_EPHEMERAL_TREE_BUILDING;
736b9238976Sth 		is_building = TRUE;
737b9238976Sth 
738b9238976Sth 		/*
739b9238976Sth 		 * We need to add it to the zone specific list for
740b9238976Sth 		 * automatic unmounting and harvesting of deadwood.
741b9238976Sth 		 */
742b9238976Sth 		mutex_enter(&ntg->ntg_forest_lock);
743b9238976Sth 		if (ntg->ntg_forest != NULL)
744b9238976Sth 			net->net_next = ntg->ntg_forest;
745b9238976Sth 		ntg->ntg_forest = net;
746b9238976Sth 		mutex_exit(&ntg->ntg_forest_lock);
747b9238976Sth 
748b9238976Sth 		/*
749b9238976Sth 		 * No lock order confusion with mi_lock because no
750b9238976Sth 		 * other node could have grabbed net_tree_lock.
751b9238976Sth 		 */
752b9238976Sth 		mutex_enter(&net->net_tree_lock);
753b9238976Sth 		mi->mi_ephemeral_tree = net;
754b9238976Sth 		net->net_mount = mi;
755b9238976Sth 		mutex_exit(&mi->mi_lock);
756b9238976Sth 	} else {
757b9238976Sth 		net = mi->mi_ephemeral_tree;
758d3a14591SThomas Haynes 		nfs4_ephemeral_tree_hold(net);
759d3a14591SThomas Haynes 
760d708af74SThomas Haynes 		mutex_exit(&mi->mi_lock);
761d708af74SThomas Haynes 
762d3a14591SThomas Haynes 		mutex_enter(&net->net_tree_lock);
763b9238976Sth 
764b9238976Sth 		/*
765d3a14591SThomas Haynes 		 * We can only procede if the tree is neither locked
766d3a14591SThomas Haynes 		 * nor being torn down.
767b9238976Sth 		 */
768d3a14591SThomas Haynes 		mutex_enter(&net->net_cnt_lock);
769d3a14591SThomas Haynes 		if (net->net_status & NFS4_EPHEMERAL_TREE_PROCESSING) {
770d3a14591SThomas Haynes 			nfs4_ephemeral_tree_decr(net);
771d3a14591SThomas Haynes 			mutex_exit(&net->net_cnt_lock);
772d3a14591SThomas Haynes 			mutex_exit(&net->net_tree_lock);
773d3a14591SThomas Haynes 
774d3a14591SThomas Haynes 			return (EIO);
775d3a14591SThomas Haynes 		}
776d3a14591SThomas Haynes 		mutex_exit(&net->net_cnt_lock);
777b9238976Sth 	}
778b9238976Sth 
779b9238976Sth 	mutex_enter(&net->net_cnt_lock);
780b9238976Sth 	net->net_status |= NFS4_EPHEMERAL_TREE_MOUNTING;
781b9238976Sth 	mutex_exit(&net->net_cnt_lock);
782b9238976Sth 
783b9238976Sth 	must_unlock = TRUE;
784b9238976Sth 
785b9238976Sth 	dma = nfs4_trigger_domount_args_create(vp);
786b9238976Sth 	if (dma == NULL) {
787b9238976Sth 		error = EINVAL;
788b9238976Sth 		goto done;
789b9238976Sth 	}
790b9238976Sth 
791b9238976Sth 	/*
792b9238976Sth 	 * Note that since we define mirror mounts to work
793546a3997SThomas Haynes 	 * for any user, we simply extend the privileges of
794546a3997SThomas Haynes 	 * the user's credentials to allow the mount to
795546a3997SThomas Haynes 	 * proceed.
796b9238976Sth 	 */
797546a3997SThomas Haynes 	mcred = crdup(cr);
798546a3997SThomas Haynes 	if (mcred == NULL) {
799546a3997SThomas Haynes 		error = EINVAL;
800546a3997SThomas Haynes 		goto done;
801546a3997SThomas Haynes 	}
802546a3997SThomas Haynes 
803546a3997SThomas Haynes 	crset_zone_privall(mcred);
804b9238976Sth 
805*6962f5b8SThomas Haynes 	error = nfs4_trigger_domount(vp, dma, &vfsp, mcred, newvpp);
806b9238976Sth 	nfs4_trigger_domount_args_destroy(dma, vp);
807b9238976Sth 
808546a3997SThomas Haynes 	crfree(mcred);
809b9238976Sth 
810b9238976Sth done:
811*6962f5b8SThomas Haynes 
812b9238976Sth 	if (must_unlock) {
813b9238976Sth 		mutex_enter(&net->net_cnt_lock);
814b9238976Sth 		net->net_status &= ~NFS4_EPHEMERAL_TREE_MOUNTING;
815b9238976Sth 		if (is_building)
816b9238976Sth 			net->net_status &= ~NFS4_EPHEMERAL_TREE_BUILDING;
817d3a14591SThomas Haynes 		nfs4_ephemeral_tree_decr(net);
818b9238976Sth 		mutex_exit(&net->net_cnt_lock);
819b9238976Sth 
820b9238976Sth 		mutex_exit(&net->net_tree_lock);
821b9238976Sth 	}
822b9238976Sth 
823b9238976Sth 	if (!error && (newvpp == NULL || *newvpp == NULL))
824b9238976Sth 		error = ENOSYS;
825b9238976Sth 
826b9238976Sth 	return (error);
827b9238976Sth }
828b9238976Sth 
829b9238976Sth /*
830b9238976Sth  * Collect together both the generic & mount-type specific args.
831b9238976Sth  */
832b9238976Sth static domount_args_t *
833b9238976Sth nfs4_trigger_domount_args_create(vnode_t *vp)
834b9238976Sth {
835b9238976Sth 	int nointr;
836b9238976Sth 	char *hostlist;
837b9238976Sth 	servinfo4_t *svp;
838b9238976Sth 	struct nfs_args *nargs, *nargs_head;
839b9238976Sth 	enum clnt_stat status;
840b9238976Sth 	ephemeral_servinfo_t *esi, *esi_first;
841b9238976Sth 	domount_args_t *dma;
842b9238976Sth 	mntinfo4_t *mi = VTOMI4(vp);
843b9238976Sth 
844b9238976Sth 	nointr = !(mi->mi_flags & MI4_INT);
845b9238976Sth 	hostlist = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
846b9238976Sth 
847b9238976Sth 	svp = mi->mi_curr_serv;
848b9238976Sth 	/* check if the current server is responding */
849b9238976Sth 	status = nfs4_trigger_ping_server(svp, nointr);
850b9238976Sth 	if (status == RPC_SUCCESS) {
851b9238976Sth 		esi_first = nfs4_trigger_esi_create(vp, svp);
852b9238976Sth 		if (esi_first == NULL) {
853b9238976Sth 			kmem_free(hostlist, MAXPATHLEN);
854b9238976Sth 			return (NULL);
855b9238976Sth 		}
856b9238976Sth 
857b9238976Sth 		(void) strlcpy(hostlist, esi_first->esi_hostname, MAXPATHLEN);
858b9238976Sth 
859b9238976Sth 		nargs_head = nfs4_trigger_nargs_create(mi, svp, esi_first);
860b9238976Sth 	} else {
861b9238976Sth 		/* current server did not respond */
862b9238976Sth 		esi_first = NULL;
863b9238976Sth 		nargs_head = NULL;
864b9238976Sth 	}
865b9238976Sth 	nargs = nargs_head;
866b9238976Sth 
867b9238976Sth 	/*
868b9238976Sth 	 * NFS RO failover.
869b9238976Sth 	 *
870b9238976Sth 	 * If we have multiple servinfo4 structures, linked via sv_next,
871b9238976Sth 	 * we must create one nfs_args for each, linking the nfs_args via
872b9238976Sth 	 * nfs_ext_u.nfs_extB.next.
873b9238976Sth 	 *
874b9238976Sth 	 * We need to build a corresponding esi for each, too, but that is
875b9238976Sth 	 * used solely for building nfs_args, and may be immediately
876b9238976Sth 	 * discarded, as domount() requires the info from just one esi,
877b9238976Sth 	 * but all the nfs_args.
878b9238976Sth 	 *
879b9238976Sth 	 * Currently, the NFS mount code will hang if not all servers
880b9238976Sth 	 * requested are available. To avoid that, we need to ping each
881b9238976Sth 	 * server, here, and remove it from the list if it is not
882b9238976Sth 	 * responding. This has the side-effect of that server then
883b9238976Sth 	 * being permanently unavailable for this failover mount, even if
884b9238976Sth 	 * it recovers. That's unfortunate, but the best we can do until
885b9238976Sth 	 * the mount code path is fixed.
886b9238976Sth 	 */
887b9238976Sth 
888b9238976Sth 	/*
889b9238976Sth 	 * If the current server was down, loop indefinitely until we find
890b9238976Sth 	 * at least one responsive server.
891b9238976Sth 	 */
892b9238976Sth 	do {
893b9238976Sth 		/* no locking needed for sv_next; it is only set at fs mount */
894b9238976Sth 		for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) {
895b9238976Sth 			struct nfs_args *next;
896b9238976Sth 
897b9238976Sth 			/*
898b9238976Sth 			 * nargs_head: the head of the nfs_args list
899b9238976Sth 			 * nargs: the current tail of the list
900b9238976Sth 			 * next: the newly-created element to be added
901b9238976Sth 			 */
902b9238976Sth 
903b9238976Sth 			/*
904b9238976Sth 			 * We've already tried the current server, above;
905b9238976Sth 			 * if it was responding, we have already included it
906b9238976Sth 			 * and it may now be ignored.
907b9238976Sth 			 *
908b9238976Sth 			 * Otherwise, try it again, since it may now have
909b9238976Sth 			 * recovered.
910b9238976Sth 			 */
911b9238976Sth 			if (svp == mi->mi_curr_serv && esi_first != NULL)
912b9238976Sth 				continue;
913b9238976Sth 
914b9238976Sth 			(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
915b9238976Sth 			if (svp->sv_flags & SV4_NOTINUSE) {
916b9238976Sth 				nfs_rw_exit(&svp->sv_lock);
917b9238976Sth 				continue;
918b9238976Sth 			}
919b9238976Sth 			nfs_rw_exit(&svp->sv_lock);
920b9238976Sth 
921b9238976Sth 			/* check if the server is responding */
922b9238976Sth 			status = nfs4_trigger_ping_server(svp, nointr);
923b9238976Sth 			/* if the server did not respond, ignore it */
924b9238976Sth 			if (status != RPC_SUCCESS)
925b9238976Sth 				continue;
926b9238976Sth 
927b9238976Sth 			esi = nfs4_trigger_esi_create(vp, svp);
928b9238976Sth 			if (esi == NULL)
929b9238976Sth 				continue;
930b9238976Sth 
931b9238976Sth 			/*
932b9238976Sth 			 * If the original current server (mi_curr_serv)
933b9238976Sth 			 * was down when when we first tried it,
934b9238976Sth 			 * (i.e. esi_first == NULL),
935b9238976Sth 			 * we select this new server (svp) to be the server
936b9238976Sth 			 * that we will actually contact (esi_first).
937b9238976Sth 			 *
938b9238976Sth 			 * Note that it's possible that mi_curr_serv == svp,
939b9238976Sth 			 * if that mi_curr_serv was down but has now recovered.
940b9238976Sth 			 */
941b9238976Sth 			next = nfs4_trigger_nargs_create(mi, svp, esi);
942b9238976Sth 			if (esi_first == NULL) {
943b9238976Sth 				ASSERT(nargs == NULL);
944b9238976Sth 				ASSERT(nargs_head == NULL);
945b9238976Sth 				nargs_head = next;
946b9238976Sth 				esi_first = esi;
947b9238976Sth 				(void) strlcpy(hostlist,
948b9238976Sth 				    esi_first->esi_hostname, MAXPATHLEN);
949b9238976Sth 			} else {
950b9238976Sth 				ASSERT(nargs_head != NULL);
951b9238976Sth 				nargs->nfs_ext_u.nfs_extB.next = next;
952b9238976Sth 				(void) strlcat(hostlist, ",", MAXPATHLEN);
953b9238976Sth 				(void) strlcat(hostlist, esi->esi_hostname,
954b9238976Sth 				    MAXPATHLEN);
955b9238976Sth 				/* esi was only needed for hostname & nargs */
956b9238976Sth 				nfs4_trigger_esi_destroy(esi, vp);
957b9238976Sth 			}
958b9238976Sth 
959b9238976Sth 			nargs = next;
960b9238976Sth 		}
961b9238976Sth 
962b9238976Sth 		/* if we've had no response at all, wait a second */
963b9238976Sth 		if (esi_first == NULL)
964b9238976Sth 			delay(drv_usectohz(1000000));
965b9238976Sth 
966b9238976Sth 	} while (esi_first == NULL);
967b9238976Sth 	ASSERT(nargs_head != NULL);
968b9238976Sth 
969b9238976Sth 	dma = kmem_zalloc(sizeof (domount_args_t), KM_SLEEP);
970b9238976Sth 	dma->dma_esi = esi_first;
971b9238976Sth 	dma->dma_hostlist = hostlist;
972b9238976Sth 	dma->dma_nargs = nargs_head;
973b9238976Sth 
974b9238976Sth 	return (dma);
975b9238976Sth }
976b9238976Sth 
977b9238976Sth static void
978b9238976Sth nfs4_trigger_domount_args_destroy(domount_args_t *dma, vnode_t *vp)
979b9238976Sth {
980b9238976Sth 	if (dma != NULL) {
981b9238976Sth 		if (dma->dma_esi != NULL && vp != NULL)
982b9238976Sth 			nfs4_trigger_esi_destroy(dma->dma_esi, vp);
983b9238976Sth 
984b9238976Sth 		if (dma->dma_hostlist != NULL)
985b9238976Sth 			kmem_free(dma->dma_hostlist, MAXPATHLEN);
986b9238976Sth 
987b9238976Sth 		if (dma->dma_nargs != NULL) {
988b9238976Sth 			struct nfs_args *nargs = dma->dma_nargs;
989b9238976Sth 
990b9238976Sth 			do {
991b9238976Sth 				struct nfs_args *next =
992b9238976Sth 				    nargs->nfs_ext_u.nfs_extB.next;
993b9238976Sth 
994b9238976Sth 				nfs4_trigger_nargs_destroy(nargs);
995b9238976Sth 				nargs = next;
996b9238976Sth 			} while (nargs != NULL);
997b9238976Sth 		}
998b9238976Sth 
999b9238976Sth 		kmem_free(dma, sizeof (domount_args_t));
1000b9238976Sth 	}
1001b9238976Sth }
1002b9238976Sth 
1003b9238976Sth /*
1004b9238976Sth  * The ephemeral_servinfo_t struct contains basic information we will need to
1005b9238976Sth  * perform the mount. Whilst the structure is generic across different
1006b9238976Sth  * types of ephemeral mount, the way we gather its contents differs.
1007b9238976Sth  */
1008b9238976Sth static ephemeral_servinfo_t *
1009b9238976Sth nfs4_trigger_esi_create(vnode_t *vp, servinfo4_t *svp)
1010b9238976Sth {
1011b9238976Sth 	ephemeral_servinfo_t *esi;
1012b9238976Sth 	rnode4_t *rp = VTOR4(vp);
1013b9238976Sth 
1014b9238976Sth 	ASSERT(RP_ISSTUB(rp));
1015b9238976Sth 
1016b9238976Sth 	/* Call the ephemeral type-specific routine */
1017b9238976Sth 	if (RP_ISSTUB_MIRRORMOUNT(rp))
1018b9238976Sth 		esi = nfs4_trigger_esi_create_mirrormount(vp, svp);
1019b9238976Sth 	else
1020b9238976Sth 		esi = NULL;
1021b9238976Sth 
1022b9238976Sth 	/* for now, we only support mirror-mounts */
1023b9238976Sth 	ASSERT(esi != NULL);
1024b9238976Sth 
1025b9238976Sth 	return (esi);
1026b9238976Sth }
1027b9238976Sth 
1028b9238976Sth static void
1029b9238976Sth nfs4_trigger_esi_destroy(ephemeral_servinfo_t *esi, vnode_t *vp)
1030b9238976Sth {
1031b9238976Sth 	rnode4_t *rp = VTOR4(vp);
1032b9238976Sth 
1033b9238976Sth 	ASSERT(RP_ISSTUB(rp));
1034b9238976Sth 
1035b9238976Sth 	/* for now, we only support mirror-mounts */
1036b9238976Sth 	ASSERT(RP_ISSTUB_MIRRORMOUNT(rp));
1037b9238976Sth 
1038b9238976Sth 	/* Currently, no need for an ephemeral type-specific routine */
1039b9238976Sth 
1040b9238976Sth 	/*
1041b9238976Sth 	 * The contents of ephemeral_servinfo_t goes into nfs_args,
1042b9238976Sth 	 * and will be handled by nfs4_trigger_nargs_destroy().
1043b9238976Sth 	 * We need only free the structure itself.
1044b9238976Sth 	 */
1045b9238976Sth 	if (esi != NULL)
1046b9238976Sth 		kmem_free(esi, sizeof (ephemeral_servinfo_t));
1047b9238976Sth }
1048b9238976Sth 
1049b9238976Sth /*
1050b9238976Sth  * Some of this may turn out to be common with other ephemeral types,
1051b9238976Sth  * in which case it should be moved to nfs4_trigger_esi_create(), or a
1052b9238976Sth  * common function called.
1053b9238976Sth  */
1054b9238976Sth static ephemeral_servinfo_t *
1055b9238976Sth nfs4_trigger_esi_create_mirrormount(vnode_t *vp, servinfo4_t *svp)
1056b9238976Sth {
1057b9238976Sth 	char			*stubpath;
1058b9238976Sth 	struct knetconfig	*sikncp, *svkncp;
1059b9238976Sth 	struct netbuf		*bufp;
1060b9238976Sth 	ephemeral_servinfo_t	*esi;
1061b9238976Sth 
1062b9238976Sth 	esi = kmem_zalloc(sizeof (ephemeral_servinfo_t), KM_SLEEP);
1063b9238976Sth 
1064b9238976Sth 	/* initially set to be our type of ephemeral mount; may be added to */
1065b9238976Sth 	esi->esi_mount_flags = NFSMNT_MIRRORMOUNT;
1066b9238976Sth 
1067b9238976Sth 	/*
1068b9238976Sth 	 * We're copying info from the stub rnode's servinfo4, but
1069b9238976Sth 	 * we must create new copies, not pointers, since this information
1070b9238976Sth 	 * is to be associated with the new mount, which will be
1071b9238976Sth 	 * unmounted (and its structures freed) separately
1072b9238976Sth 	 */
1073b9238976Sth 
1074b9238976Sth 	/*
1075b9238976Sth 	 * Sizes passed to kmem_[z]alloc here must match those freed
1076b9238976Sth 	 * in nfs4_free_args()
1077b9238976Sth 	 */
1078b9238976Sth 
1079b9238976Sth 	/*
1080b9238976Sth 	 * We hold sv_lock across kmem_zalloc() calls that may sleep, but this
1081b9238976Sth 	 * is difficult to avoid: as we need to read svp to calculate the
1082b9238976Sth 	 * sizes to be allocated.
1083b9238976Sth 	 */
1084b9238976Sth 	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
1085b9238976Sth 
1086b9238976Sth 	esi->esi_hostname = kmem_zalloc(strlen(svp->sv_hostname) + 1, KM_SLEEP);
1087b9238976Sth 	(void) strcat(esi->esi_hostname, svp->sv_hostname);
1088b9238976Sth 
1089b9238976Sth 	esi->esi_addr = kmem_zalloc(sizeof (struct netbuf), KM_SLEEP);
1090b9238976Sth 	bufp = esi->esi_addr;
1091b9238976Sth 	bufp->len = svp->sv_addr.len;
1092b9238976Sth 	bufp->maxlen = svp->sv_addr.maxlen;
1093b9238976Sth 	bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP);
1094b9238976Sth 	bcopy(svp->sv_addr.buf, bufp->buf, bufp->len);
1095b9238976Sth 
1096b9238976Sth 	esi->esi_knconf = kmem_zalloc(sizeof (*esi->esi_knconf), KM_SLEEP);
1097b9238976Sth 	sikncp = esi->esi_knconf;
1098b9238976Sth 	svkncp = svp->sv_knconf;
1099b9238976Sth 	sikncp->knc_semantics = svkncp->knc_semantics;
1100b9238976Sth 	sikncp->knc_protofmly = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
1101b9238976Sth 	(void) strcat((char *)sikncp->knc_protofmly,
1102b9238976Sth 	    (char *)svkncp->knc_protofmly);
1103b9238976Sth 	sikncp->knc_proto = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
1104b9238976Sth 	(void) strcat((char *)sikncp->knc_proto, (char *)svkncp->knc_proto);
1105b9238976Sth 	sikncp->knc_rdev = svkncp->knc_rdev;
1106b9238976Sth 
1107b9238976Sth 	/*
1108b9238976Sth 	 * Used when AUTH_DH is negotiated.
1109b9238976Sth 	 *
1110b9238976Sth 	 * This is ephemeral mount-type specific, since it contains the
1111b9238976Sth 	 * server's time-sync syncaddr.
1112b9238976Sth 	 */
1113b9238976Sth 	if (svp->sv_dhsec) {
1114b9238976Sth 		struct netbuf *bufp;
1115b9238976Sth 		sec_data_t *sdata;
1116b9238976Sth 		dh_k4_clntdata_t *data;
1117b9238976Sth 
1118b9238976Sth 		sdata = svp->sv_dhsec;
1119b9238976Sth 		data = (dh_k4_clntdata_t *)sdata->data;
1120b9238976Sth 		ASSERT(sdata->rpcflavor == AUTH_DH);
1121b9238976Sth 
1122b9238976Sth 		bufp = kmem_zalloc(sizeof (struct netbuf), KM_SLEEP);
1123b9238976Sth 		bufp->len = data->syncaddr.len;
1124b9238976Sth 		bufp->maxlen = data->syncaddr.maxlen;
1125b9238976Sth 		bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP);
1126b9238976Sth 		bcopy(data->syncaddr.buf, bufp->buf, bufp->len);
1127b9238976Sth 		esi->esi_syncaddr = bufp;
1128b9238976Sth 
1129b9238976Sth 		if (data->netname != NULL) {
1130b9238976Sth 			int nmlen = data->netnamelen;
1131b9238976Sth 
1132b9238976Sth 			/*
1133b9238976Sth 			 * We need to copy from a dh_k4_clntdata_t
1134b9238976Sth 			 * netname/netnamelen pair to a NUL-terminated
1135b9238976Sth 			 * netname string suitable for putting in nfs_args,
1136b9238976Sth 			 * where the latter has no netnamelen field.
1137b9238976Sth 			 */
1138b9238976Sth 			esi->esi_netname = kmem_zalloc(nmlen + 1, KM_SLEEP);
1139b9238976Sth 			bcopy(data->netname, esi->esi_netname, nmlen);
1140b9238976Sth 		}
1141b9238976Sth 	} else {
1142b9238976Sth 		esi->esi_syncaddr = NULL;
1143b9238976Sth 		esi->esi_netname = NULL;
1144b9238976Sth 	}
1145b9238976Sth 
1146b9238976Sth 	stubpath = fn_path(VTOSV(vp)->sv_name);
1147b9238976Sth 	/* step over initial '.', to avoid e.g. sv_path: "/tank./ws" */
1148b9238976Sth 	ASSERT(*stubpath == '.');
1149b9238976Sth 	stubpath += 1;
1150b9238976Sth 
1151b9238976Sth 	/* for nfs_args->fh */
1152b9238976Sth 	esi->esi_path_len = strlen(svp->sv_path) + strlen(stubpath) + 1;
1153b9238976Sth 	esi->esi_path = kmem_zalloc(esi->esi_path_len, KM_SLEEP);
1154b9238976Sth 	(void) strcat(esi->esi_path, svp->sv_path);
1155b9238976Sth 	(void) strcat(esi->esi_path, stubpath);
1156b9238976Sth 
1157b9238976Sth 	stubpath -= 1;
1158b9238976Sth 	/* stubpath allocated by fn_path() */
1159b9238976Sth 	kmem_free(stubpath, strlen(stubpath) + 1);
1160b9238976Sth 
1161b9238976Sth 	nfs_rw_exit(&svp->sv_lock);
1162b9238976Sth 
1163b9238976Sth 	return (esi);
1164b9238976Sth }
1165b9238976Sth 
1166b9238976Sth /*
1167b9238976Sth  * Assemble the args, and call the generic VFS mount function to
1168b9238976Sth  * finally perform the ephemeral mount.
1169b9238976Sth  */
1170b9238976Sth static int
1171b9238976Sth nfs4_trigger_domount(vnode_t *stubvp, domount_args_t *dma, vfs_t **vfsp,
1172*6962f5b8SThomas Haynes     cred_t *cr, vnode_t **newvpp)
1173b9238976Sth {
1174b9238976Sth 	struct mounta	*uap;
1175b9238976Sth 	char		*mntpt, *orig_path, *path;
1176b9238976Sth 	const char	*orig_mntpt;
1177b9238976Sth 	int		retval;
1178b9238976Sth 	int		mntpt_len;
1179b9238976Sth 	int		spec_len;
1180b9238976Sth 	zone_t		*zone = curproc->p_zone;
1181b9238976Sth 	bool_t		has_leading_slash;
1182*6962f5b8SThomas Haynes 	int		i;
1183b9238976Sth 
1184b9238976Sth 	vfs_t			*stubvfsp = stubvp->v_vfsp;
1185b9238976Sth 	ephemeral_servinfo_t	*esi = dma->dma_esi;
1186b9238976Sth 	struct nfs_args		*nargs = dma->dma_nargs;
1187b9238976Sth 
1188b9238976Sth 	/* first, construct the mount point for the ephemeral mount */
1189b9238976Sth 	orig_path = path = fn_path(VTOSV(stubvp)->sv_name);
1190b9238976Sth 	orig_mntpt = (char *)refstr_value(stubvfsp->vfs_mntpt);
1191b9238976Sth 
1192b9238976Sth 	if (*orig_path == '.')
1193b9238976Sth 		orig_path++;
1194b9238976Sth 
1195b9238976Sth 	/*
1196b9238976Sth 	 * Get rid of zone's root path
1197b9238976Sth 	 */
1198b9238976Sth 	if (zone != global_zone) {
1199b9238976Sth 		/*
1200b9238976Sth 		 * -1 for trailing '/' and -1 for EOS.
1201b9238976Sth 		 */
1202b9238976Sth 		if (strncmp(zone->zone_rootpath, orig_mntpt,
1203b9238976Sth 		    zone->zone_rootpathlen - 1) == 0) {
1204b9238976Sth 			orig_mntpt += (zone->zone_rootpathlen - 2);
1205b9238976Sth 		}
1206b9238976Sth 	}
1207b9238976Sth 
1208b9238976Sth 	mntpt_len = strlen(orig_mntpt) + strlen(orig_path);
1209b9238976Sth 	mntpt = kmem_zalloc(mntpt_len + 1, KM_SLEEP);
1210b9238976Sth 	(void) strcat(mntpt, orig_mntpt);
1211b9238976Sth 	(void) strcat(mntpt, orig_path);
1212b9238976Sth 
1213b9238976Sth 	kmem_free(path, strlen(path) + 1);
1214b9238976Sth 	path = esi->esi_path;
1215b9238976Sth 	if (*path == '.')
1216b9238976Sth 		path++;
1217b9238976Sth 	if (path[0] == '/' && path[1] == '/')
1218b9238976Sth 		path++;
1219b9238976Sth 	has_leading_slash = (*path == '/');
1220b9238976Sth 
1221b9238976Sth 	spec_len = strlen(dma->dma_hostlist);
1222b9238976Sth 	spec_len += strlen(path);
1223b9238976Sth 
1224b9238976Sth 	/* We are going to have to add this in */
1225b9238976Sth 	if (!has_leading_slash)
1226b9238976Sth 		spec_len++;
1227b9238976Sth 
1228b9238976Sth 	/* We need to get the ':' for dma_hostlist:esi_path */
1229b9238976Sth 	spec_len++;
1230b9238976Sth 
1231b9238976Sth 	uap = kmem_zalloc(sizeof (struct mounta), KM_SLEEP);
1232b9238976Sth 	uap->spec = kmem_zalloc(spec_len + 1, KM_SLEEP);
1233b9238976Sth 	(void) snprintf(uap->spec, spec_len + 1, "%s:%s%s", dma->dma_hostlist,
1234b9238976Sth 	    has_leading_slash ? "" : "/", path);
1235b9238976Sth 
1236b9238976Sth 	uap->dir = mntpt;
1237b9238976Sth 
1238b9238976Sth 	uap->flags = MS_SYSSPACE | MS_DATA;
1239b9238976Sth 	/* fstype-independent mount options not covered elsewhere */
1240b9238976Sth 	/* copy parent's mount(1M) "-m" flag */
1241b9238976Sth 	if (stubvfsp->vfs_flag & VFS_NOMNTTAB)
1242b9238976Sth 		uap->flags |= MS_NOMNTTAB;
1243b9238976Sth 
1244b9238976Sth 	uap->fstype = MNTTYPE_NFS4;
1245b9238976Sth 	uap->dataptr = (char *)nargs;
1246b9238976Sth 	/* not needed for MS_SYSSPACE */
1247b9238976Sth 	uap->datalen = 0;
1248b9238976Sth 
1249b9238976Sth 	/* use optptr to pass in extra mount options */
1250b9238976Sth 	uap->flags |= MS_OPTIONSTR;
1251b9238976Sth 	uap->optptr = nfs4_trigger_create_mntopts(stubvfsp);
1252b9238976Sth 	if (uap->optptr == NULL) {
1253b9238976Sth 		retval = EINVAL;
1254b9238976Sth 		goto done;
1255b9238976Sth 	}
1256546a3997SThomas Haynes 
1257b9238976Sth 	/* domount() expects us to count the trailing NUL */
1258b9238976Sth 	uap->optlen = strlen(uap->optptr) + 1;
1259b9238976Sth 
1260*6962f5b8SThomas Haynes 	/*
1261*6962f5b8SThomas Haynes 	 * If we get EBUSY, we try again once to see if we can perform
1262*6962f5b8SThomas Haynes 	 * the mount. We do this because of a spurious race condition.
1263*6962f5b8SThomas Haynes 	 */
1264*6962f5b8SThomas Haynes 	for (i = 0; i < 2; i++) {
1265*6962f5b8SThomas Haynes 		int	error;
1266*6962f5b8SThomas Haynes 		bool_t	was_mounted;
1267*6962f5b8SThomas Haynes 
1268*6962f5b8SThomas Haynes 		retval = domount(NULL, uap, stubvp, cr, vfsp);
1269*6962f5b8SThomas Haynes 		if (retval == 0) {
1270*6962f5b8SThomas Haynes 			retval = VFS_ROOT(*vfsp, newvpp);
1271*6962f5b8SThomas Haynes 			VFS_RELE(*vfsp);
1272*6962f5b8SThomas Haynes 			break;
1273*6962f5b8SThomas Haynes 		} else if (retval != EBUSY) {
1274*6962f5b8SThomas Haynes 			break;
1275*6962f5b8SThomas Haynes 		}
1276*6962f5b8SThomas Haynes 
1277*6962f5b8SThomas Haynes 		/*
1278*6962f5b8SThomas Haynes 		 * We might find it mounted by the other racer...
1279*6962f5b8SThomas Haynes 		 */
1280*6962f5b8SThomas Haynes 		error = nfs4_trigger_mounted_already(stubvp,
1281*6962f5b8SThomas Haynes 		    newvpp, &was_mounted, vfsp);
1282*6962f5b8SThomas Haynes 		if (error) {
1283*6962f5b8SThomas Haynes 			goto done;
1284*6962f5b8SThomas Haynes 		} else if (was_mounted) {
1285*6962f5b8SThomas Haynes 			retval = 0;
1286*6962f5b8SThomas Haynes 			break;
1287*6962f5b8SThomas Haynes 		}
1288*6962f5b8SThomas Haynes 	}
1289546a3997SThomas Haynes 
1290b9238976Sth done:
1291b9238976Sth 	if (uap->optptr)
1292b9238976Sth 		nfs4_trigger_destroy_mntopts(uap->optptr);
1293b9238976Sth 
1294b9238976Sth 	kmem_free(uap->spec, spec_len + 1);
1295b9238976Sth 	kmem_free(uap, sizeof (struct mounta));
1296b9238976Sth 	kmem_free(mntpt, mntpt_len + 1);
1297b9238976Sth 
1298b9238976Sth 	return (retval);
1299b9238976Sth }
1300b9238976Sth 
1301b9238976Sth /*
1302b9238976Sth  * Build an nfs_args structure for passing to domount().
1303b9238976Sth  *
1304b9238976Sth  * Ephemeral mount-type specific data comes from the ephemeral_servinfo_t;
1305b9238976Sth  * generic data - common to all ephemeral mount types - is read directly
1306b9238976Sth  * from the parent mount's servinfo4_t and mntinfo4_t, via the stub vnode.
1307b9238976Sth  */
1308b9238976Sth static struct nfs_args *
1309b9238976Sth nfs4_trigger_nargs_create(mntinfo4_t *mi, servinfo4_t *svp,
1310b9238976Sth     ephemeral_servinfo_t *esi)
1311b9238976Sth {
1312b9238976Sth 	sec_data_t *secdata;
1313b9238976Sth 	struct nfs_args *nargs;
1314b9238976Sth 
1315b9238976Sth 	/* setup the nfs args */
1316b9238976Sth 	nargs = kmem_zalloc(sizeof (struct nfs_args), KM_SLEEP);
1317b9238976Sth 
1318b9238976Sth 	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
1319b9238976Sth 
1320b9238976Sth 	nargs->addr = esi->esi_addr;
1321b9238976Sth 
1322b9238976Sth 	/* for AUTH_DH by negotiation */
1323b9238976Sth 	if (esi->esi_syncaddr || esi->esi_netname) {
1324b9238976Sth 		nargs->flags |= NFSMNT_SECURE;
1325b9238976Sth 		nargs->syncaddr = esi->esi_syncaddr;
1326b9238976Sth 		nargs->netname = esi->esi_netname;
1327b9238976Sth 	}
1328b9238976Sth 
1329b9238976Sth 	nargs->flags |= NFSMNT_KNCONF;
1330b9238976Sth 	nargs->knconf = esi->esi_knconf;
1331b9238976Sth 	nargs->flags |= NFSMNT_HOSTNAME;
1332b9238976Sth 	nargs->hostname = esi->esi_hostname;
1333b9238976Sth 	nargs->fh = esi->esi_path;
1334b9238976Sth 
1335b9238976Sth 	/* general mount settings, all copied from parent mount */
1336b9238976Sth 	mutex_enter(&mi->mi_lock);
1337b9238976Sth 
1338b9238976Sth 	if (!(mi->mi_flags & MI4_HARD))
1339b9238976Sth 		nargs->flags |= NFSMNT_SOFT;
1340b9238976Sth 
1341b9238976Sth 	nargs->flags |= NFSMNT_WSIZE | NFSMNT_RSIZE | NFSMNT_TIMEO |
1342b9238976Sth 	    NFSMNT_RETRANS;
1343b9238976Sth 	nargs->wsize = mi->mi_stsize;
1344b9238976Sth 	nargs->rsize = mi->mi_tsize;
1345b9238976Sth 	nargs->timeo = mi->mi_timeo;
1346b9238976Sth 	nargs->retrans = mi->mi_retrans;
1347b9238976Sth 
1348b9238976Sth 	if (mi->mi_flags & MI4_INT)
1349b9238976Sth 		nargs->flags |= NFSMNT_INT;
1350b9238976Sth 	if (mi->mi_flags & MI4_NOAC)
1351b9238976Sth 		nargs->flags |= NFSMNT_NOAC;
1352b9238976Sth 
1353b9238976Sth 	nargs->flags |= NFSMNT_ACREGMIN | NFSMNT_ACREGMAX | NFSMNT_ACDIRMIN |
1354b9238976Sth 	    NFSMNT_ACDIRMAX;
1355b9238976Sth 	nargs->acregmin = HR2SEC(mi->mi_acregmin);
1356b9238976Sth 	nargs->acregmax = HR2SEC(mi->mi_acregmax);
1357b9238976Sth 	nargs->acdirmin = HR2SEC(mi->mi_acdirmin);
1358b9238976Sth 	nargs->acdirmax = HR2SEC(mi->mi_acdirmax);
1359b9238976Sth 
1360b9238976Sth 	if (mi->mi_flags & MI4_NOCTO)
1361b9238976Sth 		nargs->flags |= NFSMNT_NOCTO;
1362b9238976Sth 	if (mi->mi_flags & MI4_GRPID)
1363b9238976Sth 		nargs->flags |= NFSMNT_GRPID;
1364b9238976Sth 	if (mi->mi_flags & MI4_LLOCK)
1365b9238976Sth 		nargs->flags |= NFSMNT_LLOCK;
1366b9238976Sth 	if (mi->mi_flags & MI4_NOPRINT)
1367b9238976Sth 		nargs->flags |= NFSMNT_NOPRINT;
1368b9238976Sth 	if (mi->mi_flags & MI4_DIRECTIO)
1369b9238976Sth 		nargs->flags |= NFSMNT_DIRECTIO;
1370b9238976Sth 	if (mi->mi_flags & MI4_PUBLIC)
1371b9238976Sth 		nargs->flags |= NFSMNT_PUBLIC;
1372b9238976Sth 
1373b9238976Sth 	mutex_exit(&mi->mi_lock);
1374b9238976Sth 
1375b9238976Sth 	/* add any specific flags for this type of ephemeral mount */
1376b9238976Sth 	nargs->flags |= esi->esi_mount_flags;
1377b9238976Sth 
1378b9238976Sth 	/*
1379b9238976Sth 	 * Security data & negotiation policy.
1380b9238976Sth 	 *
1381b9238976Sth 	 * We need to preserve the parent mount's preference for security
1382b9238976Sth 	 * negotiation, translating SV4_TRYSECDEFAULT -> NFSMNT_SECDEFAULT.
1383b9238976Sth 	 *
1384b9238976Sth 	 * If SV4_TRYSECDEFAULT is not set, that indicates that a specific
1385b9238976Sth 	 * security flavour was requested, with data in sv_secdata, and that
1386b9238976Sth 	 * no negotiation should occur. If this specified flavour fails, that's
1387b9238976Sth 	 * it. We will copy sv_secdata, and not set NFSMNT_SECDEFAULT.
1388b9238976Sth 	 *
1389b9238976Sth 	 * If SV4_TRYSECDEFAULT is set, then we start with a passed-in
1390b9238976Sth 	 * default flavour, in sv_secdata, but then negotiate a new flavour.
1391b9238976Sth 	 * Possible flavours are recorded in an array in sv_secinfo, with
1392b9238976Sth 	 * currently in-use flavour pointed to by sv_currsec.
1393b9238976Sth 	 *
1394b9238976Sth 	 * If sv_currsec is set, i.e. if negotiation has already occurred,
1395b9238976Sth 	 * we will copy sv_currsec. Otherwise, copy sv_secdata. Regardless,
1396b9238976Sth 	 * we will set NFSMNT_SECDEFAULT, to enable negotiation.
1397b9238976Sth 	 */
1398b9238976Sth 	if (svp->sv_flags & SV4_TRYSECDEFAULT) {
1399b9238976Sth 		/* enable negotiation for ephemeral mount */
1400b9238976Sth 		nargs->flags |= NFSMNT_SECDEFAULT;
1401b9238976Sth 
1402b9238976Sth 		/*
1403b9238976Sth 		 * As a starting point for negotiation, copy parent
1404b9238976Sth 		 * mount's negotiated flavour (sv_currsec) if available,
1405b9238976Sth 		 * or its passed-in flavour (sv_secdata) if not.
1406b9238976Sth 		 */
1407b9238976Sth 		if (svp->sv_currsec != NULL)
1408b9238976Sth 			secdata = copy_sec_data(svp->sv_currsec);
1409b9238976Sth 		else if (svp->sv_secdata != NULL)
1410b9238976Sth 			secdata = copy_sec_data(svp->sv_secdata);
1411b9238976Sth 		else
1412b9238976Sth 			secdata = NULL;
1413b9238976Sth 	} else {
1414b9238976Sth 		/* do not enable negotiation; copy parent's passed-in flavour */
1415b9238976Sth 		if (svp->sv_secdata != NULL)
1416b9238976Sth 			secdata = copy_sec_data(svp->sv_secdata);
1417b9238976Sth 		else
1418b9238976Sth 			secdata = NULL;
1419b9238976Sth 	}
1420b9238976Sth 
1421b9238976Sth 	nfs_rw_exit(&svp->sv_lock);
1422b9238976Sth 
1423b9238976Sth 	nargs->flags |= NFSMNT_NEWARGS;
1424b9238976Sth 	nargs->nfs_args_ext = NFS_ARGS_EXTB;
1425b9238976Sth 	nargs->nfs_ext_u.nfs_extB.secdata = secdata;
1426b9238976Sth 
1427b9238976Sth 	/* for NFS RO failover; caller will set if necessary */
1428b9238976Sth 	nargs->nfs_ext_u.nfs_extB.next = NULL;
1429b9238976Sth 
1430b9238976Sth 	return (nargs);
1431b9238976Sth }
1432b9238976Sth 
1433b9238976Sth static void
1434b9238976Sth nfs4_trigger_nargs_destroy(struct nfs_args *nargs)
1435b9238976Sth {
1436b9238976Sth 	/*
1437b9238976Sth 	 * Either the mount failed, in which case the data is not needed, or
1438b9238976Sth 	 * nfs4_mount() has either taken copies of what it needs or,
1439b9238976Sth 	 * where it has merely copied the ptr, it has set *our* ptr to NULL,
1440b9238976Sth 	 * whereby nfs4_free_args() will ignore it.
1441b9238976Sth 	 */
1442b9238976Sth 	nfs4_free_args(nargs);
1443b9238976Sth 	kmem_free(nargs, sizeof (struct nfs_args));
1444b9238976Sth }
1445b9238976Sth 
1446b9238976Sth /*
1447b9238976Sth  * When we finally get into the mounting, we need to add this
1448b9238976Sth  * node to the ephemeral tree.
1449b9238976Sth  *
1450b9238976Sth  * This is called from nfs4_mount().
1451b9238976Sth  */
1452d3a14591SThomas Haynes int
1453b9238976Sth nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp)
1454b9238976Sth {
1455b9238976Sth 	mntinfo4_t		*mi_parent;
1456b9238976Sth 	nfs4_ephemeral_t	*eph;
1457b9238976Sth 	nfs4_ephemeral_tree_t	*net;
1458b9238976Sth 
1459b9238976Sth 	nfs4_ephemeral_t	*prior;
1460b9238976Sth 	nfs4_ephemeral_t	*child;
1461b9238976Sth 
1462b9238976Sth 	nfs4_ephemeral_t	*peer;
1463b9238976Sth 
1464b9238976Sth 	nfs4_trigger_globals_t	*ntg;
1465b9238976Sth 	zone_t			*zone = curproc->p_zone;
1466b9238976Sth 
1467d3a14591SThomas Haynes 	int			rc = 0;
1468d3a14591SThomas Haynes 
1469b9238976Sth 	mi_parent = VTOMI4(mvp);
1470b9238976Sth 
1471b9238976Sth 	/*
1472b9238976Sth 	 * Get this before grabbing anything else!
1473b9238976Sth 	 */
1474b9238976Sth 	ntg = zone_getspecific(nfs4_ephemeral_key, zone);
1475b9238976Sth 	if (!ntg->ntg_thread_started) {
1476b9238976Sth 		nfs4_ephemeral_start_harvester(ntg);
1477b9238976Sth 	}
1478b9238976Sth 
1479b9238976Sth 	mutex_enter(&mi_parent->mi_lock);
1480b9238976Sth 	mutex_enter(&mi->mi_lock);
1481b9238976Sth 
1482d3a14591SThomas Haynes 	net = mi->mi_ephemeral_tree =
1483d3a14591SThomas Haynes 	    mi_parent->mi_ephemeral_tree;
1484d3a14591SThomas Haynes 
1485d3a14591SThomas Haynes 	/*
1486d3a14591SThomas Haynes 	 * If the mi_ephemeral_tree is NULL, then it
1487d3a14591SThomas Haynes 	 * means that either the harvester or a manual
1488d3a14591SThomas Haynes 	 * umount has cleared the tree out right before
1489d3a14591SThomas Haynes 	 * we got here.
1490d3a14591SThomas Haynes 	 *
1491d3a14591SThomas Haynes 	 * There is nothing we can do here, so return
1492d3a14591SThomas Haynes 	 * to the caller and let them decide whether they
1493d3a14591SThomas Haynes 	 * try again.
1494d3a14591SThomas Haynes 	 */
1495d3a14591SThomas Haynes 	if (net == NULL) {
1496d3a14591SThomas Haynes 		mutex_exit(&mi->mi_lock);
1497d3a14591SThomas Haynes 		mutex_exit(&mi_parent->mi_lock);
1498d3a14591SThomas Haynes 
1499d3a14591SThomas Haynes 		return (EBUSY);
1500d3a14591SThomas Haynes 	}
1501d3a14591SThomas Haynes 
1502d3a14591SThomas Haynes 	nfs4_ephemeral_tree_hold(net);
1503d3a14591SThomas Haynes 
1504b9238976Sth 	/*
1505b9238976Sth 	 * We need to tack together the ephemeral mount
1506b9238976Sth 	 * with this new mntinfo.
1507b9238976Sth 	 */
1508b9238976Sth 	eph = kmem_zalloc(sizeof (*eph), KM_SLEEP);
1509b9238976Sth 	eph->ne_mount = mi;
1510b9238976Sth 	eph->ne_ref_time = gethrestime_sec();
1511b9238976Sth 
1512b9238976Sth 	/*
1513b9238976Sth 	 * We need to tell the ephemeral mount when
1514b9238976Sth 	 * to time out.
1515b9238976Sth 	 */
1516b9238976Sth 	eph->ne_mount_to = ntg->ntg_mount_to;
1517b9238976Sth 
1518b9238976Sth 	mi->mi_flags |= MI4_EPHEMERAL;
1519b9238976Sth 	mi->mi_ephemeral = eph;
1520b9238976Sth 
1521b9238976Sth 	/*
1522b9238976Sth 	 * If the enclosing mntinfo4 is also ephemeral,
1523b9238976Sth 	 * then we need to point to its enclosing parent.
1524b9238976Sth 	 * Else the enclosing mntinfo4 is the enclosing parent.
1525b9238976Sth 	 *
1526b9238976Sth 	 * We also need to weave this ephemeral node
1527b9238976Sth 	 * into the tree.
1528b9238976Sth 	 */
1529b9238976Sth 	if (mi_parent->mi_flags & MI4_EPHEMERAL) {
1530b9238976Sth 		/*
1531b9238976Sth 		 * We need to decide if we are
1532b9238976Sth 		 * the root node of this branch
1533b9238976Sth 		 * or if we are a sibling of this
1534b9238976Sth 		 * branch.
1535b9238976Sth 		 */
1536b9238976Sth 		prior = mi_parent->mi_ephemeral;
1537d3a14591SThomas Haynes 		if (prior == NULL) {
1538d3a14591SThomas Haynes 			/*
1539d3a14591SThomas Haynes 			 * Race condition, clean up, and
1540d3a14591SThomas Haynes 			 * let caller handle mntinfo.
1541d3a14591SThomas Haynes 			 */
1542d3a14591SThomas Haynes 			mi->mi_flags &= ~MI4_EPHEMERAL;
1543d3a14591SThomas Haynes 			mi->mi_ephemeral = NULL;
1544d3a14591SThomas Haynes 			kmem_free(eph, sizeof (*eph));
1545d3a14591SThomas Haynes 			rc = EBUSY;
1546b9238976Sth 		} else {
1547d3a14591SThomas Haynes 			if (prior->ne_child == NULL) {
1548d3a14591SThomas Haynes 				prior->ne_child = eph;
1549d3a14591SThomas Haynes 			} else {
1550d3a14591SThomas Haynes 				child = prior->ne_child;
1551b9238976Sth 
1552d3a14591SThomas Haynes 				prior->ne_child = eph;
1553d3a14591SThomas Haynes 				eph->ne_peer = child;
1554b9238976Sth 
1555d3a14591SThomas Haynes 				child->ne_prior = eph;
1556d3a14591SThomas Haynes 			}
1557b9238976Sth 
1558d3a14591SThomas Haynes 			eph->ne_prior = prior;
1559d3a14591SThomas Haynes 		}
1560b9238976Sth 	} else {
1561b9238976Sth 		/*
1562b9238976Sth 		 * The parent mntinfo4 is the non-ephemeral
1563b9238976Sth 		 * root of the ephemeral tree. We
1564b9238976Sth 		 * need to decide if we are the root
1565b9238976Sth 		 * node of that tree or if we are a
1566b9238976Sth 		 * sibling of the root node.
1567b9238976Sth 		 *
1568b9238976Sth 		 * We are the root if there is no
1569b9238976Sth 		 * other node.
1570b9238976Sth 		 */
1571b9238976Sth 		if (net->net_root == NULL) {
1572b9238976Sth 			net->net_root = eph;
1573b9238976Sth 		} else {
1574b9238976Sth 			eph->ne_peer = peer = net->net_root;
1575b9238976Sth 			ASSERT(peer != NULL);
1576b9238976Sth 			net->net_root = eph;
1577b9238976Sth 
1578b9238976Sth 			peer->ne_prior = eph;
1579b9238976Sth 		}
1580b9238976Sth 
1581b9238976Sth 		eph->ne_prior = NULL;
1582b9238976Sth 	}
1583b9238976Sth 
1584d3a14591SThomas Haynes 	nfs4_ephemeral_tree_rele(net);
1585d3a14591SThomas Haynes 
1586b9238976Sth 	mutex_exit(&mi->mi_lock);
1587b9238976Sth 	mutex_exit(&mi_parent->mi_lock);
1588d3a14591SThomas Haynes 
1589d3a14591SThomas Haynes 	return (rc);
1590b9238976Sth }
1591b9238976Sth 
1592b9238976Sth /*
1593b9238976Sth  * Commit the changes to the ephemeral tree for removing this node.
1594b9238976Sth  */
1595b9238976Sth static void
1596b9238976Sth nfs4_ephemeral_umount_cleanup(nfs4_ephemeral_t *eph)
1597b9238976Sth {
1598b9238976Sth 	nfs4_ephemeral_t	*e = eph;
1599b9238976Sth 	nfs4_ephemeral_t	*peer;
1600b9238976Sth 	nfs4_ephemeral_t	*prior;
1601b9238976Sth 
1602b9238976Sth 	peer = eph->ne_peer;
1603b9238976Sth 	prior = e->ne_prior;
1604b9238976Sth 
1605b9238976Sth 	/*
1606b9238976Sth 	 * If this branch root was not the
1607b9238976Sth 	 * tree root, then we need to fix back pointers.
1608b9238976Sth 	 */
1609b9238976Sth 	if (prior) {
1610b9238976Sth 		if (prior->ne_child == e) {
1611b9238976Sth 			prior->ne_child = peer;
1612b9238976Sth 		} else {
1613b9238976Sth 			prior->ne_peer = peer;
1614b9238976Sth 		}
1615b9238976Sth 
1616b9238976Sth 		if (peer)
1617b9238976Sth 			peer->ne_prior = prior;
1618b9238976Sth 	} else if (peer) {
1619b9238976Sth 		peer->ne_mount->mi_ephemeral_tree->net_root = peer;
1620b9238976Sth 		peer->ne_prior = NULL;
1621b9238976Sth 	} else {
1622b9238976Sth 		e->ne_mount->mi_ephemeral_tree->net_root = NULL;
1623b9238976Sth 	}
1624b9238976Sth }
1625b9238976Sth 
1626b9238976Sth /*
1627b9238976Sth  * We want to avoid recursion at all costs. So we need to
1628b9238976Sth  * unroll the tree. We do this by a depth first traversal to
1629b9238976Sth  * leaf nodes. We blast away the leaf and work our way back
1630b9238976Sth  * up and down the tree.
1631b9238976Sth  */
1632b9238976Sth static int
1633b9238976Sth nfs4_ephemeral_unmount_engine(nfs4_ephemeral_t *eph,
1634b9238976Sth     int isTreeRoot, int flag, cred_t *cr)
1635b9238976Sth {
1636b9238976Sth 	nfs4_ephemeral_t	*e = eph;
1637b9238976Sth 	nfs4_ephemeral_t	*prior;
1638b9238976Sth 	mntinfo4_t		*mi;
1639b9238976Sth 	vfs_t			*vfsp;
1640b9238976Sth 	int			error;
1641b9238976Sth 
1642b9238976Sth 	/*
1643b9238976Sth 	 * We use the loop while unrolling the ephemeral tree.
1644b9238976Sth 	 */
1645b9238976Sth 	for (;;) {
1646b9238976Sth 		/*
1647b9238976Sth 		 * First we walk down the child.
1648b9238976Sth 		 */
1649b9238976Sth 		if (e->ne_child) {
1650b9238976Sth 			prior = e;
1651b9238976Sth 			e = e->ne_child;
1652b9238976Sth 			continue;
1653b9238976Sth 		}
1654b9238976Sth 
1655b9238976Sth 		/*
1656b9238976Sth 		 * If we are the root of the branch we are removing,
1657b9238976Sth 		 * we end it here. But if the branch is the root of
1658b9238976Sth 		 * the tree, we have to forge on. We do not consider
1659b9238976Sth 		 * the peer list for the root because while it may
1660b9238976Sth 		 * be okay to remove, it is both extra work and a
1661b9238976Sth 		 * potential for a false-positive error to stall the
1662b9238976Sth 		 * unmount attempt.
1663b9238976Sth 		 */
1664b9238976Sth 		if (e == eph && isTreeRoot == FALSE)
1665b9238976Sth 			return (0);
1666b9238976Sth 
1667b9238976Sth 		/*
1668b9238976Sth 		 * Next we walk down the peer list.
1669b9238976Sth 		 */
1670b9238976Sth 		if (e->ne_peer) {
1671b9238976Sth 			prior = e;
1672b9238976Sth 			e = e->ne_peer;
1673b9238976Sth 			continue;
1674b9238976Sth 		}
1675b9238976Sth 
1676b9238976Sth 		/*
1677b9238976Sth 		 * We can only remove the node passed in by the
1678b9238976Sth 		 * caller if it is the root of the ephemeral tree.
1679b9238976Sth 		 * Otherwise, the caller will remove it.
1680b9238976Sth 		 */
1681b9238976Sth 		if (e == eph && isTreeRoot == FALSE)
1682b9238976Sth 			return (0);
1683b9238976Sth 
1684b9238976Sth 		/*
1685b9238976Sth 		 * Okay, we have a leaf node, time
1686b9238976Sth 		 * to prune it!
1687b9238976Sth 		 *
1688b9238976Sth 		 * Note that prior can only be NULL if
1689b9238976Sth 		 * and only if it is the root of the
1690b9238976Sth 		 * ephemeral tree.
1691b9238976Sth 		 */
1692b9238976Sth 		prior = e->ne_prior;
1693b9238976Sth 
1694b9238976Sth 		mi = e->ne_mount;
1695b9238976Sth 		mutex_enter(&mi->mi_lock);
1696b9238976Sth 		vfsp = mi->mi_vfsp;
1697b9238976Sth 
1698b9238976Sth 		/*
1699b9238976Sth 		 * Cleared by umount2_engine.
1700b9238976Sth 		 */
1701b9238976Sth 		VFS_HOLD(vfsp);
1702b9238976Sth 
1703b9238976Sth 		/*
1704b9238976Sth 		 * Inform nfs4_unmount to not recursively
1705b9238976Sth 		 * descend into this node's children when it
1706b9238976Sth 		 * gets processed.
1707b9238976Sth 		 */
1708b9238976Sth 		mi->mi_flags |= MI4_EPHEMERAL_RECURSED;
1709b9238976Sth 		mutex_exit(&mi->mi_lock);
1710b9238976Sth 
1711b9238976Sth 		error = umount2_engine(vfsp, flag, cr, FALSE);
1712b9238976Sth 		if (error) {
1713b9238976Sth 			/*
1714b9238976Sth 			 * We need to reenable nfs4_unmount's ability
1715b9238976Sth 			 * to recursively descend on this node.
1716b9238976Sth 			 */
1717b9238976Sth 			mutex_enter(&mi->mi_lock);
1718b9238976Sth 			mi->mi_flags &= ~MI4_EPHEMERAL_RECURSED;
1719b9238976Sth 			mutex_exit(&mi->mi_lock);
1720b9238976Sth 
1721b9238976Sth 			return (error);
1722b9238976Sth 		}
1723b9238976Sth 
1724b9238976Sth 		/*
1725b9238976Sth 		 * If we are the current node, we do not want to
1726b9238976Sth 		 * touch anything else. At this point, the only
1727b9238976Sth 		 * way the current node can have survived to here
1728b9238976Sth 		 * is if it is the root of the ephemeral tree and
1729b9238976Sth 		 * we are unmounting the enclosing mntinfo4.
1730b9238976Sth 		 */
1731b9238976Sth 		if (e == eph) {
1732b9238976Sth 			ASSERT(prior == NULL);
1733b9238976Sth 			return (0);
1734b9238976Sth 		}
1735b9238976Sth 
1736b9238976Sth 		/*
1737b9238976Sth 		 * Stitch up the prior node. Note that since
1738b9238976Sth 		 * we have handled the root of the tree, prior
1739b9238976Sth 		 * must be non-NULL.
1740b9238976Sth 		 */
1741b9238976Sth 		ASSERT(prior != NULL);
1742b9238976Sth 		if (prior->ne_child == e) {
1743b9238976Sth 			prior->ne_child = NULL;
1744b9238976Sth 		} else {
1745b9238976Sth 			ASSERT(prior->ne_peer == e);
1746b9238976Sth 
1747b9238976Sth 			prior->ne_peer = NULL;
1748b9238976Sth 		}
1749b9238976Sth 
1750b9238976Sth 		e = prior;
1751b9238976Sth 	}
1752b9238976Sth 
1753b9238976Sth 	/* NOTREACHED */
1754b9238976Sth }
1755b9238976Sth 
1756b9238976Sth /*
1757b9238976Sth  * Common code to safely release net_cnt_lock and net_tree_lock
1758b9238976Sth  */
1759b9238976Sth void
1760b9238976Sth nfs4_ephemeral_umount_unlock(bool_t *pmust_unlock,
1761d708af74SThomas Haynes     bool_t *pmust_rele, nfs4_ephemeral_tree_t **pnet)
1762b9238976Sth {
1763b9238976Sth 	nfs4_ephemeral_tree_t	*net = *pnet;
1764b9238976Sth 
1765b9238976Sth 	if (*pmust_unlock) {
1766b9238976Sth 		mutex_enter(&net->net_cnt_lock);
1767b9238976Sth 		net->net_status &= ~NFS4_EPHEMERAL_TREE_UMOUNTING;
1768d708af74SThomas Haynes 		if (*pmust_rele)
1769d708af74SThomas Haynes 			nfs4_ephemeral_tree_decr(net);
1770b9238976Sth 		mutex_exit(&net->net_cnt_lock);
1771b9238976Sth 
1772b9238976Sth 		mutex_exit(&net->net_tree_lock);
1773b9238976Sth 
1774b9238976Sth 		*pmust_unlock = FALSE;
1775b9238976Sth 	}
1776b9238976Sth }
1777b9238976Sth 
1778b9238976Sth /*
1779b9238976Sth  * While we may have removed any child or sibling nodes of this
1780b9238976Sth  * ephemeral node, we can not nuke it until we know that there
1781b9238976Sth  * were no actived vnodes on it. This will do that final
1782b9238976Sth  * work once we know it is not busy.
1783b9238976Sth  */
1784b9238976Sth void
1785b9238976Sth nfs4_ephemeral_umount_activate(mntinfo4_t *mi, bool_t *pmust_unlock,
1786d708af74SThomas Haynes     bool_t *pmust_rele, nfs4_ephemeral_tree_t **pnet)
1787b9238976Sth {
1788b9238976Sth 	/*
1789b9238976Sth 	 * Now we need to get rid of the ephemeral data if it exists.
1790b9238976Sth 	 */
1791b9238976Sth 	mutex_enter(&mi->mi_lock);
1792b9238976Sth 	if (mi->mi_ephemeral) {
1793b9238976Sth 		/*
1794b9238976Sth 		 * If we are the root node of an ephemeral branch
1795b9238976Sth 		 * which is being removed, then we need to fixup
1796b9238976Sth 		 * pointers into and out of the node.
1797b9238976Sth 		 */
1798b9238976Sth 		if (!(mi->mi_flags & MI4_EPHEMERAL_RECURSED))
1799b9238976Sth 			nfs4_ephemeral_umount_cleanup(mi->mi_ephemeral);
1800b9238976Sth 
1801b9238976Sth 		ASSERT(mi->mi_ephemeral != NULL);
1802b9238976Sth 
1803b9238976Sth 		kmem_free(mi->mi_ephemeral, sizeof (*mi->mi_ephemeral));
1804b9238976Sth 		mi->mi_ephemeral = NULL;
1805b9238976Sth 	}
1806b9238976Sth 	mutex_exit(&mi->mi_lock);
1807b9238976Sth 
1808d708af74SThomas Haynes 	nfs4_ephemeral_umount_unlock(pmust_unlock, pmust_rele, pnet);
1809b9238976Sth }
1810b9238976Sth 
1811b9238976Sth /*
1812b9238976Sth  * Unmount an ephemeral node.
1813b9238976Sth  */
1814b9238976Sth int
1815b9238976Sth nfs4_ephemeral_umount(mntinfo4_t *mi, int flag, cred_t *cr,
1816d708af74SThomas Haynes     bool_t *pmust_unlock, bool_t *pmust_rele, nfs4_ephemeral_tree_t **pnet)
1817b9238976Sth {
1818b9238976Sth 	int			error = 0;
1819b9238976Sth 	nfs4_ephemeral_t	*eph;
1820b9238976Sth 	nfs4_ephemeral_tree_t	*net;
1821b9238976Sth 	int			is_derooting = FALSE;
1822b9238976Sth 	int			is_recursed = FALSE;
1823d3a14591SThomas Haynes 	int			was_locked = FALSE;
1824d3a14591SThomas Haynes 
1825d3a14591SThomas Haynes 	/*
1826d3a14591SThomas Haynes 	 * Make sure to set the default state for cleaning
1827d3a14591SThomas Haynes 	 * up the tree in the caller (and on the way out).
1828d3a14591SThomas Haynes 	 */
1829d708af74SThomas Haynes 	*pmust_unlock = *pmust_rele = FALSE;
1830b9238976Sth 
1831b9238976Sth 	/*
1832b9238976Sth 	 * The active vnodes on this file system may be ephemeral
1833b9238976Sth 	 * children. We need to check for and try to unmount them
1834b9238976Sth 	 * here. If any can not be unmounted, we are going
1835b9238976Sth 	 * to return EBUSY.
1836b9238976Sth 	 */
1837b9238976Sth 	mutex_enter(&mi->mi_lock);
1838b9238976Sth 
1839b9238976Sth 	/*
1840b9238976Sth 	 * If an ephemeral tree, we need to check to see if
1841b9238976Sth 	 * the lock is already held. If it is, then we need
1842b9238976Sth 	 * to see if we are being called as a result of
1843b9238976Sth 	 * the recursive removal of some node of the tree or
1844b9238976Sth 	 * if we are another attempt to remove the tree.
1845b9238976Sth 	 *
1846b9238976Sth 	 * mi_flags & MI4_EPHEMERAL indicates an ephemeral
1847b9238976Sth 	 * node. mi_ephemeral being non-NULL also does this.
1848b9238976Sth 	 *
1849b9238976Sth 	 * mi_ephemeral_tree being non-NULL is sufficient
1850b9238976Sth 	 * to also indicate either it is an ephemeral node
1851b9238976Sth 	 * or the enclosing mntinfo4.
1852b9238976Sth 	 *
1853b9238976Sth 	 * Do we need MI4_EPHEMERAL? Yes, it is useful for
1854b9238976Sth 	 * when we delete the ephemeral node and need to
1855b9238976Sth 	 * differentiate from an ephemeral node and the
1856b9238976Sth 	 * enclosing root node.
1857b9238976Sth 	 */
1858b9238976Sth 	*pnet = net = mi->mi_ephemeral_tree;
1859eabd0450Sth 	if (net == NULL) {
1860b9238976Sth 		mutex_exit(&mi->mi_lock);
1861eabd0450Sth 		return (0);
1862eabd0450Sth 	}
1863b9238976Sth 
1864eabd0450Sth 	eph = mi->mi_ephemeral;
1865eabd0450Sth 	is_recursed = mi->mi_flags & MI4_EPHEMERAL_RECURSED;
1866eabd0450Sth 	is_derooting = (eph == NULL);
1867b9238976Sth 
1868eabd0450Sth 	/*
1869eabd0450Sth 	 * If this is not recursion, then we need to
1870eabd0450Sth 	 * grab a ref count.
1871eabd0450Sth 	 *
1872eabd0450Sth 	 * But wait, we also do not want to do that
1873eabd0450Sth 	 * if a harvester thread has already grabbed
1874eabd0450Sth 	 * the lock.
1875eabd0450Sth 	 */
1876eabd0450Sth 	if (!is_recursed) {
1877eabd0450Sth 		mutex_enter(&net->net_cnt_lock);
1878eabd0450Sth 		if (net->net_status &
1879eabd0450Sth 		    NFS4_EPHEMERAL_TREE_LOCKED) {
1880b9238976Sth 			/*
1881d3a14591SThomas Haynes 			 * If the tree is locked, we need
1882d3a14591SThomas Haynes 			 * to decide whether we are the
1883d3a14591SThomas Haynes 			 * harvester or some explicit call
1884d3a14591SThomas Haynes 			 * for a umount. The only way that
1885d3a14591SThomas Haynes 			 * we are the harvester is if
1886d3a14591SThomas Haynes 			 * MS_SYSSPACE is set.
1887d3a14591SThomas Haynes 			 *
1888d3a14591SThomas Haynes 			 * We only let the harvester through
1889d3a14591SThomas Haynes 			 * at this point.
1890eabd0450Sth 			 *
1891eabd0450Sth 			 * We return EBUSY so that the
1892eabd0450Sth 			 * caller knows something is
1893eabd0450Sth 			 * going on. Note that by that
1894eabd0450Sth 			 * time, the umount in the other
1895eabd0450Sth 			 * thread may have already occured.
1896b9238976Sth 			 */
1897d3a14591SThomas Haynes 			if (!(flag & MS_SYSSPACE)) {
1898d3a14591SThomas Haynes 				mutex_exit(&net->net_cnt_lock);
1899d3a14591SThomas Haynes 				mutex_exit(&mi->mi_lock);
1900d3a14591SThomas Haynes 
1901d3a14591SThomas Haynes 				return (EBUSY);
1902d3a14591SThomas Haynes 			}
1903d3a14591SThomas Haynes 
1904d3a14591SThomas Haynes 			was_locked = TRUE;
1905d3a14591SThomas Haynes 		} else {
1906d708af74SThomas Haynes 			nfs4_ephemeral_tree_incr(net);
1907d708af74SThomas Haynes 			*pmust_rele = TRUE;
1908d3a14591SThomas Haynes 		}
1909d3a14591SThomas Haynes 
1910eabd0450Sth 		mutex_exit(&net->net_cnt_lock);
1911eabd0450Sth 	}
1912eabd0450Sth 	mutex_exit(&mi->mi_lock);
1913b9238976Sth 
1914eabd0450Sth 	/*
1915d3a14591SThomas Haynes 	 * If we are not the harvester, we need to check
1916d3a14591SThomas Haynes 	 * to see if we need to grab the tree lock.
1917eabd0450Sth 	 */
1918d3a14591SThomas Haynes 	if (was_locked == FALSE) {
1919d3a14591SThomas Haynes 		/*
1920d3a14591SThomas Haynes 		 * If we grab the lock, it means that no other
1921d3a14591SThomas Haynes 		 * operation is working on the tree. If we don't
1922d3a14591SThomas Haynes 		 * grab it, we need to decide if this is because
1923d3a14591SThomas Haynes 		 * we are a recursive call or a new operation.
1924d3a14591SThomas Haynes 		 */
1925d3a14591SThomas Haynes 		if (mutex_tryenter(&net->net_tree_lock)) {
1926d3a14591SThomas Haynes 			*pmust_unlock = TRUE;
1927d3a14591SThomas Haynes 		} else {
1928b9238976Sth 			/*
1929d3a14591SThomas Haynes 			 * If we are a recursive call, we can
1930d3a14591SThomas Haynes 			 * proceed without the lock.
1931d3a14591SThomas Haynes 			 * Otherwise we have to wait until
1932d3a14591SThomas Haynes 			 * the lock becomes free.
1933b9238976Sth 			 */
1934d3a14591SThomas Haynes 			if (!is_recursed) {
1935d3a14591SThomas Haynes 				mutex_enter(&net->net_cnt_lock);
1936d3a14591SThomas Haynes 				if (net->net_status &
1937d3a14591SThomas Haynes 				    (NFS4_EPHEMERAL_TREE_DEROOTING
1938d3a14591SThomas Haynes 				    | NFS4_EPHEMERAL_TREE_INVALID)) {
1939d3a14591SThomas Haynes 					nfs4_ephemeral_tree_decr(net);
1940d3a14591SThomas Haynes 					mutex_exit(&net->net_cnt_lock);
1941d708af74SThomas Haynes 					*pmust_rele = FALSE;
1942d3a14591SThomas Haynes 					goto is_busy;
1943d3a14591SThomas Haynes 				}
1944d3a14591SThomas Haynes 				mutex_exit(&net->net_cnt_lock);
1945b9238976Sth 
1946d3a14591SThomas Haynes 				/*
1947d3a14591SThomas Haynes 				 * We can't hold any other locks whilst
1948d3a14591SThomas Haynes 				 * we wait on this to free up.
1949d3a14591SThomas Haynes 				 */
1950d3a14591SThomas Haynes 				mutex_enter(&net->net_tree_lock);
1951b9238976Sth 
1952d3a14591SThomas Haynes 				/*
1953d3a14591SThomas Haynes 				 * Note that while mi->mi_ephemeral
1954d3a14591SThomas Haynes 				 * may change and thus we have to
1955d3a14591SThomas Haynes 				 * update eph, it is the case that
1956d3a14591SThomas Haynes 				 * we have tied down net and
1957d3a14591SThomas Haynes 				 * do not care if mi->mi_ephemeral_tree
1958d3a14591SThomas Haynes 				 * has changed.
1959d3a14591SThomas Haynes 				 */
1960d3a14591SThomas Haynes 				mutex_enter(&mi->mi_lock);
1961d3a14591SThomas Haynes 				eph = mi->mi_ephemeral;
1962d3a14591SThomas Haynes 				mutex_exit(&mi->mi_lock);
1963d3a14591SThomas Haynes 
1964d3a14591SThomas Haynes 				/*
1965d3a14591SThomas Haynes 				 * Okay, we need to see if either the
1966d3a14591SThomas Haynes 				 * tree got nuked or the current node
1967d3a14591SThomas Haynes 				 * got nuked. Both of which will cause
1968d3a14591SThomas Haynes 				 * an error.
1969d3a14591SThomas Haynes 				 *
1970d3a14591SThomas Haynes 				 * Note that a subsequent retry of the
1971d3a14591SThomas Haynes 				 * umount shall work.
1972d3a14591SThomas Haynes 				 */
1973d3a14591SThomas Haynes 				mutex_enter(&net->net_cnt_lock);
1974d3a14591SThomas Haynes 				if (net->net_status &
1975d3a14591SThomas Haynes 				    NFS4_EPHEMERAL_TREE_INVALID ||
1976d3a14591SThomas Haynes 				    (!is_derooting && eph == NULL)) {
1977d3a14591SThomas Haynes 					nfs4_ephemeral_tree_decr(net);
1978d3a14591SThomas Haynes 					mutex_exit(&net->net_cnt_lock);
1979d3a14591SThomas Haynes 					mutex_exit(&net->net_tree_lock);
1980d708af74SThomas Haynes 					*pmust_rele = FALSE;
1981d3a14591SThomas Haynes 					goto is_busy;
1982d3a14591SThomas Haynes 				}
1983eabd0450Sth 				mutex_exit(&net->net_cnt_lock);
1984d3a14591SThomas Haynes 				*pmust_unlock = TRUE;
1985eabd0450Sth 			}
1986eabd0450Sth 		}
1987eabd0450Sth 	}
1988eabd0450Sth 
1989eabd0450Sth 	/*
1990eabd0450Sth 	 * Only once we have grabbed the lock can we mark what we
1991eabd0450Sth 	 * are planning on doing to the ephemeral tree.
1992eabd0450Sth 	 */
1993eabd0450Sth 	if (*pmust_unlock) {
1994eabd0450Sth 		mutex_enter(&net->net_cnt_lock);
1995eabd0450Sth 		net->net_status |= NFS4_EPHEMERAL_TREE_UMOUNTING;
1996eabd0450Sth 
1997eabd0450Sth 		/*
1998eabd0450Sth 		 * Check to see if we are nuking the root.
1999eabd0450Sth 		 */
2000eabd0450Sth 		if (is_derooting)
2001eabd0450Sth 			net->net_status |=
2002eabd0450Sth 			    NFS4_EPHEMERAL_TREE_DEROOTING;
2003eabd0450Sth 		mutex_exit(&net->net_cnt_lock);
2004eabd0450Sth 	}
2005eabd0450Sth 
2006eabd0450Sth 	if (!is_derooting) {
2007eabd0450Sth 		/*
2008eabd0450Sth 		 * Only work on children if the caller has not already
2009eabd0450Sth 		 * done so.
2010eabd0450Sth 		 */
2011eabd0450Sth 		if (!is_recursed) {
2012eabd0450Sth 			ASSERT(eph != NULL);
2013eabd0450Sth 
2014eabd0450Sth 			error = nfs4_ephemeral_unmount_engine(eph,
2015eabd0450Sth 			    FALSE, flag, cr);
2016eabd0450Sth 			if (error)
2017eabd0450Sth 				goto is_busy;
2018eabd0450Sth 		}
2019eabd0450Sth 	} else {
2020eabd0450Sth 		eph = net->net_root;
2021eabd0450Sth 
2022eabd0450Sth 		/*
2023eabd0450Sth 		 * Only work if there is something there.
2024eabd0450Sth 		 */
2025eabd0450Sth 		if (eph) {
2026eabd0450Sth 			error = nfs4_ephemeral_unmount_engine(eph, TRUE,
2027eabd0450Sth 			    flag, cr);
2028eabd0450Sth 			if (error) {
2029eabd0450Sth 				mutex_enter(&net->net_cnt_lock);
2030eabd0450Sth 				net->net_status &=
2031eabd0450Sth 				    ~NFS4_EPHEMERAL_TREE_DEROOTING;
2032eabd0450Sth 				mutex_exit(&net->net_cnt_lock);
2033eabd0450Sth 				goto is_busy;
2034eabd0450Sth 			}
2035b9238976Sth 
2036b9238976Sth 			/*
2037eabd0450Sth 			 * Nothing else which goes wrong will
2038eabd0450Sth 			 * invalidate the blowing away of the
2039eabd0450Sth 			 * ephmeral tree.
2040b9238976Sth 			 */
2041eabd0450Sth 			net->net_root = NULL;
2042b9238976Sth 		}
2043eabd0450Sth 
2044eabd0450Sth 		/*
2045eabd0450Sth 		 * We have derooted and we have caused the tree to be
2046d3a14591SThomas Haynes 		 * invalidated.
2047eabd0450Sth 		 */
2048eabd0450Sth 		mutex_enter(&net->net_cnt_lock);
2049eabd0450Sth 		net->net_status &= ~NFS4_EPHEMERAL_TREE_DEROOTING;
2050eabd0450Sth 		net->net_status |= NFS4_EPHEMERAL_TREE_INVALID;
2051d708af74SThomas Haynes 		if (was_locked == FALSE)
2052d708af74SThomas Haynes 			nfs4_ephemeral_tree_decr(net);
2053eabd0450Sth 		mutex_exit(&net->net_cnt_lock);
2054eabd0450Sth 
2055d3a14591SThomas Haynes 		if (was_locked == FALSE)
2056d3a14591SThomas Haynes 			mutex_exit(&net->net_tree_lock);
2057d3a14591SThomas Haynes 
2058d3a14591SThomas Haynes 		/*
2059d3a14591SThomas Haynes 		 * We have just blown away any notation of this
2060d3a14591SThomas Haynes 		 * tree being locked. We can't let the caller
2061d3a14591SThomas Haynes 		 * try to clean things up.
2062d3a14591SThomas Haynes 		 */
2063d3a14591SThomas Haynes 		*pmust_unlock = FALSE;
2064d3a14591SThomas Haynes 
2065eabd0450Sth 		/*
2066d708af74SThomas Haynes 		 * At this point, the tree should no longer be
2067d708af74SThomas Haynes 		 * associated with the mntinfo4. We need to pull
2068d708af74SThomas Haynes 		 * it off there and let the harvester take
2069eabd0450Sth 		 * care of it once the refcnt drops.
2070eabd0450Sth 		 */
2071eabd0450Sth 		mutex_enter(&mi->mi_lock);
2072eabd0450Sth 		mi->mi_ephemeral_tree = NULL;
2073b9238976Sth 		mutex_exit(&mi->mi_lock);
2074b9238976Sth 	}
2075b9238976Sth 
2076b9238976Sth 	return (0);
2077b9238976Sth 
2078b9238976Sth is_busy:
2079b9238976Sth 
2080d708af74SThomas Haynes 	nfs4_ephemeral_umount_unlock(pmust_unlock, pmust_rele,
2081d708af74SThomas Haynes 	    pnet);
2082b9238976Sth 
2083b9238976Sth 	return (error);
2084b9238976Sth }
2085b9238976Sth 
2086b9238976Sth /*
2087b9238976Sth  * Do the umount and record any error in the parent.
2088b9238976Sth  */
2089b9238976Sth static void
2090b9238976Sth nfs4_ephemeral_record_umount(vfs_t *vfsp, int flag,
2091b9238976Sth     nfs4_ephemeral_t *e, nfs4_ephemeral_t *prior)
2092b9238976Sth {
2093b9238976Sth 	int	error;
2094b9238976Sth 
2095b9238976Sth 	error = umount2_engine(vfsp, flag, kcred, FALSE);
2096b9238976Sth 	if (error) {
2097b9238976Sth 		if (prior) {
2098b9238976Sth 			if (prior->ne_child == e)
2099b9238976Sth 				prior->ne_state |=
2100b9238976Sth 				    NFS4_EPHEMERAL_CHILD_ERROR;
2101b9238976Sth 			else
2102b9238976Sth 				prior->ne_state |=
2103b9238976Sth 				    NFS4_EPHEMERAL_PEER_ERROR;
2104b9238976Sth 		}
2105b9238976Sth 	}
2106b9238976Sth }
2107b9238976Sth 
2108b9238976Sth /*
2109b9238976Sth  * For each tree in the forest (where the forest is in
2110b9238976Sth  * effect all of the ephemeral trees for this zone),
2111b9238976Sth  * scan to see if a node can be unmounted. Note that
2112b9238976Sth  * unlike nfs4_ephemeral_unmount_engine(), we do
2113b9238976Sth  * not process the current node before children or
2114b9238976Sth  * siblings. I.e., if a node can be unmounted, we
2115b9238976Sth  * do not recursively check to see if the nodes
2116b9238976Sth  * hanging off of it can also be unmounted.
2117b9238976Sth  *
2118b9238976Sth  * Instead, we delve down deep to try and remove the
2119b9238976Sth  * children first. Then, because we share code with
2120b9238976Sth  * nfs4_ephemeral_unmount_engine(), we will try
2121b9238976Sth  * them again. This could be a performance issue in
2122b9238976Sth  * the future.
2123b9238976Sth  *
2124b9238976Sth  * Also note that unlike nfs4_ephemeral_unmount_engine(),
2125b9238976Sth  * we do not halt on an error. We will not remove the
2126b9238976Sth  * current node, but we will keep on trying to remove
2127b9238976Sth  * the others.
2128b9238976Sth  *
2129b9238976Sth  * force indicates that we want the unmount to occur
2130b9238976Sth  * even if there is something blocking it.
2131b9238976Sth  *
2132b9238976Sth  * time_check indicates that we want to see if the
2133b9238976Sth  * mount has expired past mount_to or not. Typically
2134b9238976Sth  * we want to do this and only on a shutdown of the
2135b9238976Sth  * zone would we want to ignore the check.
2136b9238976Sth  */
2137b9238976Sth static void
2138b9238976Sth nfs4_ephemeral_harvest_forest(nfs4_trigger_globals_t *ntg,
2139b9238976Sth     bool_t force, bool_t time_check)
2140b9238976Sth {
2141b9238976Sth 	nfs4_ephemeral_tree_t	*net;
2142b9238976Sth 	nfs4_ephemeral_tree_t	*prev = NULL;
2143b9238976Sth 	nfs4_ephemeral_tree_t	*next;
2144b9238976Sth 	nfs4_ephemeral_t	*e;
2145b9238976Sth 	nfs4_ephemeral_t	*prior;
2146b9238976Sth 	time_t			now = gethrestime_sec();
2147b9238976Sth 
2148b9238976Sth 	nfs4_ephemeral_tree_t	*harvest = NULL;
2149b9238976Sth 
2150b9238976Sth 	int			flag;
2151b9238976Sth 
2152b9238976Sth 	mntinfo4_t		*mi;
2153b9238976Sth 	vfs_t			*vfsp;
2154b9238976Sth 
2155b9238976Sth 	if (force)
2156d3a14591SThomas Haynes 		flag = MS_FORCE | MS_SYSSPACE;
2157b9238976Sth 	else
2158d3a14591SThomas Haynes 		flag = MS_SYSSPACE;
2159b9238976Sth 
2160b9238976Sth 	mutex_enter(&ntg->ntg_forest_lock);
2161b9238976Sth 	for (net = ntg->ntg_forest; net != NULL; net = next) {
2162b9238976Sth 		next = net->net_next;
2163b9238976Sth 
2164d3a14591SThomas Haynes 		nfs4_ephemeral_tree_hold(net);
2165b9238976Sth 
2166b9238976Sth 		mutex_enter(&net->net_tree_lock);
2167b9238976Sth 
2168b9238976Sth 		/*
2169b9238976Sth 		 * Let the unmount code know that the
2170b9238976Sth 		 * tree is already locked!
2171b9238976Sth 		 */
2172b9238976Sth 		mutex_enter(&net->net_cnt_lock);
2173b9238976Sth 		net->net_status |= NFS4_EPHEMERAL_TREE_LOCKED;
2174b9238976Sth 		mutex_exit(&net->net_cnt_lock);
2175b9238976Sth 
2176b9238976Sth 		/*
2177b9238976Sth 		 * If the intent is force all ephemeral nodes to
2178b9238976Sth 		 * be unmounted in this zone, we can short circuit a
2179b9238976Sth 		 * lot of tree traversal and simply zap the root node.
2180b9238976Sth 		 */
2181b9238976Sth 		if (force) {
2182b9238976Sth 			if (net->net_root) {
2183b9238976Sth 				mi = net->net_root->ne_mount;
2184b9238976Sth 				vfsp = mi->mi_vfsp;
2185b9238976Sth 
2186b9238976Sth 				/*
2187b9238976Sth 				 * Cleared by umount2_engine.
2188b9238976Sth 				 */
2189b9238976Sth 				VFS_HOLD(vfsp);
2190b9238976Sth 
2191b9238976Sth 				(void) umount2_engine(vfsp, flag,
2192b9238976Sth 				    kcred, FALSE);
2193b9238976Sth 
2194b9238976Sth 				goto check_done;
2195b9238976Sth 			}
2196b9238976Sth 		}
2197b9238976Sth 
2198b9238976Sth 		e = net->net_root;
2199b9238976Sth 		if (e)
2200b9238976Sth 			e->ne_state = NFS4_EPHEMERAL_VISIT_CHILD;
2201b9238976Sth 
2202b9238976Sth 		while (e) {
2203b9238976Sth 			if (e->ne_state == NFS4_EPHEMERAL_VISIT_CHILD) {
2204b9238976Sth 				e->ne_state = NFS4_EPHEMERAL_VISIT_SIBLING;
2205b9238976Sth 				if (e->ne_child) {
2206b9238976Sth 					e = e->ne_child;
2207b9238976Sth 					e->ne_state =
2208b9238976Sth 					    NFS4_EPHEMERAL_VISIT_CHILD;
2209b9238976Sth 				}
2210b9238976Sth 
2211b9238976Sth 				continue;
2212b9238976Sth 			} else if (e->ne_state ==
2213b9238976Sth 			    NFS4_EPHEMERAL_VISIT_SIBLING) {
2214b9238976Sth 				e->ne_state = NFS4_EPHEMERAL_PROCESS_ME;
2215b9238976Sth 				if (e->ne_peer) {
2216b9238976Sth 					e = e->ne_peer;
2217b9238976Sth 					e->ne_state =
2218b9238976Sth 					    NFS4_EPHEMERAL_VISIT_CHILD;
2219b9238976Sth 				}
2220b9238976Sth 
2221b9238976Sth 				continue;
2222b9238976Sth 			} else if (e->ne_state ==
2223b9238976Sth 			    NFS4_EPHEMERAL_CHILD_ERROR) {
2224b9238976Sth 				prior = e->ne_prior;
2225b9238976Sth 
2226b9238976Sth 				/*
2227b9238976Sth 				 * If a child reported an error, do
2228b9238976Sth 				 * not bother trying to unmount.
2229b9238976Sth 				 *
2230b9238976Sth 				 * If your prior node is a parent,
2231b9238976Sth 				 * pass the error up such that they
2232b9238976Sth 				 * also do not try to unmount.
2233b9238976Sth 				 *
2234b9238976Sth 				 * However, if your prior is a sibling,
2235b9238976Sth 				 * let them try to unmount if they can.
2236b9238976Sth 				 */
2237b9238976Sth 				if (prior) {
2238b9238976Sth 					if (prior->ne_child == e)
2239b9238976Sth 						prior->ne_state |=
2240b9238976Sth 						    NFS4_EPHEMERAL_CHILD_ERROR;
2241b9238976Sth 					else
2242b9238976Sth 						prior->ne_state |=
2243b9238976Sth 						    NFS4_EPHEMERAL_PEER_ERROR;
2244b9238976Sth 				}
2245b9238976Sth 
				/*
				 * Clear the error and if needed, process peers.
				 *
				 * Once we mask out the error, we know whether
				 * or not we have to process another node.
				 */
2252b9238976Sth 				e->ne_state &= ~NFS4_EPHEMERAL_CHILD_ERROR;
2253b9238976Sth 				if (e->ne_state == NFS4_EPHEMERAL_PROCESS_ME)
2254b9238976Sth 					e = prior;
2255b9238976Sth 
2256b9238976Sth 				continue;
2257b9238976Sth 			} else if (e->ne_state ==
2258b9238976Sth 			    NFS4_EPHEMERAL_PEER_ERROR) {
2259b9238976Sth 				prior = e->ne_prior;
2260b9238976Sth 
2261b9238976Sth 				if (prior) {
2262b9238976Sth 					if (prior->ne_child == e)
2263b9238976Sth 						prior->ne_state =
2264b9238976Sth 						    NFS4_EPHEMERAL_CHILD_ERROR;
2265b9238976Sth 					else
2266b9238976Sth 						prior->ne_state =
2267b9238976Sth 						    NFS4_EPHEMERAL_PEER_ERROR;
2268b9238976Sth 				}
2269b9238976Sth 
2270b9238976Sth 				/*
2271b9238976Sth 				 * Clear the error from this node and do the
2272b9238976Sth 				 * correct processing.
2273b9238976Sth 				 */
2274b9238976Sth 				e->ne_state &= ~NFS4_EPHEMERAL_PEER_ERROR;
2275b9238976Sth 				continue;
2276b9238976Sth 			}
2277b9238976Sth 
2278b9238976Sth 			prior = e->ne_prior;
2279b9238976Sth 			e->ne_state = NFS4_EPHEMERAL_OK;
2280b9238976Sth 
2281b9238976Sth 			/*
2282b9238976Sth 			 * It must be the case that we need to process
2283b9238976Sth 			 * this node.
2284b9238976Sth 			 */
2285b9238976Sth 			if (!time_check ||
2286b9238976Sth 			    now - e->ne_ref_time > e->ne_mount_to) {
2287b9238976Sth 				mi = e->ne_mount;
2288b9238976Sth 				vfsp = mi->mi_vfsp;
2289b9238976Sth 
2290b9238976Sth 				/*
2291b9238976Sth 				 * Cleared by umount2_engine.
2292b9238976Sth 				 */
2293b9238976Sth 				VFS_HOLD(vfsp);
2294b9238976Sth 
				/*
				 * Note that we effectively work down to the
				 * leaf nodes first, try to unmount them,
				 * then work our way back up toward the
				 * root.
				 *
				 * Also note that we deal with a lot of
				 * complexity by sharing the work with
				 * the manual unmount code.
				 */
2305b9238976Sth 				nfs4_ephemeral_record_umount(vfsp, flag,
2306b9238976Sth 				    e, prior);
2307b9238976Sth 			}
2308b9238976Sth 
2309b9238976Sth 			e = prior;
2310b9238976Sth 		}
2311b9238976Sth 
2312b9238976Sth check_done:
2313b9238976Sth 
2314b9238976Sth 		/*
2315d3a14591SThomas Haynes 		 * At this point we are done processing this tree.
2316d3a14591SThomas Haynes 		 *
2317d3a14591SThomas Haynes 		 * If the tree is invalid and we are the only reference
2318d3a14591SThomas Haynes 		 * to it, then we push it on the local linked list
2319d3a14591SThomas Haynes 		 * to remove it at the end. We avoid that action now
2320d3a14591SThomas Haynes 		 * to keep the tree processing going along at a fair clip.
2321d3a14591SThomas Haynes 		 *
2322d3a14591SThomas Haynes 		 * Else, even if we are the only reference, we drop
2323d3a14591SThomas Haynes 		 * our hold on the current tree and allow it to be
2324d3a14591SThomas Haynes 		 * reused as needed.
2325b9238976Sth 		 */
2326b9238976Sth 		mutex_enter(&net->net_cnt_lock);
2327b9238976Sth 		if (net->net_refcnt == 1 &&
2328b9238976Sth 		    net->net_status & NFS4_EPHEMERAL_TREE_INVALID) {
2329d3a14591SThomas Haynes 			nfs4_ephemeral_tree_decr(net);
2330b9238976Sth 			net->net_status &= ~NFS4_EPHEMERAL_TREE_LOCKED;
2331b9238976Sth 			mutex_exit(&net->net_cnt_lock);
2332b9238976Sth 			mutex_exit(&net->net_tree_lock);
2333b9238976Sth 
2334b9238976Sth 			if (prev)
2335b9238976Sth 				prev->net_next = net->net_next;
2336b9238976Sth 			else
2337b9238976Sth 				ntg->ntg_forest = net->net_next;
2338b9238976Sth 
2339b9238976Sth 			net->net_next = harvest;
2340b9238976Sth 			harvest = net;
2341b9238976Sth 			continue;
2342b9238976Sth 		}
2343b9238976Sth 
2344d3a14591SThomas Haynes 		nfs4_ephemeral_tree_decr(net);
2345b9238976Sth 		net->net_status &= ~NFS4_EPHEMERAL_TREE_LOCKED;
2346b9238976Sth 		mutex_exit(&net->net_cnt_lock);
2347b9238976Sth 		mutex_exit(&net->net_tree_lock);
2348b9238976Sth 
2349b9238976Sth 		prev = net;
2350b9238976Sth 	}
2351b9238976Sth 	mutex_exit(&ntg->ntg_forest_lock);
2352b9238976Sth 
2353b9238976Sth 	for (net = harvest; net != NULL; net = next) {
2354b9238976Sth 		next = net->net_next;
2355b9238976Sth 
2356b9238976Sth 		mutex_destroy(&net->net_tree_lock);
2357b9238976Sth 		mutex_destroy(&net->net_cnt_lock);
2358b9238976Sth 		kmem_free(net, sizeof (*net));
2359b9238976Sth 	}
2360b9238976Sth }
2361b9238976Sth 
2362b9238976Sth /*
2363b9238976Sth  * This is the thread which decides when the harvesting
2364b9238976Sth  * can proceed and when to kill it off for this zone.
2365b9238976Sth  */
2366b9238976Sth static void
2367b9238976Sth nfs4_ephemeral_harvester(nfs4_trigger_globals_t *ntg)
2368b9238976Sth {
2369b9238976Sth 	clock_t		timeleft;
2370b9238976Sth 	zone_t		*zone = curproc->p_zone;
2371b9238976Sth 
2372b9238976Sth 	for (;;) {
2373b9238976Sth 		timeleft = zone_status_timedwait(zone, lbolt +
2374b9238976Sth 		    nfs4_trigger_thread_timer * hz, ZONE_IS_SHUTTING_DOWN);
2375b9238976Sth 
2376b9238976Sth 		/*
2377b9238976Sth 		 * zone is exiting...
2378b9238976Sth 		 */
2379b9238976Sth 		if (timeleft != -1) {
2380b9238976Sth 			ASSERT(zone_status_get(zone) >= ZONE_IS_SHUTTING_DOWN);
2381b9238976Sth 			zthread_exit();
2382b9238976Sth 			/* NOTREACHED */
2383b9238976Sth 		}
2384b9238976Sth 
2385b9238976Sth 		/*
2386b9238976Sth 		 * Only bother scanning if there is potential
2387b9238976Sth 		 * work to be done.
2388b9238976Sth 		 */
2389b9238976Sth 		if (ntg->ntg_forest == NULL)
2390b9238976Sth 			continue;
2391b9238976Sth 
2392b9238976Sth 		/*
2393b9238976Sth 		 * Now scan the list and get rid of everything which
2394b9238976Sth 		 * is old.
2395b9238976Sth 		 */
2396b9238976Sth 		nfs4_ephemeral_harvest_forest(ntg, FALSE, TRUE);
2397b9238976Sth 	}
2398b9238976Sth 
2399b9238976Sth 	/* NOTREACHED */
2400b9238976Sth }
2401b9238976Sth 
2402b9238976Sth /*
2403b9238976Sth  * The zone specific glue needed to start the unmount harvester.
2404b9238976Sth  *
2405b9238976Sth  * Note that we want to avoid holding the mutex as long as possible,
2406b9238976Sth  * hence the multiple checks.
2407b9238976Sth  *
2408b9238976Sth  * The caller should avoid us getting down here in the first
2409b9238976Sth  * place.
2410b9238976Sth  */
2411b9238976Sth static void
2412b9238976Sth nfs4_ephemeral_start_harvester(nfs4_trigger_globals_t *ntg)
2413b9238976Sth {
2414b9238976Sth 	/*
2415b9238976Sth 	 * It got started before we got here...
2416b9238976Sth 	 */
2417b9238976Sth 	if (ntg->ntg_thread_started)
2418b9238976Sth 		return;
2419b9238976Sth 
2420b9238976Sth 	mutex_enter(&nfs4_ephemeral_thread_lock);
2421b9238976Sth 
2422b9238976Sth 	if (ntg->ntg_thread_started) {
2423b9238976Sth 		mutex_exit(&nfs4_ephemeral_thread_lock);
2424b9238976Sth 		return;
2425b9238976Sth 	}
2426b9238976Sth 
2427b9238976Sth 	/*
2428b9238976Sth 	 * Start the unmounter harvester thread for this zone.
2429b9238976Sth 	 */
2430b9238976Sth 	(void) zthread_create(NULL, 0, nfs4_ephemeral_harvester,
2431b9238976Sth 	    ntg, 0, minclsyspri);
2432b9238976Sth 
2433b9238976Sth 	ntg->ntg_thread_started = TRUE;
2434b9238976Sth 	mutex_exit(&nfs4_ephemeral_thread_lock);
2435b9238976Sth }
2436b9238976Sth 
2437b9238976Sth /*ARGSUSED*/
2438b9238976Sth static void *
2439b9238976Sth nfs4_ephemeral_zsd_create(zoneid_t zoneid)
2440b9238976Sth {
2441b9238976Sth 	nfs4_trigger_globals_t	*ntg;
2442b9238976Sth 
2443b9238976Sth 	ntg = kmem_zalloc(sizeof (*ntg), KM_SLEEP);
2444b9238976Sth 	ntg->ntg_thread_started = FALSE;
2445b9238976Sth 
2446b9238976Sth 	/*
2447b9238976Sth 	 * This is the default....
2448b9238976Sth 	 */
2449b9238976Sth 	ntg->ntg_mount_to = nfs4_trigger_thread_timer;
2450b9238976Sth 
2451b9238976Sth 	mutex_init(&ntg->ntg_forest_lock, NULL,
2452b9238976Sth 	    MUTEX_DEFAULT, NULL);
2453b9238976Sth 
2454b9238976Sth 	return (ntg);
2455b9238976Sth }
2456b9238976Sth 
2457b9238976Sth /*
2458b9238976Sth  * Try a nice gentle walk down the forest and convince
2459b9238976Sth  * all of the trees to gracefully give it up.
2460b9238976Sth  */
2461b9238976Sth /*ARGSUSED*/
2462b9238976Sth static void
2463b9238976Sth nfs4_ephemeral_zsd_shutdown(zoneid_t zoneid, void *arg)
2464b9238976Sth {
2465b9238976Sth 	nfs4_trigger_globals_t	*ntg = arg;
2466b9238976Sth 
2467b9238976Sth 	if (!ntg)
2468b9238976Sth 		return;
2469b9238976Sth 
2470b9238976Sth 	nfs4_ephemeral_harvest_forest(ntg, FALSE, FALSE);
2471b9238976Sth }
2472b9238976Sth 
2473b9238976Sth /*
2474b9238976Sth  * Race along the forest and rip all of the trees out by
2475b9238976Sth  * their rootballs!
2476b9238976Sth  */
2477b9238976Sth /*ARGSUSED*/
2478b9238976Sth static void
2479b9238976Sth nfs4_ephemeral_zsd_destroy(zoneid_t zoneid, void *arg)
2480b9238976Sth {
2481b9238976Sth 	nfs4_trigger_globals_t	*ntg = arg;
2482b9238976Sth 
2483b9238976Sth 	if (!ntg)
2484b9238976Sth 		return;
2485b9238976Sth 
2486b9238976Sth 	nfs4_ephemeral_harvest_forest(ntg, TRUE, FALSE);
2487b9238976Sth 
2488b9238976Sth 	mutex_destroy(&ntg->ntg_forest_lock);
2489b9238976Sth 	kmem_free(ntg, sizeof (*ntg));
2490b9238976Sth }
2491b9238976Sth 
2492b9238976Sth /*
2493b9238976Sth  * This is the zone independent cleanup needed for
2494b9238976Sth  * emphemeral mount processing.
2495b9238976Sth  */
2496b9238976Sth void
2497b9238976Sth nfs4_ephemeral_fini(void)
2498b9238976Sth {
2499b9238976Sth 	(void) zone_key_delete(nfs4_ephemeral_key);
2500b9238976Sth 	mutex_destroy(&nfs4_ephemeral_thread_lock);
2501b9238976Sth }
2502b9238976Sth 
2503b9238976Sth /*
2504b9238976Sth  * This is the zone independent initialization needed for
2505b9238976Sth  * emphemeral mount processing.
2506b9238976Sth  */
2507b9238976Sth void
2508b9238976Sth nfs4_ephemeral_init(void)
2509b9238976Sth {
2510b9238976Sth 	mutex_init(&nfs4_ephemeral_thread_lock, NULL, MUTEX_DEFAULT,
2511b9238976Sth 	    NULL);
2512b9238976Sth 
2513b9238976Sth 	zone_key_create(&nfs4_ephemeral_key, nfs4_ephemeral_zsd_create,
2514b9238976Sth 	    nfs4_ephemeral_zsd_shutdown, nfs4_ephemeral_zsd_destroy);
2515b9238976Sth }
2516b9238976Sth 
2517b9238976Sth /*
2518b9238976Sth  * nfssys() calls this function to set the per-zone
2519b9238976Sth  * value of mount_to to drive when an ephemeral mount is
2520b9238976Sth  * timed out. Each mount will grab a copy of this value
2521b9238976Sth  * when mounted.
2522b9238976Sth  */
2523b9238976Sth void
2524b9238976Sth nfs4_ephemeral_set_mount_to(uint_t mount_to)
2525b9238976Sth {
2526b9238976Sth 	nfs4_trigger_globals_t	*ntg;
2527b9238976Sth 	zone_t			*zone = curproc->p_zone;
2528b9238976Sth 
2529b9238976Sth 	ntg = zone_getspecific(nfs4_ephemeral_key, zone);
2530b9238976Sth 
2531b9238976Sth 	ntg->ntg_mount_to = mount_to;
2532b9238976Sth }
2533b9238976Sth 
2534b9238976Sth /*
2535b9238976Sth  * Walk the list of v4 mount options; if they are currently set in vfsp,
2536b9238976Sth  * append them to a new comma-separated mount option string, and return it.
2537b9238976Sth  *
2538b9238976Sth  * Caller should free by calling nfs4_trigger_destroy_mntopts().
2539b9238976Sth  */
2540b9238976Sth static char *
2541b9238976Sth nfs4_trigger_create_mntopts(vfs_t *vfsp)
2542b9238976Sth {
2543b9238976Sth 	uint_t i;
2544b9238976Sth 	char *mntopts;
2545b9238976Sth 	struct vfssw *vswp;
2546b9238976Sth 	mntopts_t *optproto;
2547b9238976Sth 
2548b9238976Sth 	mntopts = kmem_zalloc(MAX_MNTOPT_STR, KM_SLEEP);
2549b9238976Sth 
2550b9238976Sth 	/* get the list of applicable mount options for v4; locks *vswp */
2551b9238976Sth 	vswp = vfs_getvfssw(MNTTYPE_NFS4);
2552b9238976Sth 	optproto = &vswp->vsw_optproto;
2553b9238976Sth 
2554b9238976Sth 	for (i = 0; i < optproto->mo_count; i++) {
2555b9238976Sth 		struct mntopt *mop = &optproto->mo_list[i];
2556b9238976Sth 
2557b9238976Sth 		if (mop->mo_flags & MO_EMPTY)
2558b9238976Sth 			continue;
2559b9238976Sth 
2560b9238976Sth 		if (nfs4_trigger_add_mntopt(mntopts, mop->mo_name, vfsp)) {
2561b9238976Sth 			kmem_free(mntopts, MAX_MNTOPT_STR);
2562b9238976Sth 			vfs_unrefvfssw(vswp);
2563b9238976Sth 			return (NULL);
2564b9238976Sth 		}
2565b9238976Sth 	}
2566b9238976Sth 
2567b9238976Sth 	vfs_unrefvfssw(vswp);
2568b9238976Sth 
2569b9238976Sth 	/*
2570b9238976Sth 	 * MNTOPT_XATTR is not in the v4 mount opt proto list,
2571b9238976Sth 	 * and it may only be passed via MS_OPTIONSTR, so we
2572b9238976Sth 	 * must handle it here.
2573b9238976Sth 	 *
2574b9238976Sth 	 * Ideally, it would be in the list, but NFS does not specify its
2575b9238976Sth 	 * own opt proto list, it uses instead the default one. Since
2576b9238976Sth 	 * not all filesystems support extended attrs, it would not be
2577b9238976Sth 	 * appropriate to add it there.
2578b9238976Sth 	 */
2579b9238976Sth 	if (nfs4_trigger_add_mntopt(mntopts, MNTOPT_XATTR, vfsp) ||
2580b9238976Sth 	    nfs4_trigger_add_mntopt(mntopts, MNTOPT_NOXATTR, vfsp)) {
2581b9238976Sth 		kmem_free(mntopts, MAX_MNTOPT_STR);
2582b9238976Sth 		return (NULL);
2583b9238976Sth 	}
2584b9238976Sth 
2585b9238976Sth 	return (mntopts);
2586b9238976Sth }
2587b9238976Sth 
2588b9238976Sth static void
2589b9238976Sth nfs4_trigger_destroy_mntopts(char *mntopts)
2590b9238976Sth {
2591b9238976Sth 	if (mntopts)
2592b9238976Sth 		kmem_free(mntopts, MAX_MNTOPT_STR);
2593b9238976Sth }
2594b9238976Sth 
2595b9238976Sth /*
2596b9238976Sth  * Check a single mount option (optname). Add to mntopts if it is set in VFS.
2597b9238976Sth  */
2598b9238976Sth static int
2599b9238976Sth nfs4_trigger_add_mntopt(char *mntopts, char *optname, vfs_t *vfsp)
2600b9238976Sth {
2601b9238976Sth 	if (mntopts == NULL || optname == NULL || vfsp == NULL)
2602b9238976Sth 		return (EINVAL);
2603b9238976Sth 
2604b9238976Sth 	if (vfs_optionisset(vfsp, optname, NULL)) {
2605b9238976Sth 		size_t mntoptslen = strlen(mntopts);
2606b9238976Sth 		size_t optnamelen = strlen(optname);
2607b9238976Sth 
2608b9238976Sth 		/* +1 for ',', +1 for NUL */
2609b9238976Sth 		if (mntoptslen + optnamelen + 2 > MAX_MNTOPT_STR)
2610b9238976Sth 			return (EOVERFLOW);
2611b9238976Sth 
2612b9238976Sth 		/* first or subsequent mount option? */
2613b9238976Sth 		if (*mntopts != '\0')
2614b9238976Sth 			(void) strcat(mntopts, ",");
2615b9238976Sth 
2616b9238976Sth 		(void) strcat(mntopts, optname);
2617b9238976Sth 	}
2618b9238976Sth 
2619b9238976Sth 	return (0);
2620b9238976Sth }
2621b9238976Sth 
2622b9238976Sth static enum clnt_stat
2623b9238976Sth nfs4_trigger_ping_server(servinfo4_t *svp, int nointr)
2624b9238976Sth {
2625b9238976Sth 	int retries, error;
2626b9238976Sth 	uint_t max_msgsize;
2627b9238976Sth 	enum clnt_stat status;
2628b9238976Sth 	CLIENT *cl;
2629b9238976Sth 	struct timeval timeout;
2630b9238976Sth 
2631b9238976Sth 	/* as per recov_newserver() */
2632b9238976Sth 	max_msgsize = 0;
2633b9238976Sth 	retries = 1;
2634b9238976Sth 	timeout.tv_sec = 2;
2635b9238976Sth 	timeout.tv_usec = 0;
2636b9238976Sth 
2637b9238976Sth 	error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr, NFS_PROGRAM,
2638b9238976Sth 	    NFS_V4, max_msgsize, retries, CRED(), &cl);
2639b9238976Sth 	if (error)
2640b9238976Sth 		return (RPC_FAILED);
2641b9238976Sth 
2642b9238976Sth 	if (nointr)
2643b9238976Sth 		cl->cl_nosignal = TRUE;
2644b9238976Sth 	status = CLNT_CALL(cl, RFS_NULL, xdr_void, NULL, xdr_void, NULL,
2645b9238976Sth 	    timeout);
2646b9238976Sth 	if (nointr)
2647b9238976Sth 		cl->cl_nosignal = FALSE;
2648b9238976Sth 
2649b9238976Sth 	AUTH_DESTROY(cl->cl_auth);
2650b9238976Sth 	CLNT_DESTROY(cl);
2651b9238976Sth 
2652b9238976Sth 	return (status);
2653b9238976Sth }
2654