xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs4_stub_vnops.c (revision 2f172c55ef76964744bc62b4500ece87f3089b4d)
1b9238976Sth /*
2b9238976Sth  * CDDL HEADER START
3b9238976Sth  *
4b9238976Sth  * The contents of this file are subject to the terms of the
5b9238976Sth  * Common Development and Distribution License (the "License").
6b9238976Sth  * You may not use this file except in compliance with the License.
7b9238976Sth  *
8b9238976Sth  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9b9238976Sth  * or http://www.opensolaris.org/os/licensing.
10b9238976Sth  * See the License for the specific language governing permissions
11b9238976Sth  * and limitations under the License.
12b9238976Sth  *
13b9238976Sth  * When distributing Covered Code, include this CDDL HEADER in each
14b9238976Sth  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15b9238976Sth  * If applicable, add the following below this CDDL HEADER, with the
16b9238976Sth  * fields enclosed by brackets "[]" replaced with your own identifying
17b9238976Sth  * information: Portions Copyright [yyyy] [name of copyright owner]
18b9238976Sth  *
19b9238976Sth  * CDDL HEADER END
20b9238976Sth  */
21b9238976Sth 
22b9238976Sth /*
23546a3997SThomas Haynes  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24b9238976Sth  * Use is subject to license terms.
25b9238976Sth  */
26b9238976Sth 
27b9238976Sth /*
28b9238976Sth  * Support for ephemeral mounts, e.g. mirror-mounts. These mounts are
29b9238976Sth  * triggered from a "stub" rnode via a special set of vnodeops.
30b9238976Sth  */
31b9238976Sth 
32b9238976Sth #include <sys/param.h>
33b9238976Sth #include <sys/types.h>
34b9238976Sth #include <sys/systm.h>
35b9238976Sth #include <sys/cred.h>
36b9238976Sth #include <sys/time.h>
37b9238976Sth #include <sys/vnode.h>
38b9238976Sth #include <sys/vfs.h>
39b9238976Sth #include <sys/vfs_opreg.h>
40b9238976Sth #include <sys/file.h>
41b9238976Sth #include <sys/filio.h>
42b9238976Sth #include <sys/uio.h>
43b9238976Sth #include <sys/buf.h>
44b9238976Sth #include <sys/mman.h>
45b9238976Sth #include <sys/pathname.h>
46b9238976Sth #include <sys/dirent.h>
47b9238976Sth #include <sys/debug.h>
48b9238976Sth #include <sys/vmsystm.h>
49b9238976Sth #include <sys/fcntl.h>
50b9238976Sth #include <sys/flock.h>
51b9238976Sth #include <sys/swap.h>
52b9238976Sth #include <sys/errno.h>
53b9238976Sth #include <sys/strsubr.h>
54b9238976Sth #include <sys/sysmacros.h>
55b9238976Sth #include <sys/kmem.h>
56b9238976Sth #include <sys/mount.h>
57b9238976Sth #include <sys/cmn_err.h>
58b9238976Sth #include <sys/pathconf.h>
59b9238976Sth #include <sys/utsname.h>
60b9238976Sth #include <sys/dnlc.h>
61b9238976Sth #include <sys/acl.h>
62b9238976Sth #include <sys/systeminfo.h>
63b9238976Sth #include <sys/policy.h>
64b9238976Sth #include <sys/sdt.h>
65b9238976Sth #include <sys/list.h>
66b9238976Sth #include <sys/stat.h>
67b9238976Sth #include <sys/mntent.h>
68*2f172c55SRobert Thurlow #include <sys/priv.h>
69b9238976Sth 
70b9238976Sth #include <rpc/types.h>
71b9238976Sth #include <rpc/auth.h>
72b9238976Sth #include <rpc/clnt.h>
73b9238976Sth 
74b9238976Sth #include <nfs/nfs.h>
75b9238976Sth #include <nfs/nfs_clnt.h>
76b9238976Sth #include <nfs/nfs_acl.h>
77b9238976Sth #include <nfs/lm.h>
78b9238976Sth #include <nfs/nfs4.h>
79b9238976Sth #include <nfs/nfs4_kprot.h>
80b9238976Sth #include <nfs/rnode4.h>
81b9238976Sth #include <nfs/nfs4_clnt.h>
82*2f172c55SRobert Thurlow #include <nfs/nfsid_map.h>
83*2f172c55SRobert Thurlow #include <nfs/nfs4_idmap_impl.h>
84b9238976Sth 
85b9238976Sth #include <vm/hat.h>
86b9238976Sth #include <vm/as.h>
87b9238976Sth #include <vm/page.h>
88b9238976Sth #include <vm/pvn.h>
89b9238976Sth #include <vm/seg.h>
90b9238976Sth #include <vm/seg_map.h>
91b9238976Sth #include <vm/seg_kpm.h>
92b9238976Sth #include <vm/seg_vn.h>
93b9238976Sth 
94b9238976Sth #include <fs/fs_subr.h>
95b9238976Sth 
96b9238976Sth #include <sys/ddi.h>
97b9238976Sth #include <sys/int_fmtio.h>
98b9238976Sth 
99f39b8789Sth #include <sys/sunddi.h>
100b9238976Sth 
101546a3997SThomas Haynes #include <sys/priv_names.h>
102546a3997SThomas Haynes 
103*2f172c55SRobert Thurlow extern zone_key_t	nfs4clnt_zone_key;
104*2f172c55SRobert Thurlow extern zone_key_t	nfsidmap_zone_key;
105*2f172c55SRobert Thurlow 
106b9238976Sth /*
107b9238976Sth  * The automatic unmounter thread stuff!
108b9238976Sth  */
109b9238976Sth static int nfs4_trigger_thread_timer = 20;	/* in seconds */
110b9238976Sth 
111b9238976Sth /*
112b9238976Sth  * Just a default....
113b9238976Sth  */
114b9238976Sth static uint_t nfs4_trigger_mount_to = 240;
115b9238976Sth 
/*
 * Per-instance bookkeeping for the ephemeral-mount machinery: the list
 * ("forest") of ephemeral trees, the mount timeout, and whether the
 * harvester (automatic unmounter) thread has been started.
 */
typedef struct nfs4_trigger_globals {
	kmutex_t		ntg_forest_lock;	/* guards ntg_forest */
	uint_t			ntg_mount_to;		/* unmount timeout, seconds */
	int			ntg_thread_started;	/* harvester running? */
	nfs4_ephemeral_tree_t	*ntg_forest;		/* list of ephemeral trees */
} nfs4_trigger_globals_t;
122b9238976Sth 
123b9238976Sth kmutex_t	nfs4_ephemeral_thread_lock;
124b9238976Sth 
125b9238976Sth zone_key_t	nfs4_ephemeral_key = ZONE_KEY_UNINITIALIZED;
126b9238976Sth 
127b9238976Sth static void	nfs4_ephemeral_start_harvester(nfs4_trigger_globals_t *);
128b9238976Sth 
129b9238976Sth /*
130b9238976Sth  * Used for ephemeral mounts; contains data either duplicated from
131b9238976Sth  * servinfo4_t, or hand-crafted, depending on type of ephemeral mount.
132b9238976Sth  *
133b9238976Sth  * It's intended that this structure is used solely for ephemeral
134b9238976Sth  * mount-type specific data, for passing this data to
135b9238976Sth  * nfs4_trigger_nargs_create().
136b9238976Sth  */
typedef struct ephemeral_servinfo {
	char			*esi_hostname;	/* server host name */
	char			*esi_netname;	/* server netname */
	char			*esi_path;	/* server-side path to mount */
	int			esi_path_len;	/* length of esi_path */
	int			esi_mount_flags; /* flags for struct nfs_args */
	struct netbuf		*esi_addr;	/* server address */
	struct netbuf		*esi_syncaddr;	/* sync (lockd/statd) address */
	struct knetconfig	*esi_knconf;	/* transport configuration */
} ephemeral_servinfo_t;
147b9238976Sth 
148b9238976Sth /*
149b9238976Sth  * Collect together the mount-type specific and generic data args.
150b9238976Sth  */
typedef struct domount_args {
	ephemeral_servinfo_t	*dma_esi;	/* mount-type specific data */
	char			*dma_hostlist;	/* comma-sep. for RO failover */
	struct nfs_args		*dma_nargs;	/* generic NFS mount args */
} domount_args_t;
156b9238976Sth 
157b9238976Sth 
158b9238976Sth /*
159b9238976Sth  * The vnode ops functions for a trigger stub vnode
160b9238976Sth  */
161da6c28aaSamw static int nfs4_trigger_open(vnode_t **, int, cred_t *, caller_context_t *);
162da6c28aaSamw static int nfs4_trigger_getattr(vnode_t *, struct vattr *, int, cred_t *,
163da6c28aaSamw     caller_context_t *);
164da6c28aaSamw static int nfs4_trigger_setattr(vnode_t *, struct vattr *, int, cred_t *,
165da6c28aaSamw     caller_context_t *);
166da6c28aaSamw static int nfs4_trigger_access(vnode_t *, int, int, cred_t *,
167da6c28aaSamw     caller_context_t *);
168da6c28aaSamw static int nfs4_trigger_readlink(vnode_t *, struct uio *, cred_t *,
169da6c28aaSamw     caller_context_t *);
170da6c28aaSamw static int nfs4_trigger_lookup(vnode_t *, char *, vnode_t **,
171da6c28aaSamw     struct pathname *, int, vnode_t *, cred_t *, caller_context_t *,
172da6c28aaSamw     int *, pathname_t *);
173da6c28aaSamw static int nfs4_trigger_create(vnode_t *, char *, struct vattr *,
174da6c28aaSamw     enum vcexcl, int, vnode_t **, cred_t *, int, caller_context_t *,
175da6c28aaSamw     vsecattr_t *);
176da6c28aaSamw static int nfs4_trigger_remove(vnode_t *, char *, cred_t *, caller_context_t *,
177da6c28aaSamw     int);
178da6c28aaSamw static int nfs4_trigger_link(vnode_t *, vnode_t *, char *, cred_t *,
179da6c28aaSamw     caller_context_t *, int);
180da6c28aaSamw static int nfs4_trigger_rename(vnode_t *, char *, vnode_t *, char *,
181da6c28aaSamw     cred_t *, caller_context_t *, int);
182da6c28aaSamw static int nfs4_trigger_mkdir(vnode_t *, char *, struct vattr *,
183da6c28aaSamw     vnode_t **, cred_t *, caller_context_t *, int, vsecattr_t *vsecp);
184da6c28aaSamw static int nfs4_trigger_rmdir(vnode_t *, char *, vnode_t *, cred_t *,
185da6c28aaSamw     caller_context_t *, int);
186da6c28aaSamw static int nfs4_trigger_symlink(vnode_t *, char *, struct vattr *, char *,
187da6c28aaSamw     cred_t *, caller_context_t *, int);
188da6c28aaSamw static int nfs4_trigger_cmp(vnode_t *, vnode_t *, caller_context_t *);
189b9238976Sth 
190b9238976Sth /*
191b9238976Sth  * Regular NFSv4 vnodeops that we need to reference directly
192b9238976Sth  */
193da6c28aaSamw extern int	nfs4_getattr(vnode_t *, struct vattr *, int, cred_t *,
194da6c28aaSamw 		    caller_context_t *);
195da6c28aaSamw extern void	nfs4_inactive(vnode_t *, cred_t *, caller_context_t *);
196b9238976Sth extern int	nfs4_rwlock(vnode_t *, int, caller_context_t *);
197b9238976Sth extern void	nfs4_rwunlock(vnode_t *, int, caller_context_t *);
198b9238976Sth extern int	nfs4_lookup(vnode_t *, char *, vnode_t **,
199da6c28aaSamw 		    struct pathname *, int, vnode_t *, cred_t *,
200da6c28aaSamw 		    caller_context_t *, int *, pathname_t *);
201da6c28aaSamw extern int	nfs4_pathconf(vnode_t *, int, ulong_t *, cred_t *,
202da6c28aaSamw 		    caller_context_t *);
203da6c28aaSamw extern int	nfs4_getsecattr(vnode_t *, vsecattr_t *, int, cred_t *,
204da6c28aaSamw 		    caller_context_t *);
205da6c28aaSamw extern int	nfs4_fid(vnode_t *, fid_t *, caller_context_t *);
206da6c28aaSamw extern int	nfs4_realvp(vnode_t *, vnode_t **, caller_context_t *);
207b9238976Sth 
208546a3997SThomas Haynes static int	nfs4_trigger_mount(vnode_t *, cred_t *, vnode_t **);
209b9238976Sth static int	nfs4_trigger_domount(vnode_t *, domount_args_t *, vfs_t **,
2106962f5b8SThomas Haynes     cred_t *, vnode_t **);
211*2f172c55SRobert Thurlow static domount_args_t  *nfs4_trigger_domount_args_create(vnode_t *, cred_t *);
212b9238976Sth static void	nfs4_trigger_domount_args_destroy(domount_args_t *dma,
213b9238976Sth     vnode_t *vp);
214*2f172c55SRobert Thurlow static ephemeral_servinfo_t *nfs4_trigger_esi_create(vnode_t *, servinfo4_t *,
215*2f172c55SRobert Thurlow     cred_t *);
216b9238976Sth static void	nfs4_trigger_esi_destroy(ephemeral_servinfo_t *, vnode_t *);
217b9238976Sth static ephemeral_servinfo_t *nfs4_trigger_esi_create_mirrormount(vnode_t *,
218b9238976Sth     servinfo4_t *);
219*2f172c55SRobert Thurlow static ephemeral_servinfo_t *nfs4_trigger_esi_create_referral(vnode_t *,
220*2f172c55SRobert Thurlow     cred_t *);
221b9238976Sth static struct nfs_args 	*nfs4_trigger_nargs_create(mntinfo4_t *, servinfo4_t *,
222b9238976Sth     ephemeral_servinfo_t *);
223b9238976Sth static void	nfs4_trigger_nargs_destroy(struct nfs_args *);
224b9238976Sth static char	*nfs4_trigger_create_mntopts(vfs_t *);
225b9238976Sth static void	nfs4_trigger_destroy_mntopts(char *);
226b9238976Sth static int 	nfs4_trigger_add_mntopt(char *, char *, vfs_t *);
227b9238976Sth static enum clnt_stat nfs4_trigger_ping_server(servinfo4_t *, int);
228*2f172c55SRobert Thurlow static enum clnt_stat nfs4_ping_server_common(struct knetconfig *,
229*2f172c55SRobert Thurlow     struct netbuf *, int);
230b9238976Sth 
231b9238976Sth extern int	umount2_engine(vfs_t *, int, cred_t *, int);
232b9238976Sth 
233b9238976Sth vnodeops_t *nfs4_trigger_vnodeops;
234b9238976Sth 
235b9238976Sth /*
236b9238976Sth  * These are the vnodeops that we must define for stub vnodes.
237b9238976Sth  *
238b9238976Sth  *
239b9238976Sth  * Many of the VOPs defined for NFSv4 do not need to be defined here,
240b9238976Sth  * for various reasons. This will result in the VFS default function being
241b9238976Sth  * used:
242b9238976Sth  *
243b9238976Sth  * - These VOPs require a previous VOP_OPEN to have occurred. That will have
244b9238976Sth  *   lost the reference to the stub vnode, meaning these should not be called:
245b9238976Sth  *       close, read, write, ioctl, readdir, seek.
246b9238976Sth  *
247b9238976Sth  * - These VOPs are meaningless for vnodes without data pages. Since the
248b9238976Sth  *   stub vnode is of type VDIR, these should not be called:
249b9238976Sth  *       space, getpage, putpage, map, addmap, delmap, pageio, fsync.
250b9238976Sth  *
251b9238976Sth  * - These VOPs are otherwise not applicable, and should not be called:
252b9238976Sth  *       dump, setsecattr.
253b9238976Sth  *
254b9238976Sth  *
255b9238976Sth  * These VOPs we do not want to define, but nor do we want the VFS default
256b9238976Sth  * action. Instead, we specify the VFS error function, with fs_error(), but
257b9238976Sth  * note that fs_error() is not actually called. Instead it results in the
258b9238976Sth  * use of the error function defined for the particular VOP, in vn_ops_table[]:
259b9238976Sth  *
260b9238976Sth  * -   frlock, dispose, shrlock.
261b9238976Sth  *
262b9238976Sth  *
263b9238976Sth  * These VOPs we define to use the corresponding regular NFSv4 vnodeop.
264b9238976Sth  * NOTE: if any of these ops involve an OTW call with the stub FH, then
265b9238976Sth  * that call must be wrapped with save_mnt_secinfo()/check_mnt_secinfo()
266b9238976Sth  * to protect the security data in the servinfo4_t for the "parent"
267b9238976Sth  * filesystem that contains the stub.
268b9238976Sth  *
269b9238976Sth  * - These VOPs should not trigger a mount, so that "ls -l" does not:
270b9238976Sth  *       pathconf, getsecattr.
271b9238976Sth  *
272b9238976Sth  * - These VOPs would not make sense to trigger:
273b9238976Sth  *       inactive, rwlock, rwunlock, fid, realvp.
274b9238976Sth  */
const fs_operation_def_t nfs4_trigger_vnodeops_template[] = {
	/* Trigger ops: may force the ephemeral mount, then re-issue the VOP */
	VOPNAME_OPEN,		{ .vop_open = nfs4_trigger_open },
	VOPNAME_GETATTR,	{ .vop_getattr = nfs4_trigger_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = nfs4_trigger_setattr },
	VOPNAME_ACCESS,		{ .vop_access = nfs4_trigger_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = nfs4_trigger_lookup },
	VOPNAME_CREATE,		{ .vop_create = nfs4_trigger_create },
	VOPNAME_REMOVE,		{ .vop_remove = nfs4_trigger_remove },
	VOPNAME_LINK,		{ .vop_link = nfs4_trigger_link },
	VOPNAME_RENAME,		{ .vop_rename = nfs4_trigger_rename },
	VOPNAME_MKDIR,		{ .vop_mkdir = nfs4_trigger_mkdir },
	VOPNAME_RMDIR,		{ .vop_rmdir = nfs4_trigger_rmdir },
	VOPNAME_SYMLINK,	{ .vop_symlink = nfs4_trigger_symlink },
	VOPNAME_READLINK,	{ .vop_readlink = nfs4_trigger_readlink },
	/* Non-triggering ops: handed to the regular NFSv4 implementations */
	VOPNAME_INACTIVE, 	{ .vop_inactive = nfs4_inactive },
	VOPNAME_FID,		{ .vop_fid = nfs4_fid },
	VOPNAME_RWLOCK,		{ .vop_rwlock = nfs4_rwlock },
	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = nfs4_rwunlock },
	VOPNAME_REALVP,		{ .vop_realvp = nfs4_realvp },
	VOPNAME_GETSECATTR,	{ .vop_getsecattr = nfs4_getsecattr },
	VOPNAME_PATHCONF,	{ .vop_pathconf = nfs4_pathconf },
	/* fs_error() selects the per-VOP error fn (see block comment above) */
	VOPNAME_FRLOCK,		{ .error = fs_error },
	VOPNAME_DISPOSE,	{ .error = fs_error },
	VOPNAME_SHRLOCK,	{ .error = fs_error },
	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
	NULL, NULL
};
302b9238976Sth 
303d3a14591SThomas Haynes static void
304d708af74SThomas Haynes nfs4_ephemeral_tree_incr(nfs4_ephemeral_tree_t *net)
305d3a14591SThomas Haynes {
306d708af74SThomas Haynes 	ASSERT(mutex_owned(&net->net_cnt_lock));
307d3a14591SThomas Haynes 	net->net_refcnt++;
308d3a14591SThomas Haynes 	ASSERT(net->net_refcnt != 0);
309d708af74SThomas Haynes }
310d708af74SThomas Haynes 
311d708af74SThomas Haynes static void
312d708af74SThomas Haynes nfs4_ephemeral_tree_hold(nfs4_ephemeral_tree_t *net)
313d708af74SThomas Haynes {
314d708af74SThomas Haynes 	mutex_enter(&net->net_cnt_lock);
315d708af74SThomas Haynes 	nfs4_ephemeral_tree_incr(net);
316d3a14591SThomas Haynes 	mutex_exit(&net->net_cnt_lock);
317d3a14591SThomas Haynes }
318d3a14591SThomas Haynes 
319d3a14591SThomas Haynes /*
320d3a14591SThomas Haynes  * We need a safe way to decrement the refcnt whilst the
321d3a14591SThomas Haynes  * lock is being held.
322d3a14591SThomas Haynes  */
323d3a14591SThomas Haynes static void
324d3a14591SThomas Haynes nfs4_ephemeral_tree_decr(nfs4_ephemeral_tree_t *net)
325d3a14591SThomas Haynes {
326d3a14591SThomas Haynes 	ASSERT(mutex_owned(&net->net_cnt_lock));
327d3a14591SThomas Haynes 	ASSERT(net->net_refcnt != 0);
328d3a14591SThomas Haynes 	net->net_refcnt--;
329d3a14591SThomas Haynes }
330d3a14591SThomas Haynes 
331d3a14591SThomas Haynes static void
332d3a14591SThomas Haynes nfs4_ephemeral_tree_rele(nfs4_ephemeral_tree_t *net)
333d3a14591SThomas Haynes {
334d3a14591SThomas Haynes 	mutex_enter(&net->net_cnt_lock);
335d3a14591SThomas Haynes 	nfs4_ephemeral_tree_decr(net);
336d3a14591SThomas Haynes 	mutex_exit(&net->net_cnt_lock);
337d3a14591SThomas Haynes }
338d3a14591SThomas Haynes 
339b9238976Sth /*
340b9238976Sth  * Trigger ops for stub vnodes; for mirror mounts, etc.
341b9238976Sth  *
342b9238976Sth  * The general idea is that a "triggering" op will first call
343b9238976Sth  * nfs4_trigger_mount(), which will find out whether a mount has already
344b9238976Sth  * been triggered.
345b9238976Sth  *
346b9238976Sth  * If it has, then nfs4_trigger_mount() sets newvp to the root vnode
347b9238976Sth  * of the covering vfs.
348b9238976Sth  *
349b9238976Sth  * If a mount has not yet been triggered, nfs4_trigger_mount() will do so,
350b9238976Sth  * and again set newvp, as above.
351b9238976Sth  *
352b9238976Sth  * The triggering op may then re-issue the VOP by calling it on newvp.
353b9238976Sth  *
354b9238976Sth  * Note that some ops may perform custom action, and may or may not need
355b9238976Sth  * to trigger a mount.
356b9238976Sth  *
357b9238976Sth  * Some ops need to call the regular NFSv4 vnodeop for a stub vnode. We
358b9238976Sth  * obviously can't do this with VOP_<whatever>, since it's a stub vnode
359b9238976Sth  * and that would just recurse. Instead, we call the v4 op directly,
360b9238976Sth  * by name.  This is OK, since we know that the vnode is for NFSv4,
361b9238976Sth  * otherwise it couldn't be a stub.
362b9238976Sth  *
363b9238976Sth  */
364b9238976Sth 
365b9238976Sth static int
366da6c28aaSamw nfs4_trigger_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
367b9238976Sth {
368b9238976Sth 	int error;
369b9238976Sth 	vnode_t *newvp;
370b9238976Sth 
371546a3997SThomas Haynes 	error = nfs4_trigger_mount(*vpp, cr, &newvp);
372b9238976Sth 	if (error)
373b9238976Sth 		return (error);
374b9238976Sth 
375b9238976Sth 	/* Release the stub vnode, as we're losing the reference to it */
376b9238976Sth 	VN_RELE(*vpp);
377b9238976Sth 
378b9238976Sth 	/* Give the caller the root vnode of the newly-mounted fs */
379b9238976Sth 	*vpp = newvp;
380b9238976Sth 
381b9238976Sth 	/* return with VN_HELD(newvp) */
382da6c28aaSamw 	return (VOP_OPEN(vpp, flag, cr, ct));
383b9238976Sth }
384b9238976Sth 
385*2f172c55SRobert Thurlow void
386*2f172c55SRobert Thurlow nfs4_fake_attrs(vnode_t *vp, struct vattr *vap)
387*2f172c55SRobert Thurlow {
388*2f172c55SRobert Thurlow 	uint_t mask;
389*2f172c55SRobert Thurlow 	timespec_t now;
390*2f172c55SRobert Thurlow 
391*2f172c55SRobert Thurlow 	/*
392*2f172c55SRobert Thurlow 	 * Set some attributes here for referrals.
393*2f172c55SRobert Thurlow 	 */
394*2f172c55SRobert Thurlow 	mask = vap->va_mask;
395*2f172c55SRobert Thurlow 	bzero(vap, sizeof (struct vattr));
396*2f172c55SRobert Thurlow 	vap->va_mask	= mask;
397*2f172c55SRobert Thurlow 	vap->va_uid	= 0;
398*2f172c55SRobert Thurlow 	vap->va_gid	= 0;
399*2f172c55SRobert Thurlow 	vap->va_nlink	= 1;
400*2f172c55SRobert Thurlow 	vap->va_size	= 1;
401*2f172c55SRobert Thurlow 	gethrestime(&now);
402*2f172c55SRobert Thurlow 	vap->va_atime	= now;
403*2f172c55SRobert Thurlow 	vap->va_mtime	= now;
404*2f172c55SRobert Thurlow 	vap->va_ctime	= now;
405*2f172c55SRobert Thurlow 	vap->va_type	= VDIR;
406*2f172c55SRobert Thurlow 	vap->va_mode	= 0555;
407*2f172c55SRobert Thurlow 	vap->va_fsid	= vp->v_vfsp->vfs_dev;
408*2f172c55SRobert Thurlow 	vap->va_rdev	= 0;
409*2f172c55SRobert Thurlow 	vap->va_blksize	= MAXBSIZE;
410*2f172c55SRobert Thurlow 	vap->va_nblocks	= 1;
411*2f172c55SRobert Thurlow 	vap->va_seq	= 0;
412*2f172c55SRobert Thurlow }
413*2f172c55SRobert Thurlow 
414b9238976Sth /*
415b9238976Sth  * For the majority of cases, nfs4_trigger_getattr() will not trigger
416b9238976Sth  * a mount. However, if ATTR_TRIGGER is set, we are being informed
417b9238976Sth  * that we need to force the mount before we attempt to determine
418b9238976Sth  * the attributes. The intent is an atomic operation for security
419b9238976Sth  * testing.
420*2f172c55SRobert Thurlow  *
421*2f172c55SRobert Thurlow  * If we're not triggering a mount, we can still inquire about the
422*2f172c55SRobert Thurlow  * actual attributes from the server in the mirror mount case,
423*2f172c55SRobert Thurlow  * and will return manufactured attributes for a referral (see
424*2f172c55SRobert Thurlow  * the 'create' branch of find_referral_stubvp()).
425b9238976Sth  */
426b9238976Sth static int
427da6c28aaSamw nfs4_trigger_getattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
428da6c28aaSamw     caller_context_t *ct)
429b9238976Sth {
430b9238976Sth 	int error;
431b9238976Sth 
432b9238976Sth 	if (flags & ATTR_TRIGGER) {
433b9238976Sth 		vnode_t	*newvp;
434b9238976Sth 
435546a3997SThomas Haynes 		error = nfs4_trigger_mount(vp, cr, &newvp);
436b9238976Sth 		if (error)
437b9238976Sth 			return (error);
438b9238976Sth 
439da6c28aaSamw 		error = VOP_GETATTR(newvp, vap, flags, cr, ct);
440b9238976Sth 		VN_RELE(newvp);
441*2f172c55SRobert Thurlow 
442*2f172c55SRobert Thurlow 	} else if (RP_ISSTUB_MIRRORMOUNT(VTOR4(vp))) {
443*2f172c55SRobert Thurlow 
444da6c28aaSamw 		error = nfs4_getattr(vp, vap, flags, cr, ct);
445*2f172c55SRobert Thurlow 
446*2f172c55SRobert Thurlow 	} else if (RP_ISSTUB_REFERRAL(VTOR4(vp))) {
447*2f172c55SRobert Thurlow 
448*2f172c55SRobert Thurlow 		nfs4_fake_attrs(vp, vap);
449*2f172c55SRobert Thurlow 		error = 0;
450b9238976Sth 	}
451b9238976Sth 
452b9238976Sth 	return (error);
453b9238976Sth }
454b9238976Sth 
455b9238976Sth static int
456b9238976Sth nfs4_trigger_setattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
457b9238976Sth 		caller_context_t *ct)
458b9238976Sth {
459b9238976Sth 	int error;
460b9238976Sth 	vnode_t *newvp;
461b9238976Sth 
462546a3997SThomas Haynes 	error = nfs4_trigger_mount(vp, cr, &newvp);
463b9238976Sth 	if (error)
464b9238976Sth 		return (error);
465b9238976Sth 
466b9238976Sth 	error = VOP_SETATTR(newvp, vap, flags, cr, ct);
467b9238976Sth 	VN_RELE(newvp);
468b9238976Sth 
469b9238976Sth 	return (error);
470b9238976Sth }
471b9238976Sth 
472b9238976Sth static int
473da6c28aaSamw nfs4_trigger_access(vnode_t *vp, int mode, int flags, cred_t *cr,
474da6c28aaSamw     caller_context_t *ct)
475b9238976Sth {
476b9238976Sth 	int error;
477b9238976Sth 	vnode_t *newvp;
478b9238976Sth 
479546a3997SThomas Haynes 	error = nfs4_trigger_mount(vp, cr, &newvp);
480b9238976Sth 	if (error)
481b9238976Sth 		return (error);
482b9238976Sth 
483da6c28aaSamw 	error = VOP_ACCESS(newvp, mode, flags, cr, ct);
484b9238976Sth 	VN_RELE(newvp);
485b9238976Sth 
486b9238976Sth 	return (error);
487b9238976Sth }
488b9238976Sth 
489b9238976Sth static int
490da6c28aaSamw nfs4_trigger_lookup(vnode_t *dvp, char *nm, vnode_t **vpp,
491da6c28aaSamw     struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr,
492da6c28aaSamw     caller_context_t *ct, int *deflags, pathname_t *rpnp)
493b9238976Sth {
494b9238976Sth 	int error;
495b9238976Sth 	vnode_t *newdvp;
496b9238976Sth 	rnode4_t *drp = VTOR4(dvp);
497b9238976Sth 
498b9238976Sth 	ASSERT(RP_ISSTUB(drp));
499b9238976Sth 
500b9238976Sth 	/*
501b9238976Sth 	 * It's not legal to lookup ".." for an fs root, so we mustn't pass
502b9238976Sth 	 * that up. Instead, pass onto the regular op, regardless of whether
503b9238976Sth 	 * we've triggered a mount.
504b9238976Sth 	 */
505b9238976Sth 	if (strcmp(nm, "..") == 0)
506*2f172c55SRobert Thurlow 		if (RP_ISSTUB_MIRRORMOUNT(drp)) {
507*2f172c55SRobert Thurlow 			return (nfs4_lookup(dvp, nm, vpp, pnp, flags, rdir, cr,
508*2f172c55SRobert Thurlow 			    ct, deflags, rpnp));
509*2f172c55SRobert Thurlow 		} else if (RP_ISSTUB_REFERRAL(drp)) {
510*2f172c55SRobert Thurlow 			/* Return the parent vnode */
511*2f172c55SRobert Thurlow 			return (vtodv(dvp, vpp, cr, TRUE));
512*2f172c55SRobert Thurlow 		}
513b9238976Sth 
514546a3997SThomas Haynes 	error = nfs4_trigger_mount(dvp, cr, &newdvp);
515b9238976Sth 	if (error)
516b9238976Sth 		return (error);
517b9238976Sth 
518da6c28aaSamw 	error = VOP_LOOKUP(newdvp, nm, vpp, pnp, flags, rdir, cr, ct,
519da6c28aaSamw 	    deflags, rpnp);
520b9238976Sth 	VN_RELE(newdvp);
521b9238976Sth 
522b9238976Sth 	return (error);
523b9238976Sth }
524b9238976Sth 
525b9238976Sth static int
526b9238976Sth nfs4_trigger_create(vnode_t *dvp, char *nm, struct vattr *va,
527da6c28aaSamw     enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr,
528da6c28aaSamw     int flags, caller_context_t *ct, vsecattr_t *vsecp)
529b9238976Sth {
530b9238976Sth 	int error;
531b9238976Sth 	vnode_t *newdvp;
532b9238976Sth 
533546a3997SThomas Haynes 	error = nfs4_trigger_mount(dvp, cr, &newdvp);
534b9238976Sth 	if (error)
535b9238976Sth 		return (error);
536b9238976Sth 
537da6c28aaSamw 	error = VOP_CREATE(newdvp, nm, va, exclusive, mode, vpp, cr,
538da6c28aaSamw 	    flags, ct, vsecp);
539b9238976Sth 	VN_RELE(newdvp);
540b9238976Sth 
541b9238976Sth 	return (error);
542b9238976Sth }
543b9238976Sth 
544b9238976Sth static int
545da6c28aaSamw nfs4_trigger_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
546da6c28aaSamw     int flags)
547b9238976Sth {
548b9238976Sth 	int error;
549b9238976Sth 	vnode_t *newdvp;
550b9238976Sth 
551546a3997SThomas Haynes 	error = nfs4_trigger_mount(dvp, cr, &newdvp);
552b9238976Sth 	if (error)
553b9238976Sth 		return (error);
554b9238976Sth 
555da6c28aaSamw 	error = VOP_REMOVE(newdvp, nm, cr, ct, flags);
556b9238976Sth 	VN_RELE(newdvp);
557b9238976Sth 
558b9238976Sth 	return (error);
559b9238976Sth }
560b9238976Sth 
561b9238976Sth static int
562da6c28aaSamw nfs4_trigger_link(vnode_t *tdvp, vnode_t *svp, char *tnm, cred_t *cr,
563da6c28aaSamw     caller_context_t *ct, int flags)
564b9238976Sth {
565b9238976Sth 	int error;
566b9238976Sth 	vnode_t *newtdvp;
567b9238976Sth 
568546a3997SThomas Haynes 	error = nfs4_trigger_mount(tdvp, cr, &newtdvp);
569b9238976Sth 	if (error)
570b9238976Sth 		return (error);
571b9238976Sth 
572b9238976Sth 	/*
573b9238976Sth 	 * We don't check whether svp is a stub. Let the NFSv4 code
574b9238976Sth 	 * detect that error, and return accordingly.
575b9238976Sth 	 */
576da6c28aaSamw 	error = VOP_LINK(newtdvp, svp, tnm, cr, ct, flags);
577b9238976Sth 	VN_RELE(newtdvp);
578b9238976Sth 
579b9238976Sth 	return (error);
580b9238976Sth }
581b9238976Sth 
582b9238976Sth static int
583b9238976Sth nfs4_trigger_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
584da6c28aaSamw     cred_t *cr, caller_context_t *ct, int flags)
585b9238976Sth {
586b9238976Sth 	int error;
587b9238976Sth 	vnode_t *newsdvp;
588b9238976Sth 	rnode4_t *tdrp = VTOR4(tdvp);
589b9238976Sth 
590b9238976Sth 	/*
591b9238976Sth 	 * We know that sdvp is a stub, otherwise we would not be here.
592b9238976Sth 	 *
593b9238976Sth 	 * If tdvp is also be a stub, there are two possibilities: it
594b9238976Sth 	 * is either the same stub as sdvp [i.e. VN_CMP(sdvp, tdvp)]
595b9238976Sth 	 * or it is a different stub [!VN_CMP(sdvp, tdvp)].
596b9238976Sth 	 *
597b9238976Sth 	 * In the former case, just trigger sdvp, and treat tdvp as
598b9238976Sth 	 * though it were not a stub.
599b9238976Sth 	 *
600b9238976Sth 	 * In the latter case, it might be a different stub for the
601b9238976Sth 	 * same server fs as sdvp, or for a different server fs.
602b9238976Sth 	 * Regardless, from the client perspective this would still
603b9238976Sth 	 * be a cross-filesystem rename, and should not be allowed,
604b9238976Sth 	 * so return EXDEV, without triggering either mount.
605b9238976Sth 	 */
606b9238976Sth 	if (RP_ISSTUB(tdrp) && !VN_CMP(sdvp, tdvp))
607b9238976Sth 		return (EXDEV);
608b9238976Sth 
609546a3997SThomas Haynes 	error = nfs4_trigger_mount(sdvp, cr, &newsdvp);
610b9238976Sth 	if (error)
611b9238976Sth 		return (error);
612b9238976Sth 
613da6c28aaSamw 	error = VOP_RENAME(newsdvp, snm, tdvp, tnm, cr, ct, flags);
614b9238976Sth 
615b9238976Sth 	VN_RELE(newsdvp);
616b9238976Sth 
617b9238976Sth 	return (error);
618b9238976Sth }
619b9238976Sth 
620da6c28aaSamw /* ARGSUSED */
621b9238976Sth static int
622b9238976Sth nfs4_trigger_mkdir(vnode_t *dvp, char *nm, struct vattr *va, vnode_t **vpp,
623da6c28aaSamw     cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
624b9238976Sth {
625b9238976Sth 	int error;
626b9238976Sth 	vnode_t *newdvp;
627b9238976Sth 
628546a3997SThomas Haynes 	error = nfs4_trigger_mount(dvp, cr, &newdvp);
629b9238976Sth 	if (error)
630b9238976Sth 		return (error);
631b9238976Sth 
632da6c28aaSamw 	error = VOP_MKDIR(newdvp, nm, va, vpp, cr, ct, flags, vsecp);
633b9238976Sth 	VN_RELE(newdvp);
634b9238976Sth 
635b9238976Sth 	return (error);
636b9238976Sth }
637b9238976Sth 
638b9238976Sth static int
639da6c28aaSamw nfs4_trigger_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
640da6c28aaSamw     caller_context_t *ct, int flags)
641b9238976Sth {
642b9238976Sth 	int error;
643b9238976Sth 	vnode_t *newdvp;
644b9238976Sth 
645546a3997SThomas Haynes 	error = nfs4_trigger_mount(dvp, cr, &newdvp);
646b9238976Sth 	if (error)
647b9238976Sth 		return (error);
648b9238976Sth 
649da6c28aaSamw 	error = VOP_RMDIR(newdvp, nm, cdir, cr, ct, flags);
650b9238976Sth 	VN_RELE(newdvp);
651b9238976Sth 
652b9238976Sth 	return (error);
653b9238976Sth }
654b9238976Sth 
655b9238976Sth static int
656b9238976Sth nfs4_trigger_symlink(vnode_t *dvp, char *lnm, struct vattr *tva, char *tnm,
657da6c28aaSamw     cred_t *cr, caller_context_t *ct, int flags)
658b9238976Sth {
659b9238976Sth 	int error;
660b9238976Sth 	vnode_t *newdvp;
661b9238976Sth 
662546a3997SThomas Haynes 	error = nfs4_trigger_mount(dvp, cr, &newdvp);
663b9238976Sth 	if (error)
664b9238976Sth 		return (error);
665b9238976Sth 
666da6c28aaSamw 	error = VOP_SYMLINK(newdvp, lnm, tva, tnm, cr, ct, flags);
667b9238976Sth 	VN_RELE(newdvp);
668b9238976Sth 
669b9238976Sth 	return (error);
670b9238976Sth }
671b9238976Sth 
672b9238976Sth static int
673da6c28aaSamw nfs4_trigger_readlink(vnode_t *vp, struct uio *uiop, cred_t *cr,
674da6c28aaSamw     caller_context_t *ct)
675b9238976Sth {
676b9238976Sth 	int error;
677b9238976Sth 	vnode_t *newvp;
678b9238976Sth 
679546a3997SThomas Haynes 	error = nfs4_trigger_mount(vp, cr, &newvp);
680b9238976Sth 	if (error)
681b9238976Sth 		return (error);
682b9238976Sth 
683da6c28aaSamw 	error = VOP_READLINK(newvp, uiop, cr, ct);
684b9238976Sth 	VN_RELE(newvp);
685b9238976Sth 
686b9238976Sth 	return (error);
687b9238976Sth }
688b9238976Sth 
689b9238976Sth /* end of trigger vnode ops */
690b9238976Sth 
/*
 * See if the mount has already been done by another caller.
 *
 * On success (return 0) with *was_mounted == TRUE, *vfsp is the
 * covering vfs and *newvpp holds a reference on its root vnode;
 * the ephemeral reference time is also refreshed so the harvester
 * does not unmount a tree that is in active use.
 */
static int
nfs4_trigger_mounted_already(vnode_t *vp, vnode_t **newvpp,
    bool_t *was_mounted, vfs_t **vfsp)
{
	int		error;
	mntinfo4_t	*mi = VTOMI4(vp);

	*was_mounted = FALSE;

	/* Serialize against a concurrent mount/unmount of this vnode. */
	error = vn_vfsrlock_wait(vp);
	if (error)
		return (error);

	*vfsp = vn_mountedvfs(vp);
	if (*vfsp != NULL) {
		/* the mount has already occurred */
		error = VFS_ROOT(*vfsp, newvpp);
		if (!error) {
			/* need to update the reference time  */
			mutex_enter(&mi->mi_lock);
			if (mi->mi_ephemeral)
				mi->mi_ephemeral->ne_ref_time =
				    gethrestime_sec();
			mutex_exit(&mi->mi_lock);

			*was_mounted = TRUE;
		}
	}

	/*
	 * NOTE(review): a VFS_ROOT() failure is discarded here — we
	 * return 0 with *was_mounted still FALSE, so the caller will
	 * proceed to attempt the mount itself. Presumably intentional
	 * (retry via the normal mount path) — confirm.
	 */
	vn_vfsunlock(vp);
	return (0);
}
7266962f5b8SThomas Haynes 
727b9238976Sth /*
728*2f172c55SRobert Thurlow  * Mount upon a trigger vnode; for mirror-mounts, referrals, etc.
729b9238976Sth  *
730b9238976Sth  * The mount may have already occurred, via another thread. If not,
731b9238976Sth  * assemble the location information - which may require fetching - and
732b9238976Sth  * perform the mount.
733b9238976Sth  *
734b9238976Sth  * Sets newvp to be the root of the fs that is now covering vp. Note
735b9238976Sth  * that we return with VN_HELD(*newvp).
736b9238976Sth  *
737b9238976Sth  * The caller is responsible for passing the VOP onto the covering fs.
738b9238976Sth  */
static int
nfs4_trigger_mount(vnode_t *vp, cred_t *cr, vnode_t **newvpp)
{
	int			 error;
	vfs_t			*vfsp;
	rnode4_t		*rp = VTOR4(vp);
	mntinfo4_t		*mi = VTOMI4(vp);
	domount_args_t		*dma;

	nfs4_ephemeral_tree_t	*net;

	bool_t			must_unlock = FALSE;
	bool_t			is_building = FALSE;
	bool_t			was_mounted = FALSE;

	cred_t			*mcred = NULL;

	nfs4_trigger_globals_t	*ntg;

	zone_t			*zone = curproc->p_zone;

	/* Only ever invoked on a stub rnode (mirror-mount/referral). */
	ASSERT(RP_ISSTUB(rp));

	*newvpp = NULL;

	/*
	 * Has the mount already occurred?
	 */
	error = nfs4_trigger_mounted_already(vp, newvpp,
	    &was_mounted, &vfsp);
	if (error || was_mounted)
		goto done;

	ntg = zone_getspecific(nfs4_ephemeral_key, zone);
	ASSERT(ntg != NULL);

	mutex_enter(&mi->mi_lock);

	/*
	 * We need to lock down the ephemeral tree.
	 */
	if (mi->mi_ephemeral_tree == NULL) {
		/*
		 * First ephemeral mount under this fs: allocate the tree.
		 * The initial refcnt of 1 is the reference for the
		 * mi_ephemeral_tree tie established just below.
		 */
		net = kmem_zalloc(sizeof (*net), KM_SLEEP);
		mutex_init(&net->net_tree_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&net->net_cnt_lock, NULL, MUTEX_DEFAULT, NULL);
		net->net_refcnt = 1;
		net->net_status = NFS4_EPHEMERAL_TREE_BUILDING;
		is_building = TRUE;

		/*
		 * We need to add it to the zone specific list for
		 * automatic unmounting and harvesting of deadwood.
		 */
		mutex_enter(&ntg->ntg_forest_lock);
		if (ntg->ntg_forest != NULL)
			net->net_next = ntg->ntg_forest;
		ntg->ntg_forest = net;
		mutex_exit(&ntg->ntg_forest_lock);

		/*
		 * No lock order confusion with mi_lock because no
		 * other node could have grabbed net_tree_lock.
		 */
		mutex_enter(&net->net_tree_lock);
		mi->mi_ephemeral_tree = net;
		net->net_mount = mi;
		mutex_exit(&mi->mi_lock);
	} else {
		net = mi->mi_ephemeral_tree;
		nfs4_ephemeral_tree_hold(net);

		/* drop mi_lock before blocking on net_tree_lock */
		mutex_exit(&mi->mi_lock);

		mutex_enter(&net->net_tree_lock);

		/*
		 * We can only proceed if the tree is neither locked
		 * nor being torn down.
		 */
		mutex_enter(&net->net_cnt_lock);
		if (net->net_status & NFS4_EPHEMERAL_TREE_PROCESSING) {
			nfs4_ephemeral_tree_decr(net);
			mutex_exit(&net->net_cnt_lock);
			mutex_exit(&net->net_tree_lock);

			return (EIO);
		}
		mutex_exit(&net->net_cnt_lock);
	}

	/* Announce that a mount is in progress on this tree. */
	mutex_enter(&net->net_cnt_lock);
	net->net_status |= NFS4_EPHEMERAL_TREE_MOUNTING;
	mutex_exit(&net->net_cnt_lock);

	must_unlock = TRUE;

	dma = nfs4_trigger_domount_args_create(vp, cr);
	if (dma == NULL) {
		error = EINVAL;
		goto done;
	}

	/*
	 * Note that since we define mirror mounts to work
	 * for any user, we simply extend the privileges of
	 * the user's credentials to allow the mount to
	 * proceed.
	 */
	mcred = crdup(cr);
	/* NOTE(review): crdup() is expected to succeed; check is defensive */
	if (mcred == NULL) {
		error = EINVAL;
		goto done;
	}

	crset_zone_privall(mcred);
	if (is_system_labeled())
		(void) setpflags(NET_MAC_AWARE, 1, mcred);

	error = nfs4_trigger_domount(vp, dma, &vfsp, mcred, newvpp);
	nfs4_trigger_domount_args_destroy(dma, vp);

	DTRACE_PROBE2(nfs4clnt__func__referral__mount,
	    vnode_t *, vp, int, error);

	crfree(mcred);

done:

	if (must_unlock) {
		mutex_enter(&net->net_cnt_lock);
		net->net_status &= ~NFS4_EPHEMERAL_TREE_MOUNTING;

		/*
		 * REFCNT: If we are the root of the tree, then we need
		 * to keep a reference because we malloced the tree and
		 * this is where we tied it to our mntinfo.
		 *
		 * If we are not the root of the tree, then our tie to
		 * the mntinfo occured elsewhere and we need to
		 * decrement the reference to the tree.
		 */
		if (is_building)
			net->net_status &= ~NFS4_EPHEMERAL_TREE_BUILDING;
		else
			nfs4_ephemeral_tree_decr(net);
		mutex_exit(&net->net_cnt_lock);

		mutex_exit(&net->net_tree_lock);
	}

	/* a "successful" mount that produced no root vnode is a failure */
	if (!error && (newvpp == NULL || *newvpp == NULL))
		error = ENOSYS;

	return (error);
}
894b9238976Sth 
895b9238976Sth /*
896b9238976Sth  * Collect together both the generic & mount-type specific args.
897b9238976Sth  */
static domount_args_t *
nfs4_trigger_domount_args_create(vnode_t *vp, cred_t *cr)
{
	int nointr;
	char *hostlist;
	servinfo4_t *svp;
	struct nfs_args *nargs, *nargs_head;
	enum clnt_stat status;
	ephemeral_servinfo_t *esi, *esi_first;
	domount_args_t *dma;
	mntinfo4_t *mi = VTOMI4(vp);

	nointr = !(mi->mi_flags & MI4_INT);
	/* freed via nfs4_trigger_domount_args_destroy() */
	hostlist = kmem_zalloc(MAXPATHLEN, KM_SLEEP);

	svp = mi->mi_curr_serv;
	/* check if the current server is responding */
	status = nfs4_trigger_ping_server(svp, nointr);
	if (status == RPC_SUCCESS) {
		esi_first = nfs4_trigger_esi_create(vp, svp, cr);
		if (esi_first == NULL) {
			kmem_free(hostlist, MAXPATHLEN);
			return (NULL);
		}

		(void) strlcpy(hostlist, esi_first->esi_hostname, MAXPATHLEN);

		nargs_head = nfs4_trigger_nargs_create(mi, svp, esi_first);
	} else {
		/* current server did not respond */
		esi_first = NULL;
		nargs_head = NULL;
	}
	nargs = nargs_head;

	/*
	 * NFS RO failover.
	 *
	 * If we have multiple servinfo4 structures, linked via sv_next,
	 * we must create one nfs_args for each, linking the nfs_args via
	 * nfs_ext_u.nfs_extB.next.
	 *
	 * We need to build a corresponding esi for each, too, but that is
	 * used solely for building nfs_args, and may be immediately
	 * discarded, as domount() requires the info from just one esi,
	 * but all the nfs_args.
	 *
	 * Currently, the NFS mount code will hang if not all servers
	 * requested are available. To avoid that, we need to ping each
	 * server, here, and remove it from the list if it is not
	 * responding. This has the side-effect of that server then
	 * being permanently unavailable for this failover mount, even if
	 * it recovers. That's unfortunate, but the best we can do until
	 * the mount code path is fixed.
	 */

	/*
	 * If the current server was down, loop indefinitely until we find
	 * at least one responsive server.
	 */
	do {
		/* no locking needed for sv_next; it is only set at fs mount */
		for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) {
			struct nfs_args *next;

			/*
			 * nargs_head: the head of the nfs_args list
			 * nargs: the current tail of the list
			 * next: the newly-created element to be added
			 */

			/*
			 * We've already tried the current server, above;
			 * if it was responding, we have already included it
			 * and it may now be ignored.
			 *
			 * Otherwise, try it again, since it may now have
			 * recovered.
			 */
			if (svp == mi->mi_curr_serv && esi_first != NULL)
				continue;

			(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
			if (svp->sv_flags & SV4_NOTINUSE) {
				nfs_rw_exit(&svp->sv_lock);
				continue;
			}
			nfs_rw_exit(&svp->sv_lock);

			/* check if the server is responding */
			status = nfs4_trigger_ping_server(svp, nointr);
			/* if the server did not respond, ignore it */
			if (status != RPC_SUCCESS)
				continue;

			esi = nfs4_trigger_esi_create(vp, svp, cr);
			if (esi == NULL)
				continue;

			/*
			 * If the original current server (mi_curr_serv)
			 * was down when we first tried it,
			 * (i.e. esi_first == NULL),
			 * we select this new server (svp) to be the server
			 * that we will actually contact (esi_first).
			 *
			 * Note that it's possible that mi_curr_serv == svp,
			 * if that mi_curr_serv was down but has now recovered.
			 */
			next = nfs4_trigger_nargs_create(mi, svp, esi);
			if (esi_first == NULL) {
				ASSERT(nargs == NULL);
				ASSERT(nargs_head == NULL);
				nargs_head = next;
				esi_first = esi;
				(void) strlcpy(hostlist,
				    esi_first->esi_hostname, MAXPATHLEN);
			} else {
				ASSERT(nargs_head != NULL);
				nargs->nfs_ext_u.nfs_extB.next = next;
				(void) strlcat(hostlist, ",", MAXPATHLEN);
				(void) strlcat(hostlist, esi->esi_hostname,
				    MAXPATHLEN);
				/* esi was only needed for hostname & nargs */
				nfs4_trigger_esi_destroy(esi, vp);
			}

			nargs = next;
		}

		/* if we've had no response at all, wait a second */
		if (esi_first == NULL)
			delay(drv_usectohz(1000000));

	} while (esi_first == NULL);
	ASSERT(nargs_head != NULL);

	/* ownership of esi_first, hostlist and the nargs chain moves to dma */
	dma = kmem_zalloc(sizeof (domount_args_t), KM_SLEEP);
	dma->dma_esi = esi_first;
	dma->dma_hostlist = hostlist;
	dma->dma_nargs = nargs_head;

	return (dma);
}
1042b9238976Sth 
1043b9238976Sth static void
1044b9238976Sth nfs4_trigger_domount_args_destroy(domount_args_t *dma, vnode_t *vp)
1045b9238976Sth {
1046b9238976Sth 	if (dma != NULL) {
1047b9238976Sth 		if (dma->dma_esi != NULL && vp != NULL)
1048b9238976Sth 			nfs4_trigger_esi_destroy(dma->dma_esi, vp);
1049b9238976Sth 
1050b9238976Sth 		if (dma->dma_hostlist != NULL)
1051b9238976Sth 			kmem_free(dma->dma_hostlist, MAXPATHLEN);
1052b9238976Sth 
1053b9238976Sth 		if (dma->dma_nargs != NULL) {
1054b9238976Sth 			struct nfs_args *nargs = dma->dma_nargs;
1055b9238976Sth 
1056b9238976Sth 			do {
1057b9238976Sth 				struct nfs_args *next =
1058b9238976Sth 				    nargs->nfs_ext_u.nfs_extB.next;
1059b9238976Sth 
1060b9238976Sth 				nfs4_trigger_nargs_destroy(nargs);
1061b9238976Sth 				nargs = next;
1062b9238976Sth 			} while (nargs != NULL);
1063b9238976Sth 		}
1064b9238976Sth 
1065b9238976Sth 		kmem_free(dma, sizeof (domount_args_t));
1066b9238976Sth 	}
1067b9238976Sth }
1068b9238976Sth 
1069b9238976Sth /*
1070b9238976Sth  * The ephemeral_servinfo_t struct contains basic information we will need to
1071b9238976Sth  * perform the mount. Whilst the structure is generic across different
1072b9238976Sth  * types of ephemeral mount, the way we gather its contents differs.
1073b9238976Sth  */
1074b9238976Sth static ephemeral_servinfo_t *
1075*2f172c55SRobert Thurlow nfs4_trigger_esi_create(vnode_t *vp, servinfo4_t *svp, cred_t *cr)
1076b9238976Sth {
1077b9238976Sth 	ephemeral_servinfo_t *esi;
1078b9238976Sth 	rnode4_t *rp = VTOR4(vp);
1079b9238976Sth 
1080b9238976Sth 	ASSERT(RP_ISSTUB(rp));
1081b9238976Sth 
1082b9238976Sth 	/* Call the ephemeral type-specific routine */
1083b9238976Sth 	if (RP_ISSTUB_MIRRORMOUNT(rp))
1084b9238976Sth 		esi = nfs4_trigger_esi_create_mirrormount(vp, svp);
1085*2f172c55SRobert Thurlow 	else if (RP_ISSTUB_REFERRAL(rp))
1086*2f172c55SRobert Thurlow 		esi = nfs4_trigger_esi_create_referral(vp, cr);
1087b9238976Sth 	else
1088b9238976Sth 		esi = NULL;
1089b9238976Sth 	return (esi);
1090b9238976Sth }
1091b9238976Sth 
1092b9238976Sth static void
1093b9238976Sth nfs4_trigger_esi_destroy(ephemeral_servinfo_t *esi, vnode_t *vp)
1094b9238976Sth {
1095b9238976Sth 	rnode4_t *rp = VTOR4(vp);
1096b9238976Sth 
1097b9238976Sth 	ASSERT(RP_ISSTUB(rp));
1098b9238976Sth 
1099b9238976Sth 	/* Currently, no need for an ephemeral type-specific routine */
1100b9238976Sth 
1101b9238976Sth 	/*
1102b9238976Sth 	 * The contents of ephemeral_servinfo_t goes into nfs_args,
1103b9238976Sth 	 * and will be handled by nfs4_trigger_nargs_destroy().
1104b9238976Sth 	 * We need only free the structure itself.
1105b9238976Sth 	 */
1106b9238976Sth 	if (esi != NULL)
1107b9238976Sth 		kmem_free(esi, sizeof (ephemeral_servinfo_t));
1108b9238976Sth }
1109b9238976Sth 
1110b9238976Sth /*
1111b9238976Sth  * Some of this may turn out to be common with other ephemeral types,
1112b9238976Sth  * in which case it should be moved to nfs4_trigger_esi_create(), or a
1113b9238976Sth  * common function called.
1114b9238976Sth  */
1115*2f172c55SRobert Thurlow 
1116*2f172c55SRobert Thurlow /*
1117*2f172c55SRobert Thurlow  * Mirror mounts case - should have all data available
1118*2f172c55SRobert Thurlow  */
static ephemeral_servinfo_t *
nfs4_trigger_esi_create_mirrormount(vnode_t *vp, servinfo4_t *svp)
{
	char			*stubpath;
	struct knetconfig	*sikncp, *svkncp;
	struct netbuf		*bufp;
	ephemeral_servinfo_t	*esi;

	esi = kmem_zalloc(sizeof (ephemeral_servinfo_t), KM_SLEEP);

	/* initially set to be our type of ephemeral mount; may be added to */
	esi->esi_mount_flags = NFSMNT_MIRRORMOUNT;

	/*
	 * We're copying info from the stub rnode's servinfo4, but
	 * we must create new copies, not pointers, since this information
	 * is to be associated with the new mount, which will be
	 * unmounted (and its structures freed) separately
	 */

	/*
	 * Sizes passed to kmem_[z]alloc here must match those freed
	 * in nfs4_free_args()
	 */

	/*
	 * We hold sv_lock across kmem_zalloc() calls that may sleep, but this
	 * is difficult to avoid: as we need to read svp to calculate the
	 * sizes to be allocated.
	 */
	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);

	/* kmem_zalloc() zero-fills, so the buffer is a valid strcat target */
	esi->esi_hostname = kmem_zalloc(strlen(svp->sv_hostname) + 1, KM_SLEEP);
	(void) strcat(esi->esi_hostname, svp->sv_hostname);

	/* deep-copy the server's transport address */
	esi->esi_addr = kmem_zalloc(sizeof (struct netbuf), KM_SLEEP);
	bufp = esi->esi_addr;
	bufp->len = svp->sv_addr.len;
	bufp->maxlen = svp->sv_addr.maxlen;
	bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP);
	bcopy(svp->sv_addr.buf, bufp->buf, bufp->len);

	/* deep-copy the knetconfig (semantics, protocol family/name, rdev) */
	esi->esi_knconf = kmem_zalloc(sizeof (*esi->esi_knconf), KM_SLEEP);
	sikncp = esi->esi_knconf;
	svkncp = svp->sv_knconf;
	sikncp->knc_semantics = svkncp->knc_semantics;
	sikncp->knc_protofmly = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
	(void) strcat((char *)sikncp->knc_protofmly,
	    (char *)svkncp->knc_protofmly);
	sikncp->knc_proto = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
	(void) strcat((char *)sikncp->knc_proto, (char *)svkncp->knc_proto);
	sikncp->knc_rdev = svkncp->knc_rdev;

	/*
	 * Used when AUTH_DH is negotiated.
	 *
	 * This is ephemeral mount-type specific, since it contains the
	 * server's time-sync syncaddr.
	 */
	if (svp->sv_dhsec) {
		struct netbuf *bufp;
		sec_data_t *sdata;
		dh_k4_clntdata_t *data;

		sdata = svp->sv_dhsec;
		data = (dh_k4_clntdata_t *)sdata->data;
		ASSERT(sdata->rpcflavor == AUTH_DH);

		bufp = kmem_zalloc(sizeof (struct netbuf), KM_SLEEP);
		bufp->len = data->syncaddr.len;
		bufp->maxlen = data->syncaddr.maxlen;
		bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP);
		bcopy(data->syncaddr.buf, bufp->buf, bufp->len);
		esi->esi_syncaddr = bufp;

		if (data->netname != NULL) {
			int nmlen = data->netnamelen;

			/*
			 * We need to copy from a dh_k4_clntdata_t
			 * netname/netnamelen pair to a NUL-terminated
			 * netname string suitable for putting in nfs_args,
			 * where the latter has no netnamelen field.
			 */
			esi->esi_netname = kmem_zalloc(nmlen + 1, KM_SLEEP);
			bcopy(data->netname, esi->esi_netname, nmlen);
		}
	} else {
		esi->esi_syncaddr = NULL;
		esi->esi_netname = NULL;
	}

	stubpath = fn_path(VTOSV(vp)->sv_name);
	/* step over initial '.', to avoid e.g. sv_path: "/tank./ws" */
	ASSERT(*stubpath == '.');
	stubpath += 1;

	/* for nfs_args->fh */
	esi->esi_path_len = strlen(stubpath) + 1;
	/* avoid a doubled '/' when the server-side root path is just "/" */
	if (strcmp(svp->sv_path, "/") != 0)
		esi->esi_path_len += strlen(svp->sv_path);
	esi->esi_path = kmem_zalloc(esi->esi_path_len, KM_SLEEP);
	if (strcmp(svp->sv_path, "/") != 0)
		(void) strcat(esi->esi_path, svp->sv_path);
	(void) strcat(esi->esi_path, stubpath);

	/* restore the pointer fn_path() returned so the whole buffer frees */
	stubpath -= 1;
	/* stubpath allocated by fn_path() */
	kmem_free(stubpath, strlen(stubpath) + 1);

	nfs_rw_exit(&svp->sv_lock);

	return (esi);
}
1233b9238976Sth 
1234*2f172c55SRobert Thurlow /*
1235*2f172c55SRobert Thurlow  * Makes an upcall to NFSMAPID daemon to resolve hostname of NFS server to
1236*2f172c55SRobert Thurlow  * get network information required to do the mount call.
1237*2f172c55SRobert Thurlow  */
1238*2f172c55SRobert Thurlow int
1239*2f172c55SRobert Thurlow nfs4_callmapid(utf8string *server, struct nfs_fsl_info *resp)
1240*2f172c55SRobert Thurlow {
1241*2f172c55SRobert Thurlow 	door_arg_t	door_args;
1242*2f172c55SRobert Thurlow 	door_handle_t	dh;
1243*2f172c55SRobert Thurlow 	XDR		xdr;
1244*2f172c55SRobert Thurlow 	refd_door_args_t *xdr_argsp;
1245*2f172c55SRobert Thurlow 	refd_door_res_t  *orig_resp;
1246*2f172c55SRobert Thurlow 	k_sigset_t	smask;
1247*2f172c55SRobert Thurlow 	int		xdr_len = 0;
1248*2f172c55SRobert Thurlow 	int 		res_len = 16; /* length of an ip adress */
1249*2f172c55SRobert Thurlow 	int		orig_reslen = res_len;
1250*2f172c55SRobert Thurlow 	int		error = 0;
1251*2f172c55SRobert Thurlow 	struct nfsidmap_globals *nig;
1252*2f172c55SRobert Thurlow 
1253*2f172c55SRobert Thurlow 	if (zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN)
1254*2f172c55SRobert Thurlow 		return (ECONNREFUSED);
1255*2f172c55SRobert Thurlow 
1256*2f172c55SRobert Thurlow 	nig = zone_getspecific(nfsidmap_zone_key, nfs_zone());
1257*2f172c55SRobert Thurlow 	ASSERT(nig != NULL);
1258*2f172c55SRobert Thurlow 
1259*2f172c55SRobert Thurlow 	mutex_enter(&nig->nfsidmap_daemon_lock);
1260*2f172c55SRobert Thurlow 	dh = nig->nfsidmap_daemon_dh;
1261*2f172c55SRobert Thurlow 	if (dh == NULL) {
1262*2f172c55SRobert Thurlow 		mutex_exit(&nig->nfsidmap_daemon_lock);
1263*2f172c55SRobert Thurlow 		cmn_err(CE_NOTE,
1264*2f172c55SRobert Thurlow 		    "nfs4_callmapid: nfsmapid daemon not " \
1265*2f172c55SRobert Thurlow 		    "running unable to resolve host name\n");
1266*2f172c55SRobert Thurlow 		return (EINVAL);
1267*2f172c55SRobert Thurlow 	}
1268*2f172c55SRobert Thurlow 	door_ki_hold(dh);
1269*2f172c55SRobert Thurlow 	mutex_exit(&nig->nfsidmap_daemon_lock);
1270*2f172c55SRobert Thurlow 
1271*2f172c55SRobert Thurlow 	xdr_len = xdr_sizeof(&(xdr_utf8string), server);
1272*2f172c55SRobert Thurlow 
1273*2f172c55SRobert Thurlow 	xdr_argsp = kmem_zalloc(xdr_len + sizeof (*xdr_argsp), KM_SLEEP);
1274*2f172c55SRobert Thurlow 	xdr_argsp->xdr_len = xdr_len;
1275*2f172c55SRobert Thurlow 	xdr_argsp->cmd = NFSMAPID_SRV_NETINFO;
1276*2f172c55SRobert Thurlow 
1277*2f172c55SRobert Thurlow 	xdrmem_create(&xdr, (char *)&xdr_argsp->xdr_arg,
1278*2f172c55SRobert Thurlow 	    xdr_len, XDR_ENCODE);
1279*2f172c55SRobert Thurlow 
1280*2f172c55SRobert Thurlow 	if (!xdr_utf8string(&xdr, server)) {
1281*2f172c55SRobert Thurlow 		kmem_free(xdr_argsp, xdr_len + sizeof (*xdr_argsp));
1282*2f172c55SRobert Thurlow 		door_ki_rele(dh);
1283*2f172c55SRobert Thurlow 		return (1);
1284*2f172c55SRobert Thurlow 	}
1285*2f172c55SRobert Thurlow 
1286*2f172c55SRobert Thurlow 	if (orig_reslen)
1287*2f172c55SRobert Thurlow 		orig_resp = kmem_alloc(orig_reslen, KM_SLEEP);
1288*2f172c55SRobert Thurlow 
1289*2f172c55SRobert Thurlow 	door_args.data_ptr = (char *)xdr_argsp;
1290*2f172c55SRobert Thurlow 	door_args.data_size = sizeof (*xdr_argsp) + xdr_argsp->xdr_len;
1291*2f172c55SRobert Thurlow 	door_args.desc_ptr = NULL;
1292*2f172c55SRobert Thurlow 	door_args.desc_num = 0;
1293*2f172c55SRobert Thurlow 	door_args.rbuf = orig_resp ? (char *)orig_resp : NULL;
1294*2f172c55SRobert Thurlow 	door_args.rsize = res_len;
1295*2f172c55SRobert Thurlow 
1296*2f172c55SRobert Thurlow 	sigintr(&smask, 1);
1297*2f172c55SRobert Thurlow 	error = door_ki_upcall(dh, &door_args);
1298*2f172c55SRobert Thurlow 	sigunintr(&smask);
1299*2f172c55SRobert Thurlow 
1300*2f172c55SRobert Thurlow 	door_ki_rele(dh);
1301*2f172c55SRobert Thurlow 
1302*2f172c55SRobert Thurlow 	kmem_free(xdr_argsp, xdr_len + sizeof (*xdr_argsp));
1303*2f172c55SRobert Thurlow 	if (error) {
1304*2f172c55SRobert Thurlow 		kmem_free(orig_resp, orig_reslen);
1305*2f172c55SRobert Thurlow 		/*
1306*2f172c55SRobert Thurlow 		 * There is no door to connect to. The referral daemon
1307*2f172c55SRobert Thurlow 		 * must not be running yet.
1308*2f172c55SRobert Thurlow 		 */
1309*2f172c55SRobert Thurlow 		cmn_err(CE_WARN,
1310*2f172c55SRobert Thurlow 		    "nfsmapid not running cannot resolve host name");
1311*2f172c55SRobert Thurlow 		goto out;
1312*2f172c55SRobert Thurlow 	}
1313*2f172c55SRobert Thurlow 
1314*2f172c55SRobert Thurlow 	/*
1315*2f172c55SRobert Thurlow 	 * If the results buffer passed back are not the same as
1316*2f172c55SRobert Thurlow 	 * what was sent free the old buffer and use the new one.
1317*2f172c55SRobert Thurlow 	 */
1318*2f172c55SRobert Thurlow 	if (orig_resp && orig_reslen) {
1319*2f172c55SRobert Thurlow 		refd_door_res_t *door_resp;
1320*2f172c55SRobert Thurlow 
1321*2f172c55SRobert Thurlow 		door_resp = (refd_door_res_t *)door_args.rbuf;
1322*2f172c55SRobert Thurlow 		if ((void *)door_args.rbuf != orig_resp)
1323*2f172c55SRobert Thurlow 			kmem_free(orig_resp, orig_reslen);
1324*2f172c55SRobert Thurlow 		if (door_resp->res_status == 0) {
1325*2f172c55SRobert Thurlow 			xdrmem_create(&xdr, (char *)&door_resp->xdr_res,
1326*2f172c55SRobert Thurlow 			    door_resp->xdr_len, XDR_DECODE);
1327*2f172c55SRobert Thurlow 			bzero(resp, sizeof (struct nfs_fsl_info));
1328*2f172c55SRobert Thurlow 			if (!xdr_nfs_fsl_info(&xdr, resp)) {
1329*2f172c55SRobert Thurlow 				DTRACE_PROBE2(
1330*2f172c55SRobert Thurlow 				    nfs4clnt__debug__referral__upcall__xdrfail,
1331*2f172c55SRobert Thurlow 				    struct nfs_fsl_info *, resp,
1332*2f172c55SRobert Thurlow 				    char *, "nfs4_callmapid");
1333*2f172c55SRobert Thurlow 				error = EINVAL;
1334*2f172c55SRobert Thurlow 			}
1335*2f172c55SRobert Thurlow 		} else {
1336*2f172c55SRobert Thurlow 			DTRACE_PROBE2(
1337*2f172c55SRobert Thurlow 			    nfs4clnt__debug__referral__upcall__badstatus,
1338*2f172c55SRobert Thurlow 			    int, door_resp->res_status,
1339*2f172c55SRobert Thurlow 			    char *, "nfs4_callmapid");
1340*2f172c55SRobert Thurlow 			error = door_resp->res_status;
1341*2f172c55SRobert Thurlow 		}
1342*2f172c55SRobert Thurlow 		kmem_free(door_args.rbuf, door_args.rsize);
1343*2f172c55SRobert Thurlow 	}
1344*2f172c55SRobert Thurlow out:
1345*2f172c55SRobert Thurlow 	DTRACE_PROBE2(nfs4clnt__func__referral__upcall,
1346*2f172c55SRobert Thurlow 	    char *, server, int, error);
1347*2f172c55SRobert Thurlow 	return (error);
1348*2f172c55SRobert Thurlow }
1349*2f172c55SRobert Thurlow 
1350*2f172c55SRobert Thurlow /*
1351*2f172c55SRobert Thurlow  * Fetches the fs_locations attribute. Typically called
1352*2f172c55SRobert Thurlow  * from a Replication/Migration/Referrals/Mirror-mount context
1353*2f172c55SRobert Thurlow  *
1354*2f172c55SRobert Thurlow  * Fills in the attributes in garp. The caller is assumed
1355*2f172c55SRobert Thurlow  * to have allocated memory for garp.
1356*2f172c55SRobert Thurlow  *
1357*2f172c55SRobert Thurlow  * lock: if set do not lock s_recovlock and mi_recovlock mutex,
1358*2f172c55SRobert Thurlow  *	 it's already done by caller. Otherwise lock these mutexes
1359*2f172c55SRobert Thurlow  *	 before doing the rfs4call().
1360*2f172c55SRobert Thurlow  *
1361*2f172c55SRobert Thurlow  * Returns
1362*2f172c55SRobert Thurlow  * 	1	 for success
1363*2f172c55SRobert Thurlow  * 	0	 for failure
1364*2f172c55SRobert Thurlow  */
1365*2f172c55SRobert Thurlow int
1366*2f172c55SRobert Thurlow nfs4_fetch_locations(mntinfo4_t *mi, nfs4_sharedfh_t *sfh, char *nm,
1367*2f172c55SRobert Thurlow     cred_t *cr, nfs4_ga_res_t *garp, COMPOUND4res_clnt *callres, bool_t lock)
1368*2f172c55SRobert Thurlow {
1369*2f172c55SRobert Thurlow 	COMPOUND4args_clnt args;
1370*2f172c55SRobert Thurlow 	COMPOUND4res_clnt res;
1371*2f172c55SRobert Thurlow 	nfs_argop4 *argop;
1372*2f172c55SRobert Thurlow 	int argoplist_size = 3 * sizeof (nfs_argop4);
1373*2f172c55SRobert Thurlow 	nfs4_server_t *sp = NULL;
1374*2f172c55SRobert Thurlow 	int doqueue = 1;
1375*2f172c55SRobert Thurlow 	nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
1376*2f172c55SRobert Thurlow 	int retval = 1;
1377*2f172c55SRobert Thurlow 	struct nfs4_clnt *nfscl;
1378*2f172c55SRobert Thurlow 
1379*2f172c55SRobert Thurlow 	if (lock == TRUE)
1380*2f172c55SRobert Thurlow 		(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0);
1381*2f172c55SRobert Thurlow 	else
1382*2f172c55SRobert Thurlow 		ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) ||
1383*2f172c55SRobert Thurlow 		    nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER));
1384*2f172c55SRobert Thurlow 
1385*2f172c55SRobert Thurlow 	sp = find_nfs4_server(mi);
1386*2f172c55SRobert Thurlow 	if (lock == TRUE)
1387*2f172c55SRobert Thurlow 		nfs_rw_exit(&mi->mi_recovlock);
1388*2f172c55SRobert Thurlow 
1389*2f172c55SRobert Thurlow 	if (sp != NULL)
1390*2f172c55SRobert Thurlow 		mutex_exit(&sp->s_lock);
1391*2f172c55SRobert Thurlow 
1392*2f172c55SRobert Thurlow 	if (lock == TRUE) {
1393*2f172c55SRobert Thurlow 		if (sp != NULL)
1394*2f172c55SRobert Thurlow 			(void) nfs_rw_enter_sig(&sp->s_recovlock,
1395*2f172c55SRobert Thurlow 			    RW_WRITER, 0);
1396*2f172c55SRobert Thurlow 		(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_WRITER, 0);
1397*2f172c55SRobert Thurlow 	} else {
1398*2f172c55SRobert Thurlow 		if (sp != NULL) {
1399*2f172c55SRobert Thurlow 			ASSERT(nfs_rw_lock_held(&sp->s_recovlock, RW_READER) ||
1400*2f172c55SRobert Thurlow 			    nfs_rw_lock_held(&sp->s_recovlock, RW_WRITER));
1401*2f172c55SRobert Thurlow 		}
1402*2f172c55SRobert Thurlow 	}
1403*2f172c55SRobert Thurlow 
1404*2f172c55SRobert Thurlow 	/*
1405*2f172c55SRobert Thurlow 	 * Do we want to do the setup for recovery here?
1406*2f172c55SRobert Thurlow 	 *
1407*2f172c55SRobert Thurlow 	 * We know that the server responded to a null ping a very
1408*2f172c55SRobert Thurlow 	 * short time ago, and we know that we intend to do a
1409*2f172c55SRobert Thurlow 	 * single stateless operation - we want to fetch attributes,
1410*2f172c55SRobert Thurlow 	 * so we know we can't encounter errors about state.  If
1411*2f172c55SRobert Thurlow 	 * something goes wrong with the GETATTR, like not being
1412*2f172c55SRobert Thurlow 	 * able to get a response from the server or getting any
1413*2f172c55SRobert Thurlow 	 * kind of FH error, we should fail the mount.
1414*2f172c55SRobert Thurlow 	 *
1415*2f172c55SRobert Thurlow 	 * We may want to re-visited this at a later time.
1416*2f172c55SRobert Thurlow 	 */
1417*2f172c55SRobert Thurlow 	argop = kmem_alloc(argoplist_size, KM_SLEEP);
1418*2f172c55SRobert Thurlow 
1419*2f172c55SRobert Thurlow 	args.ctag = TAG_GETATTR_FSLOCATION;
1420*2f172c55SRobert Thurlow 	/* PUTFH LOOKUP GETATTR */
1421*2f172c55SRobert Thurlow 	args.array_len = 3;
1422*2f172c55SRobert Thurlow 	args.array = argop;
1423*2f172c55SRobert Thurlow 
1424*2f172c55SRobert Thurlow 	/* 0. putfh file */
1425*2f172c55SRobert Thurlow 	argop[0].argop = OP_CPUTFH;
1426*2f172c55SRobert Thurlow 	argop[0].nfs_argop4_u.opcputfh.sfh = sfh;
1427*2f172c55SRobert Thurlow 
1428*2f172c55SRobert Thurlow 	/* 1. lookup name, can't be dotdot */
1429*2f172c55SRobert Thurlow 	argop[1].argop = OP_CLOOKUP;
1430*2f172c55SRobert Thurlow 	argop[1].nfs_argop4_u.opclookup.cname = nm;
1431*2f172c55SRobert Thurlow 
1432*2f172c55SRobert Thurlow 	/* 2. file attrs */
1433*2f172c55SRobert Thurlow 	argop[2].argop = OP_GETATTR;
1434*2f172c55SRobert Thurlow 	argop[2].nfs_argop4_u.opgetattr.attr_request =
1435*2f172c55SRobert Thurlow 	    FATTR4_FSID_MASK | FATTR4_FS_LOCATIONS_MASK |
1436*2f172c55SRobert Thurlow 	    FATTR4_MOUNTED_ON_FILEID_MASK;
1437*2f172c55SRobert Thurlow 	argop[2].nfs_argop4_u.opgetattr.mi = mi;
1438*2f172c55SRobert Thurlow 
1439*2f172c55SRobert Thurlow 	rfs4call(mi, &args, &res, cr, &doqueue, 0, &e);
1440*2f172c55SRobert Thurlow 
1441*2f172c55SRobert Thurlow 	if (lock == TRUE) {
1442*2f172c55SRobert Thurlow 		nfs_rw_exit(&mi->mi_recovlock);
1443*2f172c55SRobert Thurlow 		if (sp != NULL)
1444*2f172c55SRobert Thurlow 			nfs_rw_exit(&sp->s_recovlock);
1445*2f172c55SRobert Thurlow 	}
1446*2f172c55SRobert Thurlow 
1447*2f172c55SRobert Thurlow 	nfscl = zone_getspecific(nfs4clnt_zone_key, nfs_zone());
1448*2f172c55SRobert Thurlow 	nfscl->nfscl_stat.referrals.value.ui64++;
1449*2f172c55SRobert Thurlow 	DTRACE_PROBE3(nfs4clnt__func__referral__fsloc,
1450*2f172c55SRobert Thurlow 	    nfs4_sharedfh_t *, sfh, char *, nm, nfs4_error_t *, &e);
1451*2f172c55SRobert Thurlow 
1452*2f172c55SRobert Thurlow 	if (e.error != 0) {
1453*2f172c55SRobert Thurlow 		if (sp != NULL)
1454*2f172c55SRobert Thurlow 			nfs4_server_rele(sp);
1455*2f172c55SRobert Thurlow 		kmem_free(argop, argoplist_size);
1456*2f172c55SRobert Thurlow 		return (0);
1457*2f172c55SRobert Thurlow 	}
1458*2f172c55SRobert Thurlow 
1459*2f172c55SRobert Thurlow 	/*
1460*2f172c55SRobert Thurlow 	 * Check for all possible error conditions.
1461*2f172c55SRobert Thurlow 	 * For valid replies without an ops array or for illegal
1462*2f172c55SRobert Thurlow 	 * replies, return a failure.
1463*2f172c55SRobert Thurlow 	 */
1464*2f172c55SRobert Thurlow 	if (res.status != NFS4_OK || res.array_len < 3 ||
1465*2f172c55SRobert Thurlow 	    res.array[2].nfs_resop4_u.opgetattr.status != NFS4_OK) {
1466*2f172c55SRobert Thurlow 		retval = 0;
1467*2f172c55SRobert Thurlow 		goto exit;
1468*2f172c55SRobert Thurlow 	}
1469*2f172c55SRobert Thurlow 
1470*2f172c55SRobert Thurlow 	/*
1471*2f172c55SRobert Thurlow 	 * There isn't much value in putting the attributes
1472*2f172c55SRobert Thurlow 	 * in the attr cache since fs_locations4 aren't
1473*2f172c55SRobert Thurlow 	 * encountered very frequently, so just make them
1474*2f172c55SRobert Thurlow 	 * available to the caller.
1475*2f172c55SRobert Thurlow 	 */
1476*2f172c55SRobert Thurlow 	*garp = res.array[2].nfs_resop4_u.opgetattr.ga_res;
1477*2f172c55SRobert Thurlow 
1478*2f172c55SRobert Thurlow 	DTRACE_PROBE2(nfs4clnt__debug__referral__fsloc,
1479*2f172c55SRobert Thurlow 	    nfs4_ga_res_t *, garp, char *, "nfs4_fetch_locations");
1480*2f172c55SRobert Thurlow 
1481*2f172c55SRobert Thurlow 	/* No fs_locations? -- return a failure */
1482*2f172c55SRobert Thurlow 	if (garp->n4g_ext_res == NULL ||
1483*2f172c55SRobert Thurlow 	    garp->n4g_ext_res->n4g_fslocations.locations_val == NULL) {
1484*2f172c55SRobert Thurlow 		retval = 0;
1485*2f172c55SRobert Thurlow 		goto exit;
1486*2f172c55SRobert Thurlow 	}
1487*2f172c55SRobert Thurlow 
1488*2f172c55SRobert Thurlow 	if (!garp->n4g_fsid_valid)
1489*2f172c55SRobert Thurlow 		retval = 0;
1490*2f172c55SRobert Thurlow 
1491*2f172c55SRobert Thurlow exit:
1492*2f172c55SRobert Thurlow 	if (retval == 0) {
1493*2f172c55SRobert Thurlow 		/* the call was ok but failed validating the call results */
1494*2f172c55SRobert Thurlow 		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1495*2f172c55SRobert Thurlow 	} else {
1496*2f172c55SRobert Thurlow 		ASSERT(callres != NULL);
1497*2f172c55SRobert Thurlow 		*callres = res;
1498*2f172c55SRobert Thurlow 	}
1499*2f172c55SRobert Thurlow 
1500*2f172c55SRobert Thurlow 	if (sp != NULL)
1501*2f172c55SRobert Thurlow 		nfs4_server_rele(sp);
1502*2f172c55SRobert Thurlow 	kmem_free(argop, argoplist_size);
1503*2f172c55SRobert Thurlow 	return (retval);
1504*2f172c55SRobert Thurlow }
1505*2f172c55SRobert Thurlow 
1506*2f172c55SRobert Thurlow /* tunable to disable referral mounts */
1507*2f172c55SRobert Thurlow int nfs4_no_referrals = 0;
1508*2f172c55SRobert Thurlow 
1509*2f172c55SRobert Thurlow /*
1510*2f172c55SRobert Thurlow  * Returns NULL if the vnode cannot be created or found.
1511*2f172c55SRobert Thurlow  */
1512*2f172c55SRobert Thurlow vnode_t *
1513*2f172c55SRobert Thurlow find_referral_stubvp(vnode_t *dvp, char *nm, cred_t *cr)
1514*2f172c55SRobert Thurlow {
1515*2f172c55SRobert Thurlow 	nfs_fh4 *stub_fh, *dfh;
1516*2f172c55SRobert Thurlow 	nfs4_sharedfh_t *sfhp;
1517*2f172c55SRobert Thurlow 	char *newfhval;
1518*2f172c55SRobert Thurlow 	vnode_t *vp = NULL;
1519*2f172c55SRobert Thurlow 	fattr4_mounted_on_fileid mnt_on_fileid;
1520*2f172c55SRobert Thurlow 	nfs4_ga_res_t garp;
1521*2f172c55SRobert Thurlow 	mntinfo4_t *mi;
1522*2f172c55SRobert Thurlow 	COMPOUND4res_clnt callres;
1523*2f172c55SRobert Thurlow 	hrtime_t t;
1524*2f172c55SRobert Thurlow 
1525*2f172c55SRobert Thurlow 	if (nfs4_no_referrals)
1526*2f172c55SRobert Thurlow 		return (NULL);
1527*2f172c55SRobert Thurlow 
1528*2f172c55SRobert Thurlow 	/*
1529*2f172c55SRobert Thurlow 	 * Get the mounted_on_fileid, unique on that server::fsid
1530*2f172c55SRobert Thurlow 	 */
1531*2f172c55SRobert Thurlow 	mi = VTOMI4(dvp);
1532*2f172c55SRobert Thurlow 	if (nfs4_fetch_locations(mi, VTOR4(dvp)->r_fh, nm, cr,
1533*2f172c55SRobert Thurlow 	    &garp, &callres, FALSE) == 0)
1534*2f172c55SRobert Thurlow 		return (NULL);
1535*2f172c55SRobert Thurlow 	mnt_on_fileid = garp.n4g_mon_fid;
1536*2f172c55SRobert Thurlow 	(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);
1537*2f172c55SRobert Thurlow 
1538*2f172c55SRobert Thurlow 	/*
1539*2f172c55SRobert Thurlow 	 * Build a fake filehandle from the dir FH and the mounted_on_fileid
1540*2f172c55SRobert Thurlow 	 */
1541*2f172c55SRobert Thurlow 	dfh = &VTOR4(dvp)->r_fh->sfh_fh;
1542*2f172c55SRobert Thurlow 	stub_fh = kmem_alloc(sizeof (nfs_fh4), KM_SLEEP);
1543*2f172c55SRobert Thurlow 	stub_fh->nfs_fh4_val = kmem_alloc(dfh->nfs_fh4_len +
1544*2f172c55SRobert Thurlow 	    sizeof (fattr4_mounted_on_fileid), KM_SLEEP);
1545*2f172c55SRobert Thurlow 	newfhval = stub_fh->nfs_fh4_val;
1546*2f172c55SRobert Thurlow 
1547*2f172c55SRobert Thurlow 	/* copy directory's file handle */
1548*2f172c55SRobert Thurlow 	bcopy(dfh->nfs_fh4_val, newfhval, dfh->nfs_fh4_len);
1549*2f172c55SRobert Thurlow 	stub_fh->nfs_fh4_len = dfh->nfs_fh4_len;
1550*2f172c55SRobert Thurlow 	newfhval = newfhval + dfh->nfs_fh4_len;
1551*2f172c55SRobert Thurlow 
1552*2f172c55SRobert Thurlow 	/* Add mounted_on_fileid. Use bcopy to avoid alignment problem */
1553*2f172c55SRobert Thurlow 	bcopy((char *)&mnt_on_fileid, newfhval,
1554*2f172c55SRobert Thurlow 	    sizeof (fattr4_mounted_on_fileid));
1555*2f172c55SRobert Thurlow 	stub_fh->nfs_fh4_len += sizeof (fattr4_mounted_on_fileid);
1556*2f172c55SRobert Thurlow 
1557*2f172c55SRobert Thurlow 	sfhp = sfh4_put(stub_fh, VTOMI4(dvp), NULL);
1558*2f172c55SRobert Thurlow 	kmem_free(stub_fh->nfs_fh4_val, dfh->nfs_fh4_len +
1559*2f172c55SRobert Thurlow 	    sizeof (fattr4_mounted_on_fileid));
1560*2f172c55SRobert Thurlow 	kmem_free(stub_fh, sizeof (nfs_fh4));
1561*2f172c55SRobert Thurlow 	if (sfhp == NULL)
1562*2f172c55SRobert Thurlow 		return (NULL);
1563*2f172c55SRobert Thurlow 
1564*2f172c55SRobert Thurlow 	t = gethrtime();
1565*2f172c55SRobert Thurlow 	garp.n4g_va.va_type = VDIR;
1566*2f172c55SRobert Thurlow 	vp = makenfs4node(sfhp, NULL, dvp->v_vfsp, t,
1567*2f172c55SRobert Thurlow 	    cr, dvp, fn_get(VTOSV(dvp)->sv_name, nm, sfhp));
1568*2f172c55SRobert Thurlow 
1569*2f172c55SRobert Thurlow 	if (vp != NULL)
1570*2f172c55SRobert Thurlow 		vp->v_type = VDIR;
1571*2f172c55SRobert Thurlow 
1572*2f172c55SRobert Thurlow 	sfh4_rele(&sfhp);
1573*2f172c55SRobert Thurlow 	return (vp);
1574*2f172c55SRobert Thurlow }
1575*2f172c55SRobert Thurlow 
1576*2f172c55SRobert Thurlow int
1577*2f172c55SRobert Thurlow nfs4_setup_referral(vnode_t *dvp, char *nm, vnode_t **vpp, cred_t *cr)
1578*2f172c55SRobert Thurlow {
1579*2f172c55SRobert Thurlow 	vnode_t *nvp;
1580*2f172c55SRobert Thurlow 	rnode4_t *rp;
1581*2f172c55SRobert Thurlow 
1582*2f172c55SRobert Thurlow 	if ((nvp = find_referral_stubvp(dvp, nm, cr)) == NULL)
1583*2f172c55SRobert Thurlow 		return (EINVAL);
1584*2f172c55SRobert Thurlow 
1585*2f172c55SRobert Thurlow 	rp = VTOR4(nvp);
1586*2f172c55SRobert Thurlow 	mutex_enter(&rp->r_statelock);
1587*2f172c55SRobert Thurlow 	r4_stub_referral(rp);
1588*2f172c55SRobert Thurlow 	mutex_exit(&rp->r_statelock);
1589*2f172c55SRobert Thurlow 	dnlc_enter(dvp, nm, nvp);
1590*2f172c55SRobert Thurlow 
1591*2f172c55SRobert Thurlow 	if (*vpp != NULL)
1592*2f172c55SRobert Thurlow 		VN_RELE(*vpp);	/* no longer need this vnode */
1593*2f172c55SRobert Thurlow 
1594*2f172c55SRobert Thurlow 	*vpp = nvp;
1595*2f172c55SRobert Thurlow 
1596*2f172c55SRobert Thurlow 	return (0);
1597*2f172c55SRobert Thurlow }
1598*2f172c55SRobert Thurlow 
1599*2f172c55SRobert Thurlow /*
1600*2f172c55SRobert Thurlow  * Fetch the location information and resolve the new server.
1601*2f172c55SRobert Thurlow  * Caller needs to free up the XDR data which is returned.
1602*2f172c55SRobert Thurlow  * Input: mount info, shared filehandle, nodename
1603*2f172c55SRobert Thurlow  * Return: Index to the result or Error(-1)
1604*2f172c55SRobert Thurlow  * Output: FsLocations Info, Resolved Server Info.
1605*2f172c55SRobert Thurlow  */
1606*2f172c55SRobert Thurlow int
1607*2f172c55SRobert Thurlow nfs4_process_referral(mntinfo4_t *mi, nfs4_sharedfh_t *sfh,
1608*2f172c55SRobert Thurlow     char *nm, cred_t *cr, nfs4_ga_res_t *grp, COMPOUND4res_clnt *res,
1609*2f172c55SRobert Thurlow     struct nfs_fsl_info *fsloc)
1610*2f172c55SRobert Thurlow {
1611*2f172c55SRobert Thurlow 	fs_location4 *fsp;
1612*2f172c55SRobert Thurlow 	struct nfs_fsl_info nfsfsloc;
1613*2f172c55SRobert Thurlow 	int ret, i, error;
1614*2f172c55SRobert Thurlow 	nfs4_ga_res_t garp;
1615*2f172c55SRobert Thurlow 	COMPOUND4res_clnt callres;
1616*2f172c55SRobert Thurlow 	struct knetconfig *knc;
1617*2f172c55SRobert Thurlow 
1618*2f172c55SRobert Thurlow 	ret = nfs4_fetch_locations(mi, sfh, nm, cr, &garp, &callres, TRUE);
1619*2f172c55SRobert Thurlow 	if (ret == 0)
1620*2f172c55SRobert Thurlow 		return (-1);
1621*2f172c55SRobert Thurlow 
1622*2f172c55SRobert Thurlow 	/*
1623*2f172c55SRobert Thurlow 	 * As a lame attempt to figuring out if we're
1624*2f172c55SRobert Thurlow 	 * handling a migration event or a referral,
1625*2f172c55SRobert Thurlow 	 * look for rnodes with this fsid in the rnode
1626*2f172c55SRobert Thurlow 	 * cache.
1627*2f172c55SRobert Thurlow 	 *
1628*2f172c55SRobert Thurlow 	 * If we can find one or more such rnodes, it
1629*2f172c55SRobert Thurlow 	 * means we're handling a migration event and
1630*2f172c55SRobert Thurlow 	 * we want to bail out in that case.
1631*2f172c55SRobert Thurlow 	 */
1632*2f172c55SRobert Thurlow 	if (r4find_by_fsid(mi, &garp.n4g_fsid)) {
1633*2f172c55SRobert Thurlow 		DTRACE_PROBE3(nfs4clnt__debug__referral__migration,
1634*2f172c55SRobert Thurlow 		    mntinfo4_t *, mi, nfs4_ga_res_t *, &garp,
1635*2f172c55SRobert Thurlow 		    char *, "nfs4_process_referral");
1636*2f172c55SRobert Thurlow 		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);
1637*2f172c55SRobert Thurlow 		return (-1);
1638*2f172c55SRobert Thurlow 	}
1639*2f172c55SRobert Thurlow 
1640*2f172c55SRobert Thurlow 	/*
1641*2f172c55SRobert Thurlow 	 * Find the first responsive server to mount.  When we find
1642*2f172c55SRobert Thurlow 	 * one, fsp will point to it.
1643*2f172c55SRobert Thurlow 	 */
1644*2f172c55SRobert Thurlow 	for (i = 0; i < garp.n4g_ext_res->n4g_fslocations.locations_len; i++) {
1645*2f172c55SRobert Thurlow 
1646*2f172c55SRobert Thurlow 		fsp = &garp.n4g_ext_res->n4g_fslocations.locations_val[i];
1647*2f172c55SRobert Thurlow 		if (fsp->server_len == 0 || fsp->server_val == NULL)
1648*2f172c55SRobert Thurlow 			continue;
1649*2f172c55SRobert Thurlow 
1650*2f172c55SRobert Thurlow 		error = nfs4_callmapid(fsp->server_val, &nfsfsloc);
1651*2f172c55SRobert Thurlow 		if (error != 0)
1652*2f172c55SRobert Thurlow 			continue;
1653*2f172c55SRobert Thurlow 
1654*2f172c55SRobert Thurlow 		error = nfs4_ping_server_common(nfsfsloc.knconf,
1655*2f172c55SRobert Thurlow 		    nfsfsloc.addr, !(mi->mi_flags & MI4_INT));
1656*2f172c55SRobert Thurlow 		if (error == RPC_SUCCESS)
1657*2f172c55SRobert Thurlow 			break;
1658*2f172c55SRobert Thurlow 
1659*2f172c55SRobert Thurlow 		DTRACE_PROBE2(nfs4clnt__debug__referral__srvaddr,
1660*2f172c55SRobert Thurlow 		    sockaddr_in *, (struct sockaddr_in *)nfsfsloc.addr->buf,
1661*2f172c55SRobert Thurlow 		    char *, "nfs4_process_referral");
1662*2f172c55SRobert Thurlow 
1663*2f172c55SRobert Thurlow 		(void) xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc);
1664*2f172c55SRobert Thurlow 	}
1665*2f172c55SRobert Thurlow 	knc = nfsfsloc.knconf;
1666*2f172c55SRobert Thurlow 	if ((i >= garp.n4g_ext_res->n4g_fslocations.locations_len) ||
1667*2f172c55SRobert Thurlow 	    (knc->knc_protofmly == NULL) || (knc->knc_proto == NULL)) {
1668*2f172c55SRobert Thurlow 		DTRACE_PROBE2(nfs4clnt__debug__referral__nofsloc,
1669*2f172c55SRobert Thurlow 		    nfs4_ga_res_t *, &garp, char *, "nfs4_process_referral");
1670*2f172c55SRobert Thurlow 		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);
1671*2f172c55SRobert Thurlow 		return (-1);
1672*2f172c55SRobert Thurlow 	}
1673*2f172c55SRobert Thurlow 
1674*2f172c55SRobert Thurlow 	/* Send the results back */
1675*2f172c55SRobert Thurlow 	*fsloc = nfsfsloc;
1676*2f172c55SRobert Thurlow 	*grp = garp;
1677*2f172c55SRobert Thurlow 	*res = callres;
1678*2f172c55SRobert Thurlow 	return (i);
1679*2f172c55SRobert Thurlow }
1680*2f172c55SRobert Thurlow 
1681*2f172c55SRobert Thurlow /*
1682*2f172c55SRobert Thurlow  * Referrals case - need to fetch referral data and then upcall to
1683*2f172c55SRobert Thurlow  * user-level to get complete mount data.
1684*2f172c55SRobert Thurlow  */
1685*2f172c55SRobert Thurlow static ephemeral_servinfo_t *
1686*2f172c55SRobert Thurlow nfs4_trigger_esi_create_referral(vnode_t *vp, cred_t *cr)
1687*2f172c55SRobert Thurlow {
1688*2f172c55SRobert Thurlow 	struct knetconfig	*sikncp, *svkncp;
1689*2f172c55SRobert Thurlow 	struct netbuf		*bufp;
1690*2f172c55SRobert Thurlow 	ephemeral_servinfo_t	*esi;
1691*2f172c55SRobert Thurlow 	vnode_t			*dvp;
1692*2f172c55SRobert Thurlow 	rnode4_t		*drp;
1693*2f172c55SRobert Thurlow 	fs_location4		*fsp;
1694*2f172c55SRobert Thurlow 	struct nfs_fsl_info	nfsfsloc;
1695*2f172c55SRobert Thurlow 	nfs4_ga_res_t		garp;
1696*2f172c55SRobert Thurlow 	char			*p;
1697*2f172c55SRobert Thurlow 	char			fn[MAXNAMELEN];
1698*2f172c55SRobert Thurlow 	int			i, index = -1;
1699*2f172c55SRobert Thurlow 	mntinfo4_t		*mi;
1700*2f172c55SRobert Thurlow 	COMPOUND4res_clnt	callres;
1701*2f172c55SRobert Thurlow 
1702*2f172c55SRobert Thurlow 	/*
1703*2f172c55SRobert Thurlow 	 * If we're passed in a stub vnode that
1704*2f172c55SRobert Thurlow 	 * isn't a "referral" stub, bail out
1705*2f172c55SRobert Thurlow 	 * and return a failure
1706*2f172c55SRobert Thurlow 	 */
1707*2f172c55SRobert Thurlow 	if (!RP_ISSTUB_REFERRAL(VTOR4(vp)))
1708*2f172c55SRobert Thurlow 		return (NULL);
1709*2f172c55SRobert Thurlow 
1710*2f172c55SRobert Thurlow 	if (vtodv(vp, &dvp, CRED(), TRUE) != 0)
1711*2f172c55SRobert Thurlow 		return (NULL);
1712*2f172c55SRobert Thurlow 
1713*2f172c55SRobert Thurlow 	drp = VTOR4(dvp);
1714*2f172c55SRobert Thurlow 	if (nfs_rw_enter_sig(&drp->r_rwlock, RW_READER, INTR4(dvp))) {
1715*2f172c55SRobert Thurlow 		VN_RELE(dvp);
1716*2f172c55SRobert Thurlow 		return (NULL);
1717*2f172c55SRobert Thurlow 	}
1718*2f172c55SRobert Thurlow 
1719*2f172c55SRobert Thurlow 	if (vtoname(vp, fn, MAXNAMELEN) != 0) {
1720*2f172c55SRobert Thurlow 		nfs_rw_exit(&drp->r_rwlock);
1721*2f172c55SRobert Thurlow 		VN_RELE(dvp);
1722*2f172c55SRobert Thurlow 		return (NULL);
1723*2f172c55SRobert Thurlow 	}
1724*2f172c55SRobert Thurlow 
1725*2f172c55SRobert Thurlow 	mi = VTOMI4(dvp);
1726*2f172c55SRobert Thurlow 	index = nfs4_process_referral(mi, drp->r_fh, fn, cr,
1727*2f172c55SRobert Thurlow 	    &garp, &callres, &nfsfsloc);
1728*2f172c55SRobert Thurlow 	nfs_rw_exit(&drp->r_rwlock);
1729*2f172c55SRobert Thurlow 	VN_RELE(dvp);
1730*2f172c55SRobert Thurlow 	if (index < 0)
1731*2f172c55SRobert Thurlow 		return (NULL);
1732*2f172c55SRobert Thurlow 
1733*2f172c55SRobert Thurlow 	fsp = &garp.n4g_ext_res->n4g_fslocations.locations_val[index];
1734*2f172c55SRobert Thurlow 	esi = kmem_zalloc(sizeof (ephemeral_servinfo_t), KM_SLEEP);
1735*2f172c55SRobert Thurlow 
1736*2f172c55SRobert Thurlow 	/* initially set to be our type of ephemeral mount; may be added to */
1737*2f172c55SRobert Thurlow 	esi->esi_mount_flags = NFSMNT_REFERRAL;
1738*2f172c55SRobert Thurlow 
1739*2f172c55SRobert Thurlow 	esi->esi_hostname =
1740*2f172c55SRobert Thurlow 	    kmem_zalloc(fsp->server_val->utf8string_len + 1, KM_SLEEP);
1741*2f172c55SRobert Thurlow 	bcopy(fsp->server_val->utf8string_val, esi->esi_hostname,
1742*2f172c55SRobert Thurlow 	    fsp->server_val->utf8string_len);
1743*2f172c55SRobert Thurlow 	esi->esi_hostname[fsp->server_val->utf8string_len] = '\0';
1744*2f172c55SRobert Thurlow 
1745*2f172c55SRobert Thurlow 	bufp = kmem_alloc(sizeof (struct netbuf), KM_SLEEP);
1746*2f172c55SRobert Thurlow 	bufp->len = nfsfsloc.addr->len;
1747*2f172c55SRobert Thurlow 	bufp->maxlen = nfsfsloc.addr->maxlen;
1748*2f172c55SRobert Thurlow 	bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP);
1749*2f172c55SRobert Thurlow 	bcopy(nfsfsloc.addr->buf, bufp->buf, bufp->len);
1750*2f172c55SRobert Thurlow 	esi->esi_addr = bufp;
1751*2f172c55SRobert Thurlow 
1752*2f172c55SRobert Thurlow 	esi->esi_knconf = kmem_zalloc(sizeof (*esi->esi_knconf), KM_SLEEP);
1753*2f172c55SRobert Thurlow 	sikncp = esi->esi_knconf;
1754*2f172c55SRobert Thurlow 
1755*2f172c55SRobert Thurlow 	DTRACE_PROBE2(nfs4clnt__debug__referral__nfsfsloc,
1756*2f172c55SRobert Thurlow 	    struct nfs_fsl_info *, &nfsfsloc,
1757*2f172c55SRobert Thurlow 	    char *, "nfs4_trigger_esi_create_referral");
1758*2f172c55SRobert Thurlow 
1759*2f172c55SRobert Thurlow 	svkncp = nfsfsloc.knconf;
1760*2f172c55SRobert Thurlow 	sikncp->knc_semantics = svkncp->knc_semantics;
1761*2f172c55SRobert Thurlow 	sikncp->knc_protofmly = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
1762*2f172c55SRobert Thurlow 	(void) strlcat((char *)sikncp->knc_protofmly,
1763*2f172c55SRobert Thurlow 	    (char *)svkncp->knc_protofmly, KNC_STRSIZE);
1764*2f172c55SRobert Thurlow 	sikncp->knc_proto = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
1765*2f172c55SRobert Thurlow 	(void) strlcat((char *)sikncp->knc_proto, (char *)svkncp->knc_proto,
1766*2f172c55SRobert Thurlow 	    KNC_STRSIZE);
1767*2f172c55SRobert Thurlow 	sikncp->knc_rdev = svkncp->knc_rdev;
1768*2f172c55SRobert Thurlow 
1769*2f172c55SRobert Thurlow 	DTRACE_PROBE2(nfs4clnt__debug__referral__knetconf,
1770*2f172c55SRobert Thurlow 	    struct knetconfig *, sikncp,
1771*2f172c55SRobert Thurlow 	    char *, "nfs4_trigger_esi_create_referral");
1772*2f172c55SRobert Thurlow 
1773*2f172c55SRobert Thurlow 	esi->esi_netname = kmem_zalloc(nfsfsloc.netnm_len, KM_SLEEP);
1774*2f172c55SRobert Thurlow 	bcopy(nfsfsloc.netname, esi->esi_netname, nfsfsloc.netnm_len);
1775*2f172c55SRobert Thurlow 	esi->esi_syncaddr = NULL;
1776*2f172c55SRobert Thurlow 
1777*2f172c55SRobert Thurlow 	esi->esi_path = p = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1778*2f172c55SRobert Thurlow 	esi->esi_path_len = MAXPATHLEN;
1779*2f172c55SRobert Thurlow 	*p++ = '/';
1780*2f172c55SRobert Thurlow 	for (i = 0; i < fsp->rootpath.pathname4_len; i++) {
1781*2f172c55SRobert Thurlow 		component4 *comp;
1782*2f172c55SRobert Thurlow 
1783*2f172c55SRobert Thurlow 		comp = &fsp->rootpath.pathname4_val[i];
1784*2f172c55SRobert Thurlow 		/* If no space, null the string and bail */
1785*2f172c55SRobert Thurlow 		if ((p - esi->esi_path) + comp->utf8string_len + 1 > MAXPATHLEN)
1786*2f172c55SRobert Thurlow 			goto err;
1787*2f172c55SRobert Thurlow 		bcopy(comp->utf8string_val, p, comp->utf8string_len);
1788*2f172c55SRobert Thurlow 		p += comp->utf8string_len;
1789*2f172c55SRobert Thurlow 		*p++ = '/';
1790*2f172c55SRobert Thurlow 	}
1791*2f172c55SRobert Thurlow 	if (fsp->rootpath.pathname4_len != 0)
1792*2f172c55SRobert Thurlow 		*(p - 1) = '\0';
1793*2f172c55SRobert Thurlow 	else
1794*2f172c55SRobert Thurlow 		*p = '\0';
1795*2f172c55SRobert Thurlow 	p = esi->esi_path;
1796*2f172c55SRobert Thurlow 	esi->esi_path = strdup(p);
1797*2f172c55SRobert Thurlow 	esi->esi_path_len = strlen(p) + 1;
1798*2f172c55SRobert Thurlow 	kmem_free(p, MAXPATHLEN);
1799*2f172c55SRobert Thurlow 
1800*2f172c55SRobert Thurlow 	/* Allocated in nfs4_process_referral() */
1801*2f172c55SRobert Thurlow 	(void) xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc);
1802*2f172c55SRobert Thurlow 	(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);
1803*2f172c55SRobert Thurlow 
1804*2f172c55SRobert Thurlow 	return (esi);
1805*2f172c55SRobert Thurlow err:
1806*2f172c55SRobert Thurlow 	kmem_free(esi->esi_path, esi->esi_path_len);
1807*2f172c55SRobert Thurlow 	kmem_free(esi->esi_hostname, fsp->server_val->utf8string_len + 1);
1808*2f172c55SRobert Thurlow 	kmem_free(esi->esi_addr->buf, esi->esi_addr->len);
1809*2f172c55SRobert Thurlow 	kmem_free(esi->esi_addr, sizeof (struct netbuf));
1810*2f172c55SRobert Thurlow 	kmem_free(esi->esi_knconf->knc_protofmly, KNC_STRSIZE);
1811*2f172c55SRobert Thurlow 	kmem_free(esi->esi_knconf->knc_proto, KNC_STRSIZE);
1812*2f172c55SRobert Thurlow 	kmem_free(esi->esi_knconf, sizeof (*esi->esi_knconf));
1813*2f172c55SRobert Thurlow 	kmem_free(esi->esi_netname, nfsfsloc.netnm_len);
1814*2f172c55SRobert Thurlow 	kmem_free(esi, sizeof (ephemeral_servinfo_t));
1815*2f172c55SRobert Thurlow 	(void) xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc);
1816*2f172c55SRobert Thurlow 	(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);
1817*2f172c55SRobert Thurlow 	return (NULL);
1818*2f172c55SRobert Thurlow }
1819*2f172c55SRobert Thurlow 
1820b9238976Sth /*
1821b9238976Sth  * Assemble the args, and call the generic VFS mount function to
1822b9238976Sth  * finally perform the ephemeral mount.
1823b9238976Sth  */
1824b9238976Sth static int
1825b9238976Sth nfs4_trigger_domount(vnode_t *stubvp, domount_args_t *dma, vfs_t **vfsp,
18266962f5b8SThomas Haynes     cred_t *cr, vnode_t **newvpp)
1827b9238976Sth {
1828b9238976Sth 	struct mounta	*uap;
1829b9238976Sth 	char		*mntpt, *orig_path, *path;
1830b9238976Sth 	const char	*orig_mntpt;
1831b9238976Sth 	int		retval;
1832b9238976Sth 	int		mntpt_len;
1833b9238976Sth 	int		spec_len;
1834b9238976Sth 	zone_t		*zone = curproc->p_zone;
1835b9238976Sth 	bool_t		has_leading_slash;
18366962f5b8SThomas Haynes 	int		i;
1837b9238976Sth 
1838b9238976Sth 	vfs_t			*stubvfsp = stubvp->v_vfsp;
1839b9238976Sth 	ephemeral_servinfo_t	*esi = dma->dma_esi;
1840b9238976Sth 	struct nfs_args		*nargs = dma->dma_nargs;
1841b9238976Sth 
1842b9238976Sth 	/* first, construct the mount point for the ephemeral mount */
1843b9238976Sth 	orig_path = path = fn_path(VTOSV(stubvp)->sv_name);
1844b9238976Sth 	orig_mntpt = (char *)refstr_value(stubvfsp->vfs_mntpt);
1845b9238976Sth 
1846b9238976Sth 	if (*orig_path == '.')
1847b9238976Sth 		orig_path++;
1848b9238976Sth 
1849b9238976Sth 	/*
1850b9238976Sth 	 * Get rid of zone's root path
1851b9238976Sth 	 */
1852b9238976Sth 	if (zone != global_zone) {
1853b9238976Sth 		/*
1854b9238976Sth 		 * -1 for trailing '/' and -1 for EOS.
1855b9238976Sth 		 */
1856b9238976Sth 		if (strncmp(zone->zone_rootpath, orig_mntpt,
1857b9238976Sth 		    zone->zone_rootpathlen - 1) == 0) {
1858b9238976Sth 			orig_mntpt += (zone->zone_rootpathlen - 2);
1859b9238976Sth 		}
1860b9238976Sth 	}
1861b9238976Sth 
1862b9238976Sth 	mntpt_len = strlen(orig_mntpt) + strlen(orig_path);
1863b9238976Sth 	mntpt = kmem_zalloc(mntpt_len + 1, KM_SLEEP);
1864b9238976Sth 	(void) strcat(mntpt, orig_mntpt);
1865b9238976Sth 	(void) strcat(mntpt, orig_path);
1866b9238976Sth 
1867b9238976Sth 	kmem_free(path, strlen(path) + 1);
1868b9238976Sth 	path = esi->esi_path;
1869b9238976Sth 	if (*path == '.')
1870b9238976Sth 		path++;
1871b9238976Sth 	if (path[0] == '/' && path[1] == '/')
1872b9238976Sth 		path++;
1873b9238976Sth 	has_leading_slash = (*path == '/');
1874b9238976Sth 
1875b9238976Sth 	spec_len = strlen(dma->dma_hostlist);
1876b9238976Sth 	spec_len += strlen(path);
1877b9238976Sth 
1878b9238976Sth 	/* We are going to have to add this in */
1879b9238976Sth 	if (!has_leading_slash)
1880b9238976Sth 		spec_len++;
1881b9238976Sth 
1882b9238976Sth 	/* We need to get the ':' for dma_hostlist:esi_path */
1883b9238976Sth 	spec_len++;
1884b9238976Sth 
1885b9238976Sth 	uap = kmem_zalloc(sizeof (struct mounta), KM_SLEEP);
1886b9238976Sth 	uap->spec = kmem_zalloc(spec_len + 1, KM_SLEEP);
1887b9238976Sth 	(void) snprintf(uap->spec, spec_len + 1, "%s:%s%s", dma->dma_hostlist,
1888b9238976Sth 	    has_leading_slash ? "" : "/", path);
1889b9238976Sth 
1890b9238976Sth 	uap->dir = mntpt;
1891b9238976Sth 
1892b9238976Sth 	uap->flags = MS_SYSSPACE | MS_DATA;
1893b9238976Sth 	/* fstype-independent mount options not covered elsewhere */
1894b9238976Sth 	/* copy parent's mount(1M) "-m" flag */
1895b9238976Sth 	if (stubvfsp->vfs_flag & VFS_NOMNTTAB)
1896b9238976Sth 		uap->flags |= MS_NOMNTTAB;
1897b9238976Sth 
1898b9238976Sth 	uap->fstype = MNTTYPE_NFS4;
1899b9238976Sth 	uap->dataptr = (char *)nargs;
1900b9238976Sth 	/* not needed for MS_SYSSPACE */
1901b9238976Sth 	uap->datalen = 0;
1902b9238976Sth 
1903b9238976Sth 	/* use optptr to pass in extra mount options */
1904b9238976Sth 	uap->flags |= MS_OPTIONSTR;
1905b9238976Sth 	uap->optptr = nfs4_trigger_create_mntopts(stubvfsp);
1906b9238976Sth 	if (uap->optptr == NULL) {
1907b9238976Sth 		retval = EINVAL;
1908b9238976Sth 		goto done;
1909b9238976Sth 	}
1910546a3997SThomas Haynes 
1911b9238976Sth 	/* domount() expects us to count the trailing NUL */
1912b9238976Sth 	uap->optlen = strlen(uap->optptr) + 1;
1913b9238976Sth 
19146962f5b8SThomas Haynes 	/*
19156962f5b8SThomas Haynes 	 * If we get EBUSY, we try again once to see if we can perform
19166962f5b8SThomas Haynes 	 * the mount. We do this because of a spurious race condition.
19176962f5b8SThomas Haynes 	 */
19186962f5b8SThomas Haynes 	for (i = 0; i < 2; i++) {
19196962f5b8SThomas Haynes 		int	error;
19206962f5b8SThomas Haynes 		bool_t	was_mounted;
19216962f5b8SThomas Haynes 
19226962f5b8SThomas Haynes 		retval = domount(NULL, uap, stubvp, cr, vfsp);
19236962f5b8SThomas Haynes 		if (retval == 0) {
19246962f5b8SThomas Haynes 			retval = VFS_ROOT(*vfsp, newvpp);
19256962f5b8SThomas Haynes 			VFS_RELE(*vfsp);
19266962f5b8SThomas Haynes 			break;
19276962f5b8SThomas Haynes 		} else if (retval != EBUSY) {
19286962f5b8SThomas Haynes 			break;
19296962f5b8SThomas Haynes 		}
19306962f5b8SThomas Haynes 
19316962f5b8SThomas Haynes 		/*
19326962f5b8SThomas Haynes 		 * We might find it mounted by the other racer...
19336962f5b8SThomas Haynes 		 */
19346962f5b8SThomas Haynes 		error = nfs4_trigger_mounted_already(stubvp,
19356962f5b8SThomas Haynes 		    newvpp, &was_mounted, vfsp);
19366962f5b8SThomas Haynes 		if (error) {
19376962f5b8SThomas Haynes 			goto done;
19386962f5b8SThomas Haynes 		} else if (was_mounted) {
19396962f5b8SThomas Haynes 			retval = 0;
19406962f5b8SThomas Haynes 			break;
19416962f5b8SThomas Haynes 		}
19426962f5b8SThomas Haynes 	}
1943546a3997SThomas Haynes 
1944b9238976Sth done:
1945b9238976Sth 	if (uap->optptr)
1946b9238976Sth 		nfs4_trigger_destroy_mntopts(uap->optptr);
1947b9238976Sth 
1948b9238976Sth 	kmem_free(uap->spec, spec_len + 1);
1949b9238976Sth 	kmem_free(uap, sizeof (struct mounta));
1950b9238976Sth 	kmem_free(mntpt, mntpt_len + 1);
1951b9238976Sth 
1952b9238976Sth 	return (retval);
1953b9238976Sth }
1954b9238976Sth 
1955b9238976Sth /*
1956b9238976Sth  * Build an nfs_args structure for passing to domount().
1957b9238976Sth  *
1958b9238976Sth  * Ephemeral mount-type specific data comes from the ephemeral_servinfo_t;
1959b9238976Sth  * generic data - common to all ephemeral mount types - is read directly
1960b9238976Sth  * from the parent mount's servinfo4_t and mntinfo4_t, via the stub vnode.
1961b9238976Sth  */
1962b9238976Sth static struct nfs_args *
1963b9238976Sth nfs4_trigger_nargs_create(mntinfo4_t *mi, servinfo4_t *svp,
1964b9238976Sth     ephemeral_servinfo_t *esi)
1965b9238976Sth {
1966b9238976Sth 	sec_data_t *secdata;
1967b9238976Sth 	struct nfs_args *nargs;
1968b9238976Sth 
1969b9238976Sth 	/* setup the nfs args */
1970b9238976Sth 	nargs = kmem_zalloc(sizeof (struct nfs_args), KM_SLEEP);
1971b9238976Sth 
1972b9238976Sth 	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
1973b9238976Sth 
1974b9238976Sth 	nargs->addr = esi->esi_addr;
1975b9238976Sth 
1976b9238976Sth 	/* for AUTH_DH by negotiation */
1977b9238976Sth 	if (esi->esi_syncaddr || esi->esi_netname) {
1978b9238976Sth 		nargs->flags |= NFSMNT_SECURE;
1979b9238976Sth 		nargs->syncaddr = esi->esi_syncaddr;
1980b9238976Sth 		nargs->netname = esi->esi_netname;
1981b9238976Sth 	}
1982b9238976Sth 
1983b9238976Sth 	nargs->flags |= NFSMNT_KNCONF;
1984b9238976Sth 	nargs->knconf = esi->esi_knconf;
1985b9238976Sth 	nargs->flags |= NFSMNT_HOSTNAME;
1986b9238976Sth 	nargs->hostname = esi->esi_hostname;
1987b9238976Sth 	nargs->fh = esi->esi_path;
1988b9238976Sth 
1989b9238976Sth 	/* general mount settings, all copied from parent mount */
1990b9238976Sth 	mutex_enter(&mi->mi_lock);
1991b9238976Sth 
1992b9238976Sth 	if (!(mi->mi_flags & MI4_HARD))
1993b9238976Sth 		nargs->flags |= NFSMNT_SOFT;
1994b9238976Sth 
1995b9238976Sth 	nargs->flags |= NFSMNT_WSIZE | NFSMNT_RSIZE | NFSMNT_TIMEO |
1996b9238976Sth 	    NFSMNT_RETRANS;
1997b9238976Sth 	nargs->wsize = mi->mi_stsize;
1998b9238976Sth 	nargs->rsize = mi->mi_tsize;
1999b9238976Sth 	nargs->timeo = mi->mi_timeo;
2000b9238976Sth 	nargs->retrans = mi->mi_retrans;
2001b9238976Sth 
2002b9238976Sth 	if (mi->mi_flags & MI4_INT)
2003b9238976Sth 		nargs->flags |= NFSMNT_INT;
2004b9238976Sth 	if (mi->mi_flags & MI4_NOAC)
2005b9238976Sth 		nargs->flags |= NFSMNT_NOAC;
2006b9238976Sth 
2007b9238976Sth 	nargs->flags |= NFSMNT_ACREGMIN | NFSMNT_ACREGMAX | NFSMNT_ACDIRMIN |
2008b9238976Sth 	    NFSMNT_ACDIRMAX;
2009b9238976Sth 	nargs->acregmin = HR2SEC(mi->mi_acregmin);
2010b9238976Sth 	nargs->acregmax = HR2SEC(mi->mi_acregmax);
2011b9238976Sth 	nargs->acdirmin = HR2SEC(mi->mi_acdirmin);
2012b9238976Sth 	nargs->acdirmax = HR2SEC(mi->mi_acdirmax);
2013b9238976Sth 
2014*2f172c55SRobert Thurlow 	/* add any specific flags for this type of ephemeral mount */
2015*2f172c55SRobert Thurlow 	nargs->flags |= esi->esi_mount_flags;
2016*2f172c55SRobert Thurlow 
2017b9238976Sth 	if (mi->mi_flags & MI4_NOCTO)
2018b9238976Sth 		nargs->flags |= NFSMNT_NOCTO;
2019b9238976Sth 	if (mi->mi_flags & MI4_GRPID)
2020b9238976Sth 		nargs->flags |= NFSMNT_GRPID;
2021b9238976Sth 	if (mi->mi_flags & MI4_LLOCK)
2022b9238976Sth 		nargs->flags |= NFSMNT_LLOCK;
2023b9238976Sth 	if (mi->mi_flags & MI4_NOPRINT)
2024b9238976Sth 		nargs->flags |= NFSMNT_NOPRINT;
2025b9238976Sth 	if (mi->mi_flags & MI4_DIRECTIO)
2026b9238976Sth 		nargs->flags |= NFSMNT_DIRECTIO;
2027*2f172c55SRobert Thurlow 	if (mi->mi_flags & MI4_PUBLIC && nargs->flags & NFSMNT_MIRRORMOUNT)
2028b9238976Sth 		nargs->flags |= NFSMNT_PUBLIC;
2029b9238976Sth 
2030*2f172c55SRobert Thurlow 	/* Do some referral-specific option tweaking */
2031*2f172c55SRobert Thurlow 	if (nargs->flags & NFSMNT_REFERRAL) {
2032*2f172c55SRobert Thurlow 		nargs->flags &= ~NFSMNT_DORDMA;
2033*2f172c55SRobert Thurlow 		nargs->flags |= NFSMNT_TRYRDMA;
2034*2f172c55SRobert Thurlow 	}
2035b9238976Sth 
2036*2f172c55SRobert Thurlow 	mutex_exit(&mi->mi_lock);
2037b9238976Sth 
2038b9238976Sth 	/*
2039b9238976Sth 	 * Security data & negotiation policy.
2040b9238976Sth 	 *
2041*2f172c55SRobert Thurlow 	 * For mirror mounts, we need to preserve the parent mount's
2042*2f172c55SRobert Thurlow 	 * preference for security negotiation, translating SV4_TRYSECDEFAULT
2043*2f172c55SRobert Thurlow 	 * to NFSMNT_SECDEFAULT if present.
2044*2f172c55SRobert Thurlow 	 *
2045*2f172c55SRobert Thurlow 	 * For referrals, we always want security negotiation and will
2046*2f172c55SRobert Thurlow 	 * set NFSMNT_SECDEFAULT and we will not copy current secdata.
2047*2f172c55SRobert Thurlow 	 * The reason is that we can't negotiate down from a parent's
2048*2f172c55SRobert Thurlow 	 * Kerberos flavor to AUTH_SYS.
2049b9238976Sth 	 *
2050b9238976Sth 	 * If SV4_TRYSECDEFAULT is not set, that indicates that a specific
2051b9238976Sth 	 * security flavour was requested, with data in sv_secdata, and that
2052b9238976Sth 	 * no negotiation should occur. If this specified flavour fails, that's
2053b9238976Sth 	 * it. We will copy sv_secdata, and not set NFSMNT_SECDEFAULT.
2054b9238976Sth 	 *
2055b9238976Sth 	 * If SV4_TRYSECDEFAULT is set, then we start with a passed-in
2056b9238976Sth 	 * default flavour, in sv_secdata, but then negotiate a new flavour.
2057b9238976Sth 	 * Possible flavours are recorded in an array in sv_secinfo, with
2058b9238976Sth 	 * currently in-use flavour pointed to by sv_currsec.
2059b9238976Sth 	 *
2060b9238976Sth 	 * If sv_currsec is set, i.e. if negotiation has already occurred,
2061b9238976Sth 	 * we will copy sv_currsec. Otherwise, copy sv_secdata. Regardless,
2062b9238976Sth 	 * we will set NFSMNT_SECDEFAULT, to enable negotiation.
2063b9238976Sth 	 */
2064*2f172c55SRobert Thurlow 	if (nargs->flags & NFSMNT_REFERRAL) {
2065*2f172c55SRobert Thurlow 		/* enable negotiation for referral mount */
2066*2f172c55SRobert Thurlow 		nargs->flags |= NFSMNT_SECDEFAULT;
2067*2f172c55SRobert Thurlow 		secdata = kmem_alloc(sizeof (sec_data_t), KM_SLEEP);
2068*2f172c55SRobert Thurlow 		secdata->secmod = secdata->rpcflavor = AUTH_SYS;
2069*2f172c55SRobert Thurlow 		secdata->data = NULL;
2070*2f172c55SRobert Thurlow 	}
2071*2f172c55SRobert Thurlow 
2072*2f172c55SRobert Thurlow 	else if (svp->sv_flags & SV4_TRYSECDEFAULT) {
2073*2f172c55SRobert Thurlow 		/* enable negotiation for mirror mount */
2074b9238976Sth 		nargs->flags |= NFSMNT_SECDEFAULT;
2075b9238976Sth 
2076b9238976Sth 		/*
2077b9238976Sth 		 * As a starting point for negotiation, copy parent
2078b9238976Sth 		 * mount's negotiated flavour (sv_currsec) if available,
2079b9238976Sth 		 * or its passed-in flavour (sv_secdata) if not.
2080b9238976Sth 		 */
2081b9238976Sth 		if (svp->sv_currsec != NULL)
2082b9238976Sth 			secdata = copy_sec_data(svp->sv_currsec);
2083b9238976Sth 		else if (svp->sv_secdata != NULL)
2084b9238976Sth 			secdata = copy_sec_data(svp->sv_secdata);
2085b9238976Sth 		else
2086b9238976Sth 			secdata = NULL;
2087b9238976Sth 	} else {
2088b9238976Sth 		/* do not enable negotiation; copy parent's passed-in flavour */
2089b9238976Sth 		if (svp->sv_secdata != NULL)
2090b9238976Sth 			secdata = copy_sec_data(svp->sv_secdata);
2091b9238976Sth 		else
2092b9238976Sth 			secdata = NULL;
2093b9238976Sth 	}
2094b9238976Sth 
2095b9238976Sth 	nfs_rw_exit(&svp->sv_lock);
2096b9238976Sth 
2097b9238976Sth 	nargs->flags |= NFSMNT_NEWARGS;
2098b9238976Sth 	nargs->nfs_args_ext = NFS_ARGS_EXTB;
2099b9238976Sth 	nargs->nfs_ext_u.nfs_extB.secdata = secdata;
2100b9238976Sth 
2101b9238976Sth 	/* for NFS RO failover; caller will set if necessary */
2102b9238976Sth 	nargs->nfs_ext_u.nfs_extB.next = NULL;
2103b9238976Sth 
2104b9238976Sth 	return (nargs);
2105b9238976Sth }
2106b9238976Sth 
2107b9238976Sth static void
2108b9238976Sth nfs4_trigger_nargs_destroy(struct nfs_args *nargs)
2109b9238976Sth {
2110b9238976Sth 	/*
2111b9238976Sth 	 * Either the mount failed, in which case the data is not needed, or
2112b9238976Sth 	 * nfs4_mount() has either taken copies of what it needs or,
2113b9238976Sth 	 * where it has merely copied the ptr, it has set *our* ptr to NULL,
2114b9238976Sth 	 * whereby nfs4_free_args() will ignore it.
2115b9238976Sth 	 */
2116b9238976Sth 	nfs4_free_args(nargs);
2117b9238976Sth 	kmem_free(nargs, sizeof (struct nfs_args));
2118b9238976Sth }
2119b9238976Sth 
2120b9238976Sth /*
2121b9238976Sth  * When we finally get into the mounting, we need to add this
2122b9238976Sth  * node to the ephemeral tree.
2123b9238976Sth  *
2124b9238976Sth  * This is called from nfs4_mount().
2125b9238976Sth  */
int
nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp)
{
	/*
	 * Returns 0 on success, or EBUSY if the ephemeral tree was torn
	 * down (or the parent's ephemeral node disappeared) in a race
	 * with the harvester or a umount; the caller decides whether
	 * to retry.
	 */
	mntinfo4_t		*mi_parent;
	nfs4_ephemeral_t	*eph;
	nfs4_ephemeral_tree_t	*net;

	nfs4_ephemeral_t	*prior;
	nfs4_ephemeral_t	*child;

	nfs4_ephemeral_t	*peer;

	nfs4_trigger_globals_t	*ntg;
	zone_t			*zone = curproc->p_zone;

	int			rc = 0;

	mi_parent = VTOMI4(mvp);

	/*
	 * Get this before grabbing anything else!
	 */
	ntg = zone_getspecific(nfs4_ephemeral_key, zone);
	if (!ntg->ntg_thread_started) {
		/* lazily start the per-zone harvester thread */
		nfs4_ephemeral_start_harvester(ntg);
	}

	/* lock order: parent mntinfo4 before the new child mntinfo4 */
	mutex_enter(&mi_parent->mi_lock);
	mutex_enter(&mi->mi_lock);

	/* the child joins the parent's ephemeral tree */
	net = mi->mi_ephemeral_tree =
	    mi_parent->mi_ephemeral_tree;

	/*
	 * If the mi_ephemeral_tree is NULL, then it
	 * means that either the harvester or a manual
	 * umount has cleared the tree out right before
	 * we got here.
	 *
	 * There is nothing we can do here, so return
	 * to the caller and let them decide whether they
	 * try again.
	 */
	if (net == NULL) {
		mutex_exit(&mi->mi_lock);
		mutex_exit(&mi_parent->mi_lock);

		return (EBUSY);
	}

	/*
	 * We've just tied the mntinfo to the tree, so
	 * now we bump the refcnt and hold it there until
	 * this mntinfo is removed from the tree.
	 */
	nfs4_ephemeral_tree_hold(net);

	/*
	 * We need to tack together the ephemeral mount
	 * with this new mntinfo.
	 */
	eph = kmem_zalloc(sizeof (*eph), KM_SLEEP);
	eph->ne_mount = mi;
	eph->ne_ref_time = gethrestime_sec();

	/*
	 * We need to tell the ephemeral mount when
	 * to time out.
	 */
	eph->ne_mount_to = ntg->ntg_mount_to;

	mi->mi_ephemeral = eph;

	/*
	 * If the enclosing mntinfo4 is also ephemeral,
	 * then we need to point to its enclosing parent.
	 * Else the enclosing mntinfo4 is the enclosing parent.
	 *
	 * We also need to weave this ephemeral node
	 * into the tree.
	 */
	if (mi_parent->mi_flags & MI4_EPHEMERAL) {
		/*
		 * We need to decide if we are
		 * the root node of this branch
		 * or if we are a sibling of this
		 * branch.
		 */
		prior = mi_parent->mi_ephemeral;
		if (prior == NULL) {
			/*
			 * Race condition, clean up, and
			 * let caller handle mntinfo.
			 * (Parent's ephemeral node vanished after we
			 * checked MI4_EPHEMERAL; undo our linkage.)
			 */
			mi->mi_flags &= ~MI4_EPHEMERAL;
			mi->mi_ephemeral = NULL;
			kmem_free(eph, sizeof (*eph));
			nfs4_ephemeral_tree_rele(net);
			rc = EBUSY;
		} else {
			/* push eph onto the head of prior's child list */
			if (prior->ne_child == NULL) {
				prior->ne_child = eph;
			} else {
				child = prior->ne_child;

				prior->ne_child = eph;
				eph->ne_peer = child;

				child->ne_prior = eph;
			}

			eph->ne_prior = prior;
		}
	} else {
		/*
		 * The parent mntinfo4 is the non-ephemeral
		 * root of the ephemeral tree. We
		 * need to decide if we are the root
		 * node of that tree or if we are a
		 * sibling of the root node.
		 *
		 * We are the root if there is no
		 * other node.
		 */
		if (net->net_root == NULL) {
			net->net_root = eph;
		} else {
			/* push eph onto the head of the root's peer list */
			eph->ne_peer = peer = net->net_root;
			ASSERT(peer != NULL);
			net->net_root = eph;

			peer->ne_prior = eph;
		}

		eph->ne_prior = NULL;
	}

	mutex_exit(&mi->mi_lock);
	mutex_exit(&mi_parent->mi_lock);

	return (rc);
}
2268b9238976Sth 
2269b9238976Sth /*
2270b9238976Sth  * Commit the changes to the ephemeral tree for removing this node.
2271b9238976Sth  */
2272b9238976Sth static void
2273b9238976Sth nfs4_ephemeral_umount_cleanup(nfs4_ephemeral_t *eph)
2274b9238976Sth {
2275b9238976Sth 	nfs4_ephemeral_t	*e = eph;
2276b9238976Sth 	nfs4_ephemeral_t	*peer;
2277b9238976Sth 	nfs4_ephemeral_t	*prior;
2278b9238976Sth 
2279b9238976Sth 	peer = eph->ne_peer;
2280b9238976Sth 	prior = e->ne_prior;
2281b9238976Sth 
2282b9238976Sth 	/*
2283b9238976Sth 	 * If this branch root was not the
2284b9238976Sth 	 * tree root, then we need to fix back pointers.
2285b9238976Sth 	 */
2286b9238976Sth 	if (prior) {
2287b9238976Sth 		if (prior->ne_child == e) {
2288b9238976Sth 			prior->ne_child = peer;
2289b9238976Sth 		} else {
2290b9238976Sth 			prior->ne_peer = peer;
2291b9238976Sth 		}
2292b9238976Sth 
2293b9238976Sth 		if (peer)
2294b9238976Sth 			peer->ne_prior = prior;
2295b9238976Sth 	} else if (peer) {
2296b9238976Sth 		peer->ne_mount->mi_ephemeral_tree->net_root = peer;
2297b9238976Sth 		peer->ne_prior = NULL;
2298b9238976Sth 	} else {
2299b9238976Sth 		e->ne_mount->mi_ephemeral_tree->net_root = NULL;
2300b9238976Sth 	}
2301b9238976Sth }
2302b9238976Sth 
2303b9238976Sth /*
2304b9238976Sth  * We want to avoid recursion at all costs. So we need to
2305b9238976Sth  * unroll the tree. We do this by a depth first traversal to
2306b9238976Sth  * leaf nodes. We blast away the leaf and work our way back
2307b9238976Sth  * up and down the tree.
2308b9238976Sth  */
static int
nfs4_ephemeral_unmount_engine(nfs4_ephemeral_t *eph,
    int isTreeRoot, int flag, cred_t *cr)
{
	/*
	 * Iterative depth-first teardown of the subtree rooted at eph.
	 * isTreeRoot is TRUE when eph is the root of the whole ephemeral
	 * tree (in which case eph itself is also unmounted here);
	 * otherwise eph is left for the caller to remove.
	 * flag/cr are passed straight through to umount2_engine().
	 * Returns 0 on success or the first umount2_engine() error,
	 * stopping the traversal.
	 */
	nfs4_ephemeral_t	*e = eph;
	nfs4_ephemeral_t	*prior;
	mntinfo4_t		*mi;
	vfs_t			*vfsp;
	int			error;

	/*
	 * We use the loop while unrolling the ephemeral tree.
	 */
	for (;;) {
		/*
		 * First we walk down the child.
		 */
		if (e->ne_child) {
			prior = e;
			e = e->ne_child;
			continue;
		}

		/*
		 * If we are the root of the branch we are removing,
		 * we end it here. But if the branch is the root of
		 * the tree, we have to forge on. We do not consider
		 * the peer list for the root because while it may
		 * be okay to remove, it is both extra work and a
		 * potential for a false-positive error to stall the
		 * unmount attempt.
		 */
		if (e == eph && isTreeRoot == FALSE)
			return (0);

		/*
		 * Next we walk down the peer list.
		 */
		if (e->ne_peer) {
			prior = e;
			e = e->ne_peer;
			continue;
		}

		/*
		 * We can only remove the node passed in by the
		 * caller if it is the root of the ephemeral tree.
		 * Otherwise, the caller will remove it.
		 */
		if (e == eph && isTreeRoot == FALSE)
			return (0);

		/*
		 * Okay, we have a leaf node, time
		 * to prune it!
		 *
		 * Note that prior can only be NULL if
		 * and only if it is the root of the
		 * ephemeral tree.
		 */
		prior = e->ne_prior;

		mi = e->ne_mount;
		mutex_enter(&mi->mi_lock);
		vfsp = mi->mi_vfsp;

		/*
		 * Cleared by umount2_engine.
		 */
		VFS_HOLD(vfsp);

		/*
		 * Inform nfs4_unmount to not recursively
		 * descend into this node's children when it
		 * gets processed.
		 */
		mi->mi_flags |= MI4_EPHEMERAL_RECURSED;
		mutex_exit(&mi->mi_lock);

		error = umount2_engine(vfsp, flag, cr, FALSE);
		if (error) {
			/*
			 * We need to reenable nfs4_unmount's ability
			 * to recursively descend on this node.
			 */
			mutex_enter(&mi->mi_lock);
			mi->mi_flags &= ~MI4_EPHEMERAL_RECURSED;
			mutex_exit(&mi->mi_lock);

			return (error);
		}

		/*
		 * If we are the current node, we do not want to
		 * touch anything else. At this point, the only
		 * way the current node can have survived to here
		 * is if it is the root of the ephemeral tree and
		 * we are unmounting the enclosing mntinfo4.
		 */
		if (e == eph) {
			ASSERT(prior == NULL);
			return (0);
		}

		/*
		 * Stitch up the prior node. Note that since
		 * we have handled the root of the tree, prior
		 * must be non-NULL.
		 */
		ASSERT(prior != NULL);
		if (prior->ne_child == e) {
			prior->ne_child = NULL;
		} else {
			ASSERT(prior->ne_peer == e);

			prior->ne_peer = NULL;
		}

		/* back up to the parent and keep pruning */
		e = prior;
	}

	/* NOTREACHED */
}
2432b9238976Sth 
2433b9238976Sth /*
2434b9238976Sth  * Common code to safely release net_cnt_lock and net_tree_lock
2435b9238976Sth  */
2436b9238976Sth void
2437b9238976Sth nfs4_ephemeral_umount_unlock(bool_t *pmust_unlock,
2438*2f172c55SRobert Thurlow     nfs4_ephemeral_tree_t **pnet)
2439b9238976Sth {
2440b9238976Sth 	nfs4_ephemeral_tree_t	*net = *pnet;
2441b9238976Sth 
2442b9238976Sth 	if (*pmust_unlock) {
2443b9238976Sth 		mutex_enter(&net->net_cnt_lock);
2444b9238976Sth 		net->net_status &= ~NFS4_EPHEMERAL_TREE_UMOUNTING;
2445b9238976Sth 		mutex_exit(&net->net_cnt_lock);
2446b9238976Sth 
2447b9238976Sth 		mutex_exit(&net->net_tree_lock);
2448b9238976Sth 
2449b9238976Sth 		*pmust_unlock = FALSE;
2450b9238976Sth 	}
2451b9238976Sth }
2452b9238976Sth 
/*
 * While we may have removed any child or sibling nodes of this
 * ephemeral node, we cannot nuke it until we know that there
 * were no active vnodes on it. This will do that final
 * work once we know it is not busy.
 */
void
nfs4_ephemeral_umount_activate(mntinfo4_t *mi, bool_t *pmust_unlock,
    nfs4_ephemeral_tree_t **pnet)
{
	/*
	 * NOTE(review): *pmust_unlock/*pnet carry the lock state set up by
	 * nfs4_ephemeral_umount() — presumably this must be called with the
	 * same pair; verify against callers.
	 */

	/*
	 * Now we need to get rid of the ephemeral data if it exists.
	 */
	mutex_enter(&mi->mi_lock);
	if (mi->mi_ephemeral) {
		/*
		 * If we are the root node of an ephemeral branch
		 * which is being removed, then we need to fixup
		 * pointers into and out of the node.
		 * (A recursed node's pointers were already handled
		 * by the unmount engine.)
		 */
		if (!(mi->mi_flags & MI4_EPHEMERAL_RECURSED))
			nfs4_ephemeral_umount_cleanup(mi->mi_ephemeral);

		/* drop the tree reference taken when this node joined */
		nfs4_ephemeral_tree_rele(*pnet);
		ASSERT(mi->mi_ephemeral != NULL);

		kmem_free(mi->mi_ephemeral, sizeof (*mi->mi_ephemeral));
		mi->mi_ephemeral = NULL;
	}
	mutex_exit(&mi->mi_lock);

	/* finally release the tree-wide umount state/locks, if we hold them */
	nfs4_ephemeral_umount_unlock(pmust_unlock, pnet);
}
2486b9238976Sth 
2487b9238976Sth /*
2488b9238976Sth  * Unmount an ephemeral node.
2489*2f172c55SRobert Thurlow  *
2490*2f172c55SRobert Thurlow  * Note that if this code fails, then it must unlock.
2491*2f172c55SRobert Thurlow  *
2492*2f172c55SRobert Thurlow  * If it succeeds, then the caller must be prepared to do so.
2493b9238976Sth  */
int
nfs4_ephemeral_umount(mntinfo4_t *mi, int flag, cred_t *cr,
    bool_t *pmust_unlock, nfs4_ephemeral_tree_t **pnet)
{
	/*
	 * On return, *pmust_unlock tells the caller whether this thread
	 * still holds the tree's umount state (net_tree_lock plus the
	 * NFS4_EPHEMERAL_TREE_UMOUNTING bit) and *pnet is the tree it
	 * applies to. Returns 0 on success, EBUSY if another umount or
	 * the harvester is already working the tree, or the error from
	 * unmounting a child.
	 */
	int			error = 0;
	nfs4_ephemeral_t	*eph;
	nfs4_ephemeral_tree_t	*net;
	int			is_derooting = FALSE;
	int			is_recursed = FALSE;
	int			was_locked = FALSE;

	/*
	 * Make sure to set the default state for cleaning
	 * up the tree in the caller (and on the way out).
	 */
	*pmust_unlock = FALSE;

	/*
	 * The active vnodes on this file system may be ephemeral
	 * children. We need to check for and try to unmount them
	 * here. If any can not be unmounted, we are going
	 * to return EBUSY.
	 */
	mutex_enter(&mi->mi_lock);

	/*
	 * If an ephemeral tree, we need to check to see if
	 * the lock is already held. If it is, then we need
	 * to see if we are being called as a result of
	 * the recursive removal of some node of the tree or
	 * if we are another attempt to remove the tree.
	 *
	 * mi_flags & MI4_EPHEMERAL indicates an ephemeral
	 * node. mi_ephemeral being non-NULL also does this.
	 *
	 * mi_ephemeral_tree being non-NULL is sufficient
	 * to also indicate either it is an ephemeral node
	 * or the enclosing mntinfo4.
	 *
	 * Do we need MI4_EPHEMERAL? Yes, it is useful for
	 * when we delete the ephemeral node and need to
	 * differentiate from an ephemeral node and the
	 * enclosing root node.
	 */
	*pnet = net = mi->mi_ephemeral_tree;
	if (net == NULL) {
		/* not part of any ephemeral tree; nothing to do */
		mutex_exit(&mi->mi_lock);
		return (0);
	}

	eph = mi->mi_ephemeral;
	is_recursed = mi->mi_flags & MI4_EPHEMERAL_RECURSED;
	/* derooting: unmounting the enclosing (non-ephemeral) root mount */
	is_derooting = (eph == NULL);

	mutex_enter(&net->net_cnt_lock);

	/*
	 * If this is not recursion, then we need to
	 * check to see if a harvester thread has
	 * already grabbed the lock.
	 *
	 * After we exit this branch, we may not
	 * blindly return, we need to jump to
	 * is_busy!
	 */
	if (!is_recursed) {
		if (net->net_status &
		    NFS4_EPHEMERAL_TREE_LOCKED) {
			/*
			 * If the tree is locked, we need
			 * to decide whether we are the
			 * harvester or some explicit call
			 * for a umount. The only way that
			 * we are the harvester is if
			 * MS_SYSSPACE is set.
			 *
			 * We only let the harvester through
			 * at this point.
			 *
			 * We return EBUSY so that the
			 * caller knows something is
			 * going on. Note that by that
			 * time, the umount in the other
			 * thread may have already occurred.
			 */
			if (!(flag & MS_SYSSPACE)) {
				mutex_exit(&net->net_cnt_lock);
				mutex_exit(&mi->mi_lock);

				return (EBUSY);
			}

			was_locked = TRUE;
		}
	}

	mutex_exit(&net->net_cnt_lock);
	mutex_exit(&mi->mi_lock);

	/*
	 * If we are not the harvester, we need to check
	 * to see if we need to grab the tree lock.
	 */
	if (was_locked == FALSE) {
		/*
		 * If we grab the lock, it means that no other
		 * operation is working on the tree. If we don't
		 * grab it, we need to decide if this is because
		 * we are a recursive call or a new operation.
		 */
		if (mutex_tryenter(&net->net_tree_lock)) {
			*pmust_unlock = TRUE;
		} else {
			/*
			 * If we are a recursive call, we can
			 * proceed without the lock.
			 * Otherwise we have to wait until
			 * the lock becomes free.
			 */
			if (!is_recursed) {
				mutex_enter(&net->net_cnt_lock);
				if (net->net_status &
				    (NFS4_EPHEMERAL_TREE_DEROOTING
				    | NFS4_EPHEMERAL_TREE_INVALID)) {
					mutex_exit(&net->net_cnt_lock);
					goto is_busy;
				}
				mutex_exit(&net->net_cnt_lock);

				/*
				 * We can't hold any other locks whilst
				 * we wait on this to free up.
				 */
				mutex_enter(&net->net_tree_lock);

				/*
				 * Note that while mi->mi_ephemeral
				 * may change and thus we have to
				 * update eph, it is the case that
				 * we have tied down net and
				 * do not care if mi->mi_ephemeral_tree
				 * has changed.
				 */
				mutex_enter(&mi->mi_lock);
				eph = mi->mi_ephemeral;
				mutex_exit(&mi->mi_lock);

				/*
				 * Okay, we need to see if either the
				 * tree got nuked or the current node
				 * got nuked. Both of which will cause
				 * an error.
				 *
				 * Note that a subsequent retry of the
				 * umount shall work.
				 */
				mutex_enter(&net->net_cnt_lock);
				if (net->net_status &
				    NFS4_EPHEMERAL_TREE_INVALID ||
				    (!is_derooting && eph == NULL)) {
					mutex_exit(&net->net_cnt_lock);
					mutex_exit(&net->net_tree_lock);
					goto is_busy;
				}
				mutex_exit(&net->net_cnt_lock);
				*pmust_unlock = TRUE;
			}
		}
	}

	/*
	 * Only once we have grabbed the lock can we mark what we
	 * are planning on doing to the ephemeral tree.
	 */
	if (*pmust_unlock) {
		mutex_enter(&net->net_cnt_lock);
		net->net_status |= NFS4_EPHEMERAL_TREE_UMOUNTING;

		/*
		 * Check to see if we are nuking the root.
		 */
		if (is_derooting)
			net->net_status |=
			    NFS4_EPHEMERAL_TREE_DEROOTING;
		mutex_exit(&net->net_cnt_lock);
	}

	if (!is_derooting) {
		/*
		 * Only work on children if the caller has not already
		 * done so.
		 */
		if (!is_recursed) {
			ASSERT(eph != NULL);

			error = nfs4_ephemeral_unmount_engine(eph,
			    FALSE, flag, cr);
			if (error)
				goto is_busy;
		}
	} else {
		eph = net->net_root;

		/*
		 * Only work if there is something there.
		 */
		if (eph) {
			error = nfs4_ephemeral_unmount_engine(eph, TRUE,
			    flag, cr);
			if (error) {
				mutex_enter(&net->net_cnt_lock);
				net->net_status &=
				    ~NFS4_EPHEMERAL_TREE_DEROOTING;
				mutex_exit(&net->net_cnt_lock);
				goto is_busy;
			}

			/*
			 * Nothing else which goes wrong will
			 * invalidate the blowing away of the
			 * ephemeral tree.
			 */
			net->net_root = NULL;
		}

		/*
		 * We have derooted and we have caused the tree to be
		 * invalidated.
		 */
		mutex_enter(&net->net_cnt_lock);
		net->net_status &= ~NFS4_EPHEMERAL_TREE_DEROOTING;
		net->net_status |= NFS4_EPHEMERAL_TREE_INVALID;
		DTRACE_NFSV4_1(nfs4clnt__dbg__ephemeral__tree__derooting,
		    uint_t, net->net_refcnt);

		/*
		 * We will not finalize this node, so safe to
		 * release it.
		 */
		nfs4_ephemeral_tree_decr(net);
		mutex_exit(&net->net_cnt_lock);

		if (was_locked == FALSE)
			mutex_exit(&net->net_tree_lock);

		/*
		 * We have just blown away any notation of this
		 * tree being locked or having a refcnt.
		 * We can't let the caller try to clean things up.
		 */
		*pmust_unlock = FALSE;

		/*
		 * At this point, the tree should no longer be
		 * associated with the mntinfo4. We need to pull
		 * it off there and let the harvester take
		 * care of it once the refcnt drops.
		 */
		mutex_enter(&mi->mi_lock);
		mi->mi_ephemeral_tree = NULL;
		mutex_exit(&mi->mi_lock);
	}

	return (0);

is_busy:

	/* failure path: release any umount state we took before returning */
	nfs4_ephemeral_umount_unlock(pmust_unlock, pnet);

	return (error);
}
2765b9238976Sth 
2766b9238976Sth /*
2767b9238976Sth  * Do the umount and record any error in the parent.
2768b9238976Sth  */
2769b9238976Sth static void
2770b9238976Sth nfs4_ephemeral_record_umount(vfs_t *vfsp, int flag,
2771b9238976Sth     nfs4_ephemeral_t *e, nfs4_ephemeral_t *prior)
2772b9238976Sth {
2773b9238976Sth 	int	error;
2774b9238976Sth 
2775b9238976Sth 	error = umount2_engine(vfsp, flag, kcred, FALSE);
2776b9238976Sth 	if (error) {
2777b9238976Sth 		if (prior) {
2778b9238976Sth 			if (prior->ne_child == e)
2779b9238976Sth 				prior->ne_state |=
2780b9238976Sth 				    NFS4_EPHEMERAL_CHILD_ERROR;
2781b9238976Sth 			else
2782b9238976Sth 				prior->ne_state |=
2783b9238976Sth 				    NFS4_EPHEMERAL_PEER_ERROR;
2784b9238976Sth 		}
2785b9238976Sth 	}
2786b9238976Sth }
2787b9238976Sth 
2788b9238976Sth /*
2789b9238976Sth  * For each tree in the forest (where the forest is in
2790b9238976Sth  * effect all of the ephemeral trees for this zone),
2791b9238976Sth  * scan to see if a node can be unmounted. Note that
2792b9238976Sth  * unlike nfs4_ephemeral_unmount_engine(), we do
2793b9238976Sth  * not process the current node before children or
2794b9238976Sth  * siblings. I.e., if a node can be unmounted, we
2795b9238976Sth  * do not recursively check to see if the nodes
2796b9238976Sth  * hanging off of it can also be unmounted.
2797b9238976Sth  *
2798b9238976Sth  * Instead, we delve down deep to try and remove the
2799b9238976Sth  * children first. Then, because we share code with
2800b9238976Sth  * nfs4_ephemeral_unmount_engine(), we will try
2801b9238976Sth  * them again. This could be a performance issue in
2802b9238976Sth  * the future.
2803b9238976Sth  *
2804b9238976Sth  * Also note that unlike nfs4_ephemeral_unmount_engine(),
2805b9238976Sth  * we do not halt on an error. We will not remove the
2806b9238976Sth  * current node, but we will keep on trying to remove
2807b9238976Sth  * the others.
2808b9238976Sth  *
2809b9238976Sth  * force indicates that we want the unmount to occur
2810b9238976Sth  * even if there is something blocking it.
2811b9238976Sth  *
2812b9238976Sth  * time_check indicates that we want to see if the
2813b9238976Sth  * mount has expired past mount_to or not. Typically
2814b9238976Sth  * we want to do this and only on a shutdown of the
2815b9238976Sth  * zone would we want to ignore the check.
2816b9238976Sth  */
static void
nfs4_ephemeral_harvest_forest(nfs4_trigger_globals_t *ntg,
    bool_t force, bool_t time_check)
{
	nfs4_ephemeral_tree_t	*net;
	nfs4_ephemeral_tree_t	*prev = NULL;
	nfs4_ephemeral_tree_t	*next;
	nfs4_ephemeral_t	*e;
	nfs4_ephemeral_t	*prior;
	time_t			now = gethrestime_sec();

	/* Trees found dead during the scan; freed after the forest lock drops */
	nfs4_ephemeral_tree_t	*harvest = NULL;

	int			flag;

	mntinfo4_t		*mi;
	vfs_t			*vfsp;

	if (force)
		flag = MS_FORCE | MS_SYSSPACE;
	else
		flag = MS_SYSSPACE;

	mutex_enter(&ntg->ntg_forest_lock);
	for (net = ntg->ntg_forest; net != NULL; net = next) {
		next = net->net_next;

		/* Keep the tree alive while we work on it */
		nfs4_ephemeral_tree_hold(net);

		mutex_enter(&net->net_tree_lock);

		/*
		 * Let the unmount code know that the
		 * tree is already locked!
		 */
		mutex_enter(&net->net_cnt_lock);
		net->net_status |= NFS4_EPHEMERAL_TREE_LOCKED;
		mutex_exit(&net->net_cnt_lock);

		/*
		 * If the intent is force all ephemeral nodes to
		 * be unmounted in this zone, we can short circuit a
		 * lot of tree traversal and simply zap the root node.
		 */
		if (force) {
			if (net->net_root) {
				mi = net->net_root->ne_mount;
				vfsp = mi->mi_vfsp;

				/*
				 * Cleared by umount2_engine.
				 */
				VFS_HOLD(vfsp);

				(void) umount2_engine(vfsp, flag,
				    kcred, FALSE);

				goto check_done;
			}
		}

		/*
		 * Iterative depth-first walk: ne_state is the per-node
		 * continuation marker (visit child, then sibling, then
		 * process the node itself), so no recursion is needed.
		 */
		e = net->net_root;
		if (e)
			e->ne_state = NFS4_EPHEMERAL_VISIT_CHILD;

		while (e) {
			if (e->ne_state == NFS4_EPHEMERAL_VISIT_CHILD) {
				e->ne_state = NFS4_EPHEMERAL_VISIT_SIBLING;
				if (e->ne_child) {
					e = e->ne_child;
					e->ne_state =
					    NFS4_EPHEMERAL_VISIT_CHILD;
				}

				continue;
			} else if (e->ne_state ==
			    NFS4_EPHEMERAL_VISIT_SIBLING) {
				e->ne_state = NFS4_EPHEMERAL_PROCESS_ME;
				if (e->ne_peer) {
					e = e->ne_peer;
					e->ne_state =
					    NFS4_EPHEMERAL_VISIT_CHILD;
				}

				continue;
			} else if (e->ne_state ==
			    NFS4_EPHEMERAL_CHILD_ERROR) {
				prior = e->ne_prior;

				/*
				 * If a child reported an error, do
				 * not bother trying to unmount.
				 *
				 * If your prior node is a parent,
				 * pass the error up such that they
				 * also do not try to unmount.
				 *
				 * However, if your prior is a sibling,
				 * let them try to unmount if they can.
				 */
				if (prior) {
					if (prior->ne_child == e)
						prior->ne_state |=
						    NFS4_EPHEMERAL_CHILD_ERROR;
					else
						prior->ne_state |=
						    NFS4_EPHEMERAL_PEER_ERROR;
				}

				/*
				 * Clear the error and if needed, process peers.
				 *
				 * Once we mask out the error, we know whether
				 * or we have to process another node.
				 */
				e->ne_state &= ~NFS4_EPHEMERAL_CHILD_ERROR;
				if (e->ne_state == NFS4_EPHEMERAL_PROCESS_ME)
					e = prior;

				continue;
			} else if (e->ne_state ==
			    NFS4_EPHEMERAL_PEER_ERROR) {
				prior = e->ne_prior;

				if (prior) {
					if (prior->ne_child == e)
						prior->ne_state =
						    NFS4_EPHEMERAL_CHILD_ERROR;
					else
						prior->ne_state =
						    NFS4_EPHEMERAL_PEER_ERROR;
				}

				/*
				 * Clear the error from this node and do the
				 * correct processing.
				 */
				e->ne_state &= ~NFS4_EPHEMERAL_PEER_ERROR;
				continue;
			}

			prior = e->ne_prior;
			e->ne_state = NFS4_EPHEMERAL_OK;

			/*
			 * It must be the case that we need to process
			 * this node. Skip it only when time_check is
			 * set and the mount has not yet outlived its
			 * ne_mount_to grace period.
			 */
			if (!time_check ||
			    now - e->ne_ref_time > e->ne_mount_to) {
				mi = e->ne_mount;
				vfsp = mi->mi_vfsp;

				/*
				 * Cleared by umount2_engine.
				 */
				VFS_HOLD(vfsp);

				/*
				 * Note that we effectively work down to the
				 * leaf nodes first, try to unmount them,
				 * then work our way back up into the leaf
				 * nodes.
				 *
				 * Also note that we deal with a lot of
				 * complexity by sharing the work with
				 * the manual unmount code.
				 */
				nfs4_ephemeral_record_umount(vfsp, flag,
				    e, prior);
			}

			e = prior;
		}

check_done:

		/*
		 * At this point we are done processing this tree.
		 *
		 * If the tree is invalid and we were the only reference
		 * to it, then we push it on the local linked list
		 * to remove it at the end. We avoid that action now
		 * to keep the tree processing going along at a fair clip.
		 *
		 * Else, even if we were the only reference, we
		 * allow it to be reused as needed.
		 */
		mutex_enter(&net->net_cnt_lock);
		nfs4_ephemeral_tree_decr(net);
		if (net->net_refcnt == 0 &&
		    net->net_status & NFS4_EPHEMERAL_TREE_INVALID) {
			net->net_status &= ~NFS4_EPHEMERAL_TREE_LOCKED;
			mutex_exit(&net->net_cnt_lock);
			mutex_exit(&net->net_tree_lock);

			/* Unlink the dead tree from the forest... */
			if (prev)
				prev->net_next = net->net_next;
			else
				ntg->ntg_forest = net->net_next;

			/* ...and queue it for teardown below */
			net->net_next = harvest;
			harvest = net;
			continue;
		}

		net->net_status &= ~NFS4_EPHEMERAL_TREE_LOCKED;
		mutex_exit(&net->net_cnt_lock);
		mutex_exit(&net->net_tree_lock);

		prev = net;
	}
	mutex_exit(&ntg->ntg_forest_lock);

	/*
	 * Now that no locks are held, destroy everything we
	 * unlinked during the scan.
	 */
	for (net = harvest; net != NULL; net = next) {
		next = net->net_next;

		mutex_destroy(&net->net_tree_lock);
		mutex_destroy(&net->net_cnt_lock);
		kmem_free(net, sizeof (*net));
	}
}
3039b9238976Sth 
/*
 * This is the thread which decides when the harvesting
 * can proceed and when to kill it off for this zone.
 *
 * Runs as a per-zone zthread: sleeps for the timer interval,
 * harvests expired ephemeral mounts, and exits when its zone
 * begins shutting down.
 */
static void
nfs4_ephemeral_harvester(nfs4_trigger_globals_t *ntg)
{
	clock_t		timeleft;
	zone_t		*zone = curproc->p_zone;

	for (;;) {
		/*
		 * Sleep until either the timer interval elapses or the
		 * zone reaches ZONE_IS_SHUTTING_DOWN, whichever is first.
		 */
		timeleft = zone_status_timedwait(zone, ddi_get_lbolt() +
		    nfs4_trigger_thread_timer * hz, ZONE_IS_SHUTTING_DOWN);

		/*
		 * zone is exiting...
		 * (-1 means the wait simply timed out; any other value
		 * means the zone-status condition fired)
		 */
		if (timeleft != -1) {
			ASSERT(zone_status_get(zone) >= ZONE_IS_SHUTTING_DOWN);
			zthread_exit();
			/* NOTREACHED */
		}

		/*
		 * Only bother scanning if there is potential
		 * work to be done. (Unlocked peek; a racing insert
		 * is simply picked up on the next wakeup.)
		 */
		if (ntg->ntg_forest == NULL)
			continue;

		/*
		 * Now scan the list and get rid of everything which
		 * is old.
		 */
		nfs4_ephemeral_harvest_forest(ntg, FALSE, TRUE);
	}

	/* NOTREACHED */
}
3079b9238976Sth 
3080b9238976Sth /*
3081b9238976Sth  * The zone specific glue needed to start the unmount harvester.
3082b9238976Sth  *
3083b9238976Sth  * Note that we want to avoid holding the mutex as long as possible,
3084b9238976Sth  * hence the multiple checks.
3085b9238976Sth  *
3086b9238976Sth  * The caller should avoid us getting down here in the first
3087b9238976Sth  * place.
3088b9238976Sth  */
3089b9238976Sth static void
3090b9238976Sth nfs4_ephemeral_start_harvester(nfs4_trigger_globals_t *ntg)
3091b9238976Sth {
3092b9238976Sth 	/*
3093b9238976Sth 	 * It got started before we got here...
3094b9238976Sth 	 */
3095b9238976Sth 	if (ntg->ntg_thread_started)
3096b9238976Sth 		return;
3097b9238976Sth 
3098b9238976Sth 	mutex_enter(&nfs4_ephemeral_thread_lock);
3099b9238976Sth 
3100b9238976Sth 	if (ntg->ntg_thread_started) {
3101b9238976Sth 		mutex_exit(&nfs4_ephemeral_thread_lock);
3102b9238976Sth 		return;
3103b9238976Sth 	}
3104b9238976Sth 
3105b9238976Sth 	/*
3106b9238976Sth 	 * Start the unmounter harvester thread for this zone.
3107b9238976Sth 	 */
3108b9238976Sth 	(void) zthread_create(NULL, 0, nfs4_ephemeral_harvester,
3109b9238976Sth 	    ntg, 0, minclsyspri);
3110b9238976Sth 
3111b9238976Sth 	ntg->ntg_thread_started = TRUE;
3112b9238976Sth 	mutex_exit(&nfs4_ephemeral_thread_lock);
3113b9238976Sth }
3114b9238976Sth 
3115b9238976Sth /*ARGSUSED*/
3116b9238976Sth static void *
3117b9238976Sth nfs4_ephemeral_zsd_create(zoneid_t zoneid)
3118b9238976Sth {
3119b9238976Sth 	nfs4_trigger_globals_t	*ntg;
3120b9238976Sth 
3121b9238976Sth 	ntg = kmem_zalloc(sizeof (*ntg), KM_SLEEP);
3122b9238976Sth 	ntg->ntg_thread_started = FALSE;
3123b9238976Sth 
3124b9238976Sth 	/*
3125b9238976Sth 	 * This is the default....
3126b9238976Sth 	 */
3127b9238976Sth 	ntg->ntg_mount_to = nfs4_trigger_thread_timer;
3128b9238976Sth 
3129b9238976Sth 	mutex_init(&ntg->ntg_forest_lock, NULL,
3130b9238976Sth 	    MUTEX_DEFAULT, NULL);
3131b9238976Sth 
3132b9238976Sth 	return (ntg);
3133b9238976Sth }
3134b9238976Sth 
3135b9238976Sth /*
3136b9238976Sth  * Try a nice gentle walk down the forest and convince
3137b9238976Sth  * all of the trees to gracefully give it up.
3138b9238976Sth  */
3139b9238976Sth /*ARGSUSED*/
3140b9238976Sth static void
3141b9238976Sth nfs4_ephemeral_zsd_shutdown(zoneid_t zoneid, void *arg)
3142b9238976Sth {
3143b9238976Sth 	nfs4_trigger_globals_t	*ntg = arg;
3144b9238976Sth 
3145b9238976Sth 	if (!ntg)
3146b9238976Sth 		return;
3147b9238976Sth 
3148b9238976Sth 	nfs4_ephemeral_harvest_forest(ntg, FALSE, FALSE);
3149b9238976Sth }
3150b9238976Sth 
3151b9238976Sth /*
3152b9238976Sth  * Race along the forest and rip all of the trees out by
3153b9238976Sth  * their rootballs!
3154b9238976Sth  */
3155b9238976Sth /*ARGSUSED*/
3156b9238976Sth static void
3157b9238976Sth nfs4_ephemeral_zsd_destroy(zoneid_t zoneid, void *arg)
3158b9238976Sth {
3159b9238976Sth 	nfs4_trigger_globals_t	*ntg = arg;
3160b9238976Sth 
3161b9238976Sth 	if (!ntg)
3162b9238976Sth 		return;
3163b9238976Sth 
3164b9238976Sth 	nfs4_ephemeral_harvest_forest(ntg, TRUE, FALSE);
3165b9238976Sth 
3166b9238976Sth 	mutex_destroy(&ntg->ntg_forest_lock);
3167b9238976Sth 	kmem_free(ntg, sizeof (*ntg));
3168b9238976Sth }
3169b9238976Sth 
/*
 * This is the zone independent cleanup needed for
 * ephemeral mount processing: drop the ZSD key (which fires
 * the per-zone destroy callbacks) and then tear down the
 * harvester-start lock. Mirrors nfs4_ephemeral_init().
 */
void
nfs4_ephemeral_fini(void)
{
	(void) zone_key_delete(nfs4_ephemeral_key);
	mutex_destroy(&nfs4_ephemeral_thread_lock);
}
3180b9238976Sth 
/*
 * This is the zone independent initialization needed for
 * ephemeral mount processing: set up the harvester-start lock
 * and register the per-zone create/shutdown/destroy callbacks.
 */
void
nfs4_ephemeral_init(void)
{
	mutex_init(&nfs4_ephemeral_thread_lock, NULL, MUTEX_DEFAULT,
	    NULL);

	zone_key_create(&nfs4_ephemeral_key, nfs4_ephemeral_zsd_create,
	    nfs4_ephemeral_zsd_shutdown, nfs4_ephemeral_zsd_destroy);
}
3194b9238976Sth 
3195b9238976Sth /*
3196b9238976Sth  * nfssys() calls this function to set the per-zone
3197b9238976Sth  * value of mount_to to drive when an ephemeral mount is
3198b9238976Sth  * timed out. Each mount will grab a copy of this value
3199b9238976Sth  * when mounted.
3200b9238976Sth  */
3201b9238976Sth void
3202b9238976Sth nfs4_ephemeral_set_mount_to(uint_t mount_to)
3203b9238976Sth {
3204b9238976Sth 	nfs4_trigger_globals_t	*ntg;
3205b9238976Sth 	zone_t			*zone = curproc->p_zone;
3206b9238976Sth 
3207b9238976Sth 	ntg = zone_getspecific(nfs4_ephemeral_key, zone);
3208b9238976Sth 
3209b9238976Sth 	ntg->ntg_mount_to = mount_to;
3210b9238976Sth }
3211b9238976Sth 
3212b9238976Sth /*
3213b9238976Sth  * Walk the list of v4 mount options; if they are currently set in vfsp,
3214b9238976Sth  * append them to a new comma-separated mount option string, and return it.
3215b9238976Sth  *
3216b9238976Sth  * Caller should free by calling nfs4_trigger_destroy_mntopts().
3217b9238976Sth  */
3218b9238976Sth static char *
3219b9238976Sth nfs4_trigger_create_mntopts(vfs_t *vfsp)
3220b9238976Sth {
3221b9238976Sth 	uint_t i;
3222b9238976Sth 	char *mntopts;
3223b9238976Sth 	struct vfssw *vswp;
3224b9238976Sth 	mntopts_t *optproto;
3225b9238976Sth 
3226b9238976Sth 	mntopts = kmem_zalloc(MAX_MNTOPT_STR, KM_SLEEP);
3227b9238976Sth 
3228b9238976Sth 	/* get the list of applicable mount options for v4; locks *vswp */
3229b9238976Sth 	vswp = vfs_getvfssw(MNTTYPE_NFS4);
3230b9238976Sth 	optproto = &vswp->vsw_optproto;
3231b9238976Sth 
3232b9238976Sth 	for (i = 0; i < optproto->mo_count; i++) {
3233b9238976Sth 		struct mntopt *mop = &optproto->mo_list[i];
3234b9238976Sth 
3235b9238976Sth 		if (mop->mo_flags & MO_EMPTY)
3236b9238976Sth 			continue;
3237b9238976Sth 
3238b9238976Sth 		if (nfs4_trigger_add_mntopt(mntopts, mop->mo_name, vfsp)) {
3239b9238976Sth 			kmem_free(mntopts, MAX_MNTOPT_STR);
3240b9238976Sth 			vfs_unrefvfssw(vswp);
3241b9238976Sth 			return (NULL);
3242b9238976Sth 		}
3243b9238976Sth 	}
3244b9238976Sth 
3245b9238976Sth 	vfs_unrefvfssw(vswp);
3246b9238976Sth 
3247b9238976Sth 	/*
3248b9238976Sth 	 * MNTOPT_XATTR is not in the v4 mount opt proto list,
3249b9238976Sth 	 * and it may only be passed via MS_OPTIONSTR, so we
3250b9238976Sth 	 * must handle it here.
3251b9238976Sth 	 *
3252b9238976Sth 	 * Ideally, it would be in the list, but NFS does not specify its
3253b9238976Sth 	 * own opt proto list, it uses instead the default one. Since
3254b9238976Sth 	 * not all filesystems support extended attrs, it would not be
3255b9238976Sth 	 * appropriate to add it there.
3256b9238976Sth 	 */
3257b9238976Sth 	if (nfs4_trigger_add_mntopt(mntopts, MNTOPT_XATTR, vfsp) ||
3258b9238976Sth 	    nfs4_trigger_add_mntopt(mntopts, MNTOPT_NOXATTR, vfsp)) {
3259b9238976Sth 		kmem_free(mntopts, MAX_MNTOPT_STR);
3260b9238976Sth 		return (NULL);
3261b9238976Sth 	}
3262b9238976Sth 
3263b9238976Sth 	return (mntopts);
3264b9238976Sth }
3265b9238976Sth 
3266b9238976Sth static void
3267b9238976Sth nfs4_trigger_destroy_mntopts(char *mntopts)
3268b9238976Sth {
3269b9238976Sth 	if (mntopts)
3270b9238976Sth 		kmem_free(mntopts, MAX_MNTOPT_STR);
3271b9238976Sth }
3272b9238976Sth 
3273b9238976Sth /*
3274b9238976Sth  * Check a single mount option (optname). Add to mntopts if it is set in VFS.
3275b9238976Sth  */
3276b9238976Sth static int
3277b9238976Sth nfs4_trigger_add_mntopt(char *mntopts, char *optname, vfs_t *vfsp)
3278b9238976Sth {
3279b9238976Sth 	if (mntopts == NULL || optname == NULL || vfsp == NULL)
3280b9238976Sth 		return (EINVAL);
3281b9238976Sth 
3282b9238976Sth 	if (vfs_optionisset(vfsp, optname, NULL)) {
3283b9238976Sth 		size_t mntoptslen = strlen(mntopts);
3284b9238976Sth 		size_t optnamelen = strlen(optname);
3285b9238976Sth 
3286b9238976Sth 		/* +1 for ',', +1 for NUL */
3287b9238976Sth 		if (mntoptslen + optnamelen + 2 > MAX_MNTOPT_STR)
3288b9238976Sth 			return (EOVERFLOW);
3289b9238976Sth 
3290b9238976Sth 		/* first or subsequent mount option? */
3291b9238976Sth 		if (*mntopts != '\0')
3292b9238976Sth 			(void) strcat(mntopts, ",");
3293b9238976Sth 
3294b9238976Sth 		(void) strcat(mntopts, optname);
3295b9238976Sth 	}
3296b9238976Sth 
3297b9238976Sth 	return (0);
3298b9238976Sth }
3299b9238976Sth 
3300b9238976Sth static enum clnt_stat
3301*2f172c55SRobert Thurlow nfs4_ping_server_common(struct knetconfig *knc, struct netbuf *addr, int nointr)
3302b9238976Sth {
3303*2f172c55SRobert Thurlow 	int retries;
3304b9238976Sth 	uint_t max_msgsize;
3305b9238976Sth 	enum clnt_stat status;
3306b9238976Sth 	CLIENT *cl;
3307b9238976Sth 	struct timeval timeout;
3308b9238976Sth 
3309b9238976Sth 	/* as per recov_newserver() */
3310b9238976Sth 	max_msgsize = 0;
3311b9238976Sth 	retries = 1;
3312b9238976Sth 	timeout.tv_sec = 2;
3313b9238976Sth 	timeout.tv_usec = 0;
3314b9238976Sth 
3315*2f172c55SRobert Thurlow 	if (clnt_tli_kcreate(knc, addr, NFS_PROGRAM, NFS_V4,
3316*2f172c55SRobert Thurlow 	    max_msgsize, retries, CRED(), &cl) != 0)
3317b9238976Sth 		return (RPC_FAILED);
3318b9238976Sth 
3319b9238976Sth 	if (nointr)
3320b9238976Sth 		cl->cl_nosignal = TRUE;
3321b9238976Sth 	status = CLNT_CALL(cl, RFS_NULL, xdr_void, NULL, xdr_void, NULL,
3322b9238976Sth 	    timeout);
3323b9238976Sth 	if (nointr)
3324b9238976Sth 		cl->cl_nosignal = FALSE;
3325b9238976Sth 
3326b9238976Sth 	AUTH_DESTROY(cl->cl_auth);
3327b9238976Sth 	CLNT_DESTROY(cl);
3328b9238976Sth 
3329b9238976Sth 	return (status);
3330b9238976Sth }
3331*2f172c55SRobert Thurlow 
/*
 * Convenience wrapper: ping the server described by svp using
 * its stored knetconfig and address.
 */
static enum clnt_stat
nfs4_trigger_ping_server(servinfo4_t *svp, int nointr)
{
	return (nfs4_ping_server_common(svp->sv_knconf, &svp->sv_addr, nointr));
}
3337