xref: /illumos-gate/usr/src/uts/common/nfs/nfs4_clnt.h (revision e010bda9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
26 /*	All Rights Reserved   */
27 
28 /*
29  * Portions of this source code were derived from Berkeley 4.3 BSD
30  * under license from the Regents of the University of California.
31  */
32 
33 #ifndef _NFS4_CLNT_H
34 #define	_NFS4_CLNT_H
35 
36 #include <sys/errno.h>
37 #include <sys/types.h>
38 #include <sys/kstat.h>
39 #include <sys/time.h>
40 #include <sys/flock.h>
41 #include <vm/page.h>
42 #include <nfs/nfs4_kprot.h>
43 #include <nfs/nfs4.h>
44 #include <nfs/rnode.h>
45 #include <sys/avl.h>
46 #include <sys/list.h>
47 #include <rpc/auth.h>
48 #include <sys/door.h>
49 #include <sys/condvar_impl.h>
50 #include <sys/zone.h>
51 
52 #ifdef	__cplusplus
53 extern "C" {
54 #endif
55 
/* True when size does not exceed MAXOFFSET_T. */
56 #define	NFS4_SIZE_OK(size)	(MAXOFFSET_T >= (size))
57 
58 /* Four states of nfs4_server's lease_valid */
59 #define	NFS4_LEASE_INVALID		0
60 #define	NFS4_LEASE_VALID		1
61 #define	NFS4_LEASE_UNINITIALIZED	2
62 #define	NFS4_LEASE_NOT_STARTED		3
63 
64 /* flag to tell the renew thread it should exit */
65 #define	NFS4_THREAD_EXIT	1
66 
67 /* Default number of seconds to wait on GRACE and DELAY errors */
68 #define	NFS4ERR_DELAY_TIME	10
69 
70 /* Number of hash buckets for open owners for each nfs4_server */
71 #define	NFS4_NUM_OO_BUCKETS	53
72 
73 /* Number of freed open owners (per mntinfo4_t) to keep around */
74 #define	NFS4_NUM_FREED_OPEN_OWNERS	8
75 
76 /* Number of seconds to wait before retrying a SETCLIENTID(_CONFIRM) op */
77 #define	NFS4_RETRY_SCLID_DELAY	10
78 
79 /* Number of times we should retry a SETCLIENTID(_CONFIRM) op */
80 #define	NFS4_NUM_SCLID_RETRIES	3
81 
82 /* Number of times we should retry on open after getting NFS4ERR_BAD_SEQID */
83 #define	NFS4_NUM_RETRY_BAD_SEQID	3
84 
85 /*
86  * Macro to wake up a sleeping async worker thread.  At most one cv is
 * signalled: the NFS4_ASYNC_QUEUE cv if it has waiters, otherwise the
 * NFS4_ASYNC_PGOPS_QUEUE cv if that one has waiters.
 *
 * work_cv is an array of condition variables indexed by enum ioqtype4.  The
 * argument is parenthesized in the expansion so that any expression yielding
 * such an array may be passed; it is expanded several times, so it should be
 * free of side effects.  The expansion is a brace block rather than a single
 * statement: do not use it as the unbraced body of an `if' that has an `else'.
87  */
88 #define	NFS4_WAKE_ASYNC_WORKER(work_cv)	{				\
89 	if (CV_HAS_WAITERS(&(work_cv)[NFS4_ASYNC_QUEUE]))		\
90 		cv_signal(&(work_cv)[NFS4_ASYNC_QUEUE]);		\
91 	else if (CV_HAS_WAITERS(&(work_cv)[NFS4_ASYNC_PGOPS_QUEUE]))	\
92 		cv_signal(&(work_cv)[NFS4_ASYNC_PGOPS_QUEUE]);		\
93 }
94 
/*
 * Macro to wake up every thread waiting on either async queue cv: both the
 * NFS4_ASYNC_QUEUE and the NFS4_ASYNC_PGOPS_QUEUE entries of work_cv are
 * broadcast unconditionally.  The argument is parenthesized in the expansion
 * and is expanded twice.  The expansion is a brace block, not a single
 * statement.
 */
95 #define	NFS4_WAKEALL_ASYNC_WORKERS(work_cv) {				\
96 		cv_broadcast(&(work_cv)[NFS4_ASYNC_QUEUE]);		\
97 		cv_broadcast(&(work_cv)[NFS4_ASYNC_PGOPS_QUEUE]);	\
98 }
99 
100 /*
101  * Is the attribute cache for vp valid?  While the rnode holds a delegation
102  * (r_deleg_type is not OPEN_DELEGATE_NONE) the attrs are valid by definition;
 * otherwise they are valid only until gethrtime() reaches r_time_attr_inval.
 * vp is expanded twice, so it should be free of side effects.
103  */
104 #define	ATTRCACHE4_VALID(vp) (OPEN_DELEGATE_NONE != VTOR4(vp)->r_deleg_type || \
105 	VTOR4(vp)->r_time_attr_inval > gethrtime())
106 
107 /*
108  * Flags to indicate whether to purge the DNLC for non-directory vnodes
109  * in a call to nfs_purge_caches.
110  */
111 #define	NFS4_NOPURGE_DNLC	0
112 #define	NFS4_PURGE_DNLC		1
113 
114 /*
115  * Is cache valid?
116  * Swap (VISSWAP set on the vnode) is always valid; otherwise the cache is
117  * valid only if the given mtime (both tv_sec and tv_nsec) and the given
 * file size match the attributes cached in the rnode.
118  * NOTE: mtime is now a timestruc_t.
119  * Caller should be holding the rnode r_statelock mutex.
120  */
121 #define	CACHE4_VALID(rp, mtime, fsize)				\
122 	((RTOV4(rp)->v_flag & VISSWAP) == VISSWAP ||		\
123 	((rp)->r_attr.va_mtime.tv_sec == (mtime).tv_sec &&	\
124 	(rp)->r_attr.va_mtime.tv_nsec == (mtime).tv_nsec &&	\
125 	(rp)->r_attr.va_size == (fsize)))
126 
127 /*
128  * Macro to detect forced unmount or a zone shutdown: true if vfsp has
 * VFS_UNMOUNTED set in vfs_flag, or if the status of the current process's
 * zone (curproc->p_zone) is at least ZONE_IS_SHUTTING_DOWN.
129  */
130 #define	FS_OR_ZONE_GONE4(vfsp) \
131 	(((vfsp)->vfs_flag & VFS_UNMOUNTED) != 0 || \
132 	ZONE_IS_SHUTTING_DOWN <= zone_status_get(curproc->p_zone))
133 
134 /*
135  * Macro to help classify a failed request: true only when err is EIO and
136  * FS_OR_ZONE_GONE4() reports a forced unmount of vfsp or a zone shutdown.
 * FS_OR_ZONE_GONE4() is not evaluated for any other error value.
137  */
138 #define	NFS4_FRC_UNMT_ERR(err, vfsp) \
139 	(EIO == (err) && FS_OR_ZONE_GONE4((vfsp)))
140 
141 /*
142  * Due to the way the address space callbacks are used to execute a delmap,
143  * we must keep track of how many times the same thread has called
144  * VOP_DELMAP()->nfs4_delmap().  This is done by having a list of
145  * nfs4_delmapcall_t's associated with each rnode4_t.  This list is protected
146  * by the rnode4_t's r_statelock.  The individual elements do not need to be
147  * protected as they will only ever be created, modified and destroyed by
148  * one thread (the call_id).
149  * See nfs4_delmap() for further explanation.
150  */
151 typedef struct nfs4_delmapcall {
152 	kthread_t	*call_id;	/* the one thread that uses this entry */
153 	int		error;	/* error from delmap */
154 	list_node_t	call_node;	/* presumably linkage on the rnode4_t's list */
155 } nfs4_delmapcall_t;
156 
157 /*
158  * delmap address space callback args
 *
 * NOTE(review): vp through cr look like the saved arguments of the
 * VOP_DELMAP() call that the callback replays; confirm against nfs4_delmap().
159  */
160 typedef struct nfs4_delmap_args {
161 	vnode_t			*vp;
162 	offset_t		off;
163 	caddr_t			addr;
164 	size_t			len;
165 	uint_t			prot;
166 	uint_t			maxprot;
167 	uint_t			flags;
168 	cred_t			*cr;
169 	nfs4_delmapcall_t	*caller; /* to retrieve errors from the cb */
170 } nfs4_delmap_args_t;
171 
172 /*
173  * client side statistics
174  */
175 /*
176  * Per-zone counters
 *
 * Every member is a kstat_named_t.  The last four members exist only when
 * DEBUG is defined, so the layout differs between DEBUG and non-DEBUG builds.
177  */
178 struct clstat4 {
179 	kstat_named_t	calls;			/* client requests */
180 	kstat_named_t	badcalls;		/* rpc failures */
181 	kstat_named_t	referrals;		/* referrals */
182 	kstat_named_t	referlinks;		/* referrals as symlinks */
183 	kstat_named_t	clgets;			/* client handle gets */
184 	kstat_named_t	cltoomany;		/* client handle cache misses */
185 #ifdef DEBUG
186 	kstat_named_t	clalloc;		/* number of client handles */
187 	kstat_named_t	noresponse;		/* server not responding cnt */
188 	kstat_named_t	failover;		/* server failover count */
189 	kstat_named_t	remap;			/* server remap count */
190 #endif	/* DEBUG */
191 };
192 
193 #ifdef DEBUG
194 /*
195  * The following are statistics that describe the behavior of the system as a
196  * whole and don't correspond to any particular zone.
 * The structure and its instance are declared only when DEBUG is defined.
197  */
198 struct clstat4_debug {
199 	kstat_named_t	nrnode;			/* number of allocated rnodes */
200 	kstat_named_t	access;			/* size of access cache */
201 	kstat_named_t	dirent;			/* size of readdir cache */
202 	kstat_named_t	dirents;		/* size of readdir buf cache */
203 	kstat_named_t	reclaim;		/* number of reclaims */
204 	kstat_named_t	clreclaim;		/* number of cl reclaims */
205 	kstat_named_t	f_reclaim;		/* number of free reclaims */
206 	kstat_named_t	a_reclaim;		/* number of active reclaims */
207 	kstat_named_t	r_reclaim;		/* number of rnode reclaims */
208 	kstat_named_t	rpath;			/* bytes used to store rpaths */
209 };
210 extern struct clstat4_debug clstat4_debug;	/* defined elsewhere */
211 
212 #endif	/* DEBUG */
213 
214 /*
215  * The NFS specific async_reqs structure. iotype4 is grouped to support two
216  * types of async thread pools, please read comments section of mntinfo4_t
217  * definition for more information. Care should be taken while adding new
218  * members to this group.
219  */
220 
221 enum iotype4 {
222 	NFS4_PUTAPAGE,		/* page op */
223 	NFS4_PAGEIO,		/* page op */
224 	NFS4_COMMIT,		/* page op; must stay the last page-op type */
225 	NFS4_READ_AHEAD,
226 	NFS4_READDIR,
227 	NFS4_INACTIVE,
228 	NFS4_ASYNC_TYPES	/* number of types above; keep last */
229 };
/* Count of the leading page-op types (NFS4_PUTAPAGE through NFS4_COMMIT). */
230 #define	NFS4_ASYNC_PGOPS_TYPES	(NFS4_COMMIT + 1)
231 
232 /*
233  * NFS async requests queue type.
234  */
235 enum ioqtype4 {
236 	NFS4_ASYNC_QUEUE,	/* served by threads that work on all queues */
237 	NFS4_ASYNC_PGOPS_QUEUE,	/* served by the page-op-only threads */
238 	NFS4_MAX_ASYNC_QUEUES	/* number of queue types; keep last */
239 };
240 
241 /*
242  * Number of NFS async threads operating exclusively on page op requests.
243  */
244 #define	NUM_ASYNC_PGOPS_THREADS	0x2
245 
/*
 * Arguments of an async read-ahead request (nfs4_async_reqs a_read_args).
 * NOTE(review): readahead is declared without a prototype, so the compiler
 * does not check the arguments passed through it.
 */
246 struct nfs4_async_read_req {
247 	void (*readahead)();		/* pointer to readahead function */
248 	u_offset_t blkoff;		/* offset in file */
249 	struct seg *seg;		/* segment to do i/o to */
250 	caddr_t addr;			/* address to do i/o to */
251 };
252 
/*
 * Arguments of an async page I/O request (nfs4_async_reqs a_pageio_args).
 * The a_nfs4_putapage and a_nfs4_pageio accessors both name the pageio member.
 */
253 struct nfs4_pageio_req {
254 	int (*pageio)();		/* pointer to pageio function */
255 	page_t *pp;			/* page list */
256 	u_offset_t io_off;		/* offset in file */
257 	uint_t io_len;			/* size of request */
258 	int flags;			/* presumably passed to pageio -- confirm */
259 };
260 
/* Arguments of an async readdir request (nfs4_async_reqs a_readdir_args). */
261 struct nfs4_readdir_req {
262 	int (*readdir)();		/* pointer to readdir function */
263 	struct rddir4_cache *rdc;	/* pointer to cache entry to fill */
264 };
265 
/* Arguments of an async commit request (nfs4_async_reqs a_commit_args). */
266 struct nfs4_commit_req {
267 	void (*commit)();		/* pointer to commit function */
268 	page_t *plist;			/* page list */
269 	offset4 offset;			/* starting offset */
270 	count4 count;			/* size of range to be committed */
271 };
272 
/*
 * One queued async request.  Requests are chained through a_next; a_queuer
 * exists only when DEBUG is defined.
 */
273 struct nfs4_async_reqs {
274 	struct nfs4_async_reqs *a_next;	/* pointer to next arg struct */
275 #ifdef DEBUG
276 	kthread_t *a_queuer;		/* thread id of queueing thread */
277 #endif
278 	struct vnode *a_vp;		/* vnode pointer */
279 	struct cred *a_cred;		/* cred pointer */
280 	enum iotype4 a_io;		/* i/o type */
281 	union {				/* presumably selected by a_io */
282 		struct nfs4_async_read_req a_read_args;
283 		struct nfs4_pageio_req a_pageio_args;
284 		struct nfs4_readdir_req a_readdir_args;
285 		struct nfs4_commit_req a_commit_args;
286 	} a_args;
287 };
288 
/*
 * Shorthand names for the members of nfs4_async_reqs' a_args union, grouped
 * by union member.  a_nfs4_putapage and a_nfs4_pageio are two names for the
 * same field (a_pageio_args.pageio).
 */
289 #define	a_nfs4_readahead a_args.a_read_args.readahead
290 #define	a_nfs4_blkoff a_args.a_read_args.blkoff
291 #define	a_nfs4_seg a_args.a_read_args.seg
292 #define	a_nfs4_addr a_args.a_read_args.addr
293 
294 #define	a_nfs4_putapage a_args.a_pageio_args.pageio
295 #define	a_nfs4_pageio a_args.a_pageio_args.pageio
296 #define	a_nfs4_pp a_args.a_pageio_args.pp
297 #define	a_nfs4_off a_args.a_pageio_args.io_off
298 #define	a_nfs4_len a_args.a_pageio_args.io_len
299 #define	a_nfs4_flags a_args.a_pageio_args.flags
300 
301 #define	a_nfs4_readdir a_args.a_readdir_args.readdir
302 #define	a_nfs4_rdc a_args.a_readdir_args.rdc
303 
304 #define	a_nfs4_commit a_args.a_commit_args.commit
305 #define	a_nfs4_plist a_args.a_commit_args.plist
306 #define	a_nfs4_offset a_args.a_commit_args.offset
307 #define	a_nfs4_count a_args.a_commit_args.count
308 
309 /*
310  * Security information
311  */
312 typedef struct sv_secinfo {
313 	uint_t		count;	/* how many sdata there are */
314 	uint_t		index;	/* which sdata[index] */
315 	struct sec_data	*sdata;	/* presumably an array of count entries */
316 } sv_secinfo_t;
317 
318 /*
319  * Hash bucket for the mi's open owner list (mi_oo_list).
320  */
321 typedef struct nfs4_oo_hash_bucket {
322 	list_t			b_oo_hash_list;	/* open owners in this bucket */
323 	kmutex_t		b_lock;	/* presumably guards the list -- confirm */
324 } nfs4_oo_hash_bucket_t;
325 
326 /*
327  * Global array of ctags.
328  */
329 extern ctag_t nfs4_ctags[];
330 
/*
 * Tag identifiers.  The enumerators are listed in the same order as the
 * entries of NFS4_TAG_INITIALIZER, and each entry there starts with its
 * enumerator; keep the two lists in step when adding or removing a tag.
 * NOTE(review): nfs4_ctags[] is presumably indexed by these values.
 */
331 typedef enum nfs4_tag_type {
332 	TAG_NONE,
333 	TAG_ACCESS,
334 	TAG_CLOSE,
335 	TAG_CLOSE_LOST,
336 	TAG_CLOSE_UNDO,
337 	TAG_COMMIT,
338 	TAG_DELEGRETURN,
339 	TAG_FSINFO,
340 	TAG_GET_SYMLINK,
341 	TAG_GETATTR,
342 	TAG_GETATTR_FSLOCATION,
343 	TAG_INACTIVE,
344 	TAG_LINK,
345 	TAG_LOCK,
346 	TAG_LOCK_RECLAIM,
347 	TAG_LOCK_RESEND,
348 	TAG_LOCK_REINSTATE,
349 	TAG_LOCK_UNKNOWN,
350 	TAG_LOCKT,
351 	TAG_LOCKU,
352 	TAG_LOCKU_RESEND,
353 	TAG_LOCKU_REINSTATE,
354 	TAG_LOOKUP,
355 	TAG_LOOKUP_PARENT,
356 	TAG_LOOKUP_VALID,
357 	TAG_LOOKUP_VPARENT,
358 	TAG_MKDIR,
359 	TAG_MKNOD,
360 	TAG_MOUNT,
361 	TAG_OPEN,
362 	TAG_OPEN_CONFIRM,
363 	TAG_OPEN_CONFIRM_LOST,
364 	TAG_OPEN_DG,
365 	TAG_OPEN_DG_LOST,
366 	TAG_OPEN_LOST,
367 	TAG_OPENATTR,
368 	TAG_PATHCONF,
369 	TAG_PUTROOTFH,
370 	TAG_READ,
371 	TAG_READAHEAD,
372 	TAG_READDIR,
373 	TAG_READLINK,
374 	TAG_RELOCK,
375 	TAG_REMAP_LOOKUP,
376 	TAG_REMAP_LOOKUP_AD,
377 	TAG_REMAP_LOOKUP_NA,
378 	TAG_REMAP_MOUNT,
379 	TAG_RMDIR,
380 	TAG_REMOVE,
381 	TAG_RENAME,
382 	TAG_RENAME_VFH,
383 	TAG_RENEW,
384 	TAG_REOPEN,
385 	TAG_REOPEN_LOST,
386 	TAG_SECINFO,
387 	TAG_SETATTR,
388 	TAG_SETCLIENTID,
389 	TAG_SETCLIENTID_CF,
390 	TAG_SYMLINK,
391 	TAG_WRITE
392 } nfs4_tag_type_t;
393 
/*
 * Brace-enclosed initializer with one entry per nfs4_tag_type_t value, in
 * enum order.  Each entry is { tag, description string, three 32-bit words }.
 * The three words spell a 12-character label in ASCII, most significant byte
 * first, padded with spaces (0x20); e.g. TAG_ACCESS is
 * 0x61636365 0x73732020 0x20202020 = "access" plus six spaces.  Where the
 * description is longer than 12 characters the label is an abbreviation or
 * truncation of it (e.g. "get slnk txt", "getattr fslo").
 * NOTE(review): presumably used to initialize nfs4_ctags[] -- confirm.
 */
394 #define	NFS4_TAG_INITIALIZER	{				\
395 		{TAG_NONE,		"",			\
396 			{0x20202020, 0x20202020, 0x20202020}},	\
397 		{TAG_ACCESS,		"access",		\
398 			{0x61636365, 0x73732020, 0x20202020}},	\
399 		{TAG_CLOSE,		"close",		\
400 			{0x636c6f73, 0x65202020, 0x20202020}},	\
401 		{TAG_CLOSE_LOST,	"lost close",		\
402 			{0x6c6f7374, 0x20636c6f, 0x73652020}},	\
403 		{TAG_CLOSE_UNDO,	"undo close",		\
404 			{0x756e646f, 0x20636c6f, 0x73652020}},	\
405 		{TAG_COMMIT,		"commit",		\
406 			{0x636f6d6d, 0x69742020, 0x20202020}},	\
407 		{TAG_DELEGRETURN,	"delegreturn",		\
408 			{0x64656c65, 0x67726574, 0x75726e20}},	\
409 		{TAG_FSINFO,		"fsinfo",		\
410 			{0x6673696e, 0x666f2020, 0x20202020}},	\
411 		{TAG_GET_SYMLINK,	"get symlink text",	\
412 			{0x67657420, 0x736c6e6b, 0x20747874}},	\
413 		{TAG_GETATTR,		"getattr",		\
414 			{0x67657461, 0x74747220, 0x20202020}},	\
415 		{TAG_GETATTR_FSLOCATION, "getattr fslocation",	\
416 			{0x67657461, 0x74747220, 0x66736c6f}},	\
417 		{TAG_INACTIVE,		"inactive",		\
418 			{0x696e6163, 0x74697665, 0x20202020}},	\
419 		{TAG_LINK,		"link",			\
420 			{0x6c696e6b, 0x20202020, 0x20202020}},	\
421 		{TAG_LOCK,		"lock",			\
422 			{0x6c6f636b, 0x20202020, 0x20202020}},	\
423 		{TAG_LOCK_RECLAIM,	"reclaim lock",		\
424 			{0x7265636c, 0x61696d20, 0x6c6f636b}},	\
425 		{TAG_LOCK_RESEND,	"resend lock",		\
426 			{0x72657365, 0x6e64206c, 0x6f636b20}},	\
427 		{TAG_LOCK_REINSTATE,	"reinstate lock",	\
428 			{0x7265696e, 0x7374206c, 0x6f636b20}},	\
429 		{TAG_LOCK_UNKNOWN,	"unknown lock",		\
430 			{0x756e6b6e, 0x6f776e20, 0x6c6f636b}},	\
431 		{TAG_LOCKT,		"lock test",		\
432 			{0x6c6f636b, 0x5f746573, 0x74202020}},	\
433 		{TAG_LOCKU,		"unlock",		\
434 			{0x756e6c6f, 0x636b2020, 0x20202020}},	\
435 		{TAG_LOCKU_RESEND,	"resend locku",		\
436 			{0x72657365, 0x6e64206c, 0x6f636b75}},	\
437 		{TAG_LOCKU_REINSTATE,	"reinstate unlock",	\
438 			{0x7265696e, 0x73742075, 0x6e6c636b}},	\
439 		{TAG_LOOKUP,		"lookup",		\
440 			{0x6c6f6f6b, 0x75702020, 0x20202020}},	\
441 		{TAG_LOOKUP_PARENT,	"lookup parent",	\
442 			{0x6c6f6f6b, 0x75702070, 0x6172656e}},	\
443 		{TAG_LOOKUP_VALID,	"lookup valid",		\
444 			{0x6c6f6f6b, 0x75702076, 0x616c6964}},	\
445 		{TAG_LOOKUP_VPARENT,	"lookup valid parent",	\
446 			{0x6c6f6f6b, 0x766c6420, 0x7061726e}},	\
447 		{TAG_MKDIR,		"mkdir",		\
448 			{0x6d6b6469, 0x72202020, 0x20202020}},	\
449 		{TAG_MKNOD,		"mknod",		\
450 			{0x6d6b6e6f, 0x64202020, 0x20202020}},	\
451 		{TAG_MOUNT,		"mount",		\
452 			{0x6d6f756e, 0x74202020, 0x20202020}},	\
453 		{TAG_OPEN,		"open",			\
454 			{0x6f70656e, 0x20202020, 0x20202020}},	\
455 		{TAG_OPEN_CONFIRM,	"open confirm",		\
456 			{0x6f70656e, 0x5f636f6e, 0x6669726d}},	\
457 		{TAG_OPEN_CONFIRM_LOST,	"lost open confirm",	\
458 			{0x6c6f7374, 0x206f7065, 0x6e5f636f}},	\
459 		{TAG_OPEN_DG,		"open downgrade",	\
460 			{0x6f70656e, 0x20646772, 0x61646520}},	\
461 		{TAG_OPEN_DG_LOST,	"lost open downgrade",	\
462 			{0x6c737420, 0x6f70656e, 0x20646772}},	\
463 		{TAG_OPEN_LOST,		"lost open",		\
464 			{0x6c6f7374, 0x206f7065, 0x6e202020}},	\
465 		{TAG_OPENATTR,		"openattr",		\
466 			{0x6f70656e, 0x61747472, 0x20202020}},	\
467 		{TAG_PATHCONF,		"pathconf",		\
468 			{0x70617468, 0x636f6e66, 0x20202020}},	\
469 		{TAG_PUTROOTFH,		"putrootfh",		\
470 			{0x70757472, 0x6f6f7466, 0x68202020}},	\
471 		{TAG_READ,		"read",			\
472 			{0x72656164, 0x20202020, 0x20202020}},	\
473 		{TAG_READAHEAD,		"readahead",		\
474 			{0x72656164, 0x61686561, 0x64202020}},	\
475 		{TAG_READDIR,		"readdir",		\
476 			{0x72656164, 0x64697220, 0x20202020}},	\
477 		{TAG_READLINK,		"readlink",		\
478 			{0x72656164, 0x6c696e6b, 0x20202020}},	\
479 		{TAG_RELOCK,		"relock",		\
480 			{0x72656c6f, 0x636b2020, 0x20202020}},	\
481 		{TAG_REMAP_LOOKUP,	"remap lookup",		\
482 			{0x72656d61, 0x70206c6f, 0x6f6b7570}},	\
483 		{TAG_REMAP_LOOKUP_AD,	"remap lookup attr dir",	\
484 			{0x72656d70, 0x206c6b75, 0x70206164}},	\
485 		{TAG_REMAP_LOOKUP_NA,	"remap lookup named attrs",	\
486 			{0x72656d70, 0x206c6b75, 0x70206e61}},	\
487 		{TAG_REMAP_MOUNT,	"remap mount",		\
488 			{0x72656d61, 0x70206d6f, 0x756e7420}},	\
489 		{TAG_RMDIR,		"rmdir",		\
490 			{0x726d6469, 0x72202020, 0x20202020}},	\
491 		{TAG_REMOVE,		"remove",		\
492 			{0x72656d6f, 0x76652020, 0x20202020}},	\
493 		{TAG_RENAME,		"rename",		\
494 			{0x72656e61, 0x6d652020, 0x20202020}},	\
495 		{TAG_RENAME_VFH,	"rename volatile fh",	\
496 			{0x72656e61, 0x6d652028, 0x76666829}},	\
497 		{TAG_RENEW,		"renew",		\
498 			{0x72656e65, 0x77202020, 0x20202020}},	\
499 		{TAG_REOPEN,		"reopen",		\
500 			{0x72656f70, 0x656e2020, 0x20202020}},	\
501 		{TAG_REOPEN_LOST,	"lost reopen",		\
502 			{0x6c6f7374, 0x2072656f, 0x70656e20}},	\
503 		{TAG_SECINFO,		"secinfo",		\
504 			{0x73656369, 0x6e666f20, 0x20202020}},	\
505 		{TAG_SETATTR,		"setattr",		\
506 			{0x73657461, 0x74747220, 0x20202020}},	\
507 		{TAG_SETCLIENTID,	"setclientid",		\
508 			{0x73657463, 0x6c69656e, 0x74696420}},	\
509 		{TAG_SETCLIENTID_CF,	"setclientid_confirm",	\
510 			{0x73636c6e, 0x7469645f, 0x636f6e66}},	\
511 		{TAG_SYMLINK,		"symlink",		\
512 			{0x73796d6c, 0x696e6b20, 0x20202020}},	\
513 		{TAG_WRITE,		"write",		\
514 			{0x77726974, 0x65202020, 0x20202020}}	\
515 	}
516 
517 /*
518  * These flags are for differentiating the search criteria for
519  * find_open_owner().  The comparison is done with the open_owners's
520  * 'oo_just_created' flag.
521  */
522 #define	NFS4_PERM_CREATED	0x0
523 #define	NFS4_JUST_CREATED	0x1
524 
525 /*
526  * Hashed by the cr_uid and cr_ruid of credential 'oo_cred'. 'oo_cred_otw'
527  * is stored upon a successful OPEN.  This is needed when the user's effective
528  * and real uid's don't match.  The 'oo_cred_otw' overrides the credential
529  * passed down by VFS for async read/write, commit, lock, and close operations.
530  *
531  * The oo_ref_count keeps track of the number of active references on this
532  * data structure plus the number of nfs4_open_streams that point to it.
533  *
534  * 'oo_valid' tells whether this struct is about to be freed or not.
535  *
536  * 'oo_just_created' tells us whether this struct has just been created but
537  * not been fully finalized (that is created upon an OPEN request and
538  * finalized upon the OPEN success).
539  *
540  * The 'oo_seqid_inuse' is for the open seqid synchronization.  If a thread
541  * is currently using the open owner and its open_seqid, then it sets the
542  * oo_seqid_inuse to true if it currently is not set.  If it is set then it
543  * does a cv_wait on the oo_cv_seqid_sync condition variable.  When the thread
544  * is done it unsets the oo_seqid_inuse and does a cv_signal to wake a process
545  * waiting on the condition variable.
546  *
547  * 'oo_last_good_seqid' is the last valid seqid this open owner sent OTW,
548  * and 'oo_last_good_op' is the operation that issued the last valid seqid.
549  *
550  * Lock ordering:
551  *	mntinfo4_t::mi_lock > oo_lock (for searching mi_oo_list)
552  *
553  *	oo_seqid_inuse > mntinfo4_t::mi_lock
554  *	oo_seqid_inuse > rnode4_t::r_statelock
555  *	oo_seqid_inuse > rnode4_t::r_statev4_lock
556  *	oo_seqid_inuse > nfs4_open_stream_t::os_sync_lock
557  *
558  * The 'oo_seqid_inuse'/'oo_cv_seqid_sync' protects:
559  *	oo_last_good_op
560  *	oo_last_good_seqid
561  *	oo_name
562  *	oo_seqid
563  *
564  * The 'oo_lock' protects:
565  *	oo_cred
566  *	oo_cred_otw
567  *	oo_foo_node
568  *	oo_hash_node
569  *	oo_just_created
570  *	oo_ref_count
571  *	oo_valid
572  */
573 
574 typedef struct nfs4_open_owner {
575 	cred_t			*oo_cred;	/* owner is hashed by its uids */
576 	int			oo_ref_count;	/* active refs + open streams */
577 	int			oo_valid;	/* whether about to be freed */
578 	int			oo_just_created; /* created, not yet finalized */
579 	seqid4			oo_seqid;	/* guarded by oo_seqid_inuse */
580 	seqid4			oo_last_good_seqid; /* last valid seqid sent OTW */
581 	nfs4_tag_type_t		oo_last_good_op; /* op that issued that seqid */
582 	unsigned		oo_seqid_inuse:1; /* a thread is using the seqid */
583 	cred_t			*oo_cred_otw;	/* stored on a successful OPEN */
584 	kcondvar_t		oo_cv_seqid_sync; /* wait here if seqid in use */
585 	/*
586 	 * Fix this to always be 8 bytes
587 	 */
588 	uint64_t		oo_name;
589 	list_node_t		oo_hash_node;	/* presumably mi_oo_list linkage */
590 	list_node_t		oo_foo_node;	/* presumably mi_foo_list linkage */
591 	kmutex_t		oo_lock;	/* see "'oo_lock' protects" above */
592 } nfs4_open_owner_t;
593 
594 /*
595  * Static server information.
596  * These fields are read-only once they are initialized; sv_lock
597  * should be held as writer if they are changed during mount:
598  *	sv_addr
599  *	sv_dhsec
600  *	sv_hostname
601  *	sv_hostnamelen
602  *	sv_knconf
603  *	sv_next
604  *	sv_origknconf
605  *
606  * These fields are protected by sv_lock:
607  *	sv_currsec
608  *	sv_fhandle
609  *	sv_flags
610  *	sv_fsid
611  *	sv_path
612  *	sv_pathlen
613  *	sv_pfhandle
614  *	sv_save_secinfo
615  *	sv_savesec
616  *	sv_secdata
617  *	sv_secinfo
618  *	sv_supp_attrs
619  *
620  * Lock ordering:
621  * nfs_rtable4_lock > sv_lock
622  * rnode4_t::r_statelock > sv_lock
623  */
624 typedef struct servinfo4 {
625 	struct knetconfig *sv_knconf;   /* bound TLI fd */
626 	struct knetconfig *sv_origknconf;	/* For RDMA save orig knconf */
627 	struct netbuf	   sv_addr;	/* server's address */
628 	nfs4_fhandle_t	   sv_fhandle;	/* this server's filehandle */
629 	nfs4_fhandle_t	   sv_pfhandle; /* parent dir filehandle */
630 	int		   sv_pathlen;	/* Length of server path */
631 	char		  *sv_path;	/* Path name on server */
632 	uint32_t	   sv_flags;	/* flags for this server (SV4_*) */
633 	sec_data_t	  *sv_secdata;	/* client initiated security data */
634 	sv_secinfo_t	  *sv_secinfo;	/* server security information */
635 	sec_data_t	  *sv_currsec;	/* security data currently used; */
636 					/* points to one of the sec_data */
637 					/* entries in sv_secinfo */
638 	sv_secinfo_t	  *sv_save_secinfo; /* saved secinfo */
639 	sec_data_t	  *sv_savesec;	/* saved security data */
640 	sec_data_t	  *sv_dhsec;    /* AUTH_DH data from the user land */
641 	char		  *sv_hostname;	/* server's hostname */
642 	int		   sv_hostnamelen;  /* server's hostname length */
643 	fattr4_fsid		sv_fsid;    /* fsid of shared obj	*/
644 	fattr4_supported_attrs	sv_supp_attrs; /* presumably attrs server supports */
645 	struct servinfo4  *sv_next;	/* next in list */
646 	nfs_rwlock_t	   sv_lock;	/* see the field lists above */
647 } servinfo4_t;
648 
649 /* sv_flags fields */
650 #define	SV4_TRYSECINFO		0x001	/* try secinfo data from the server */
651 #define	SV4_TRYSECDEFAULT	0x002	/* try a default flavor */
652 #define	SV4_NOTINUSE		0x004	/* servinfo4_t had fatal errors */
653 #define	SV4_ROOT_STALE		0x008	/* root vnode got ESTALE */
654 
655 /*
656  * Lock call types.  See nfs4frlock().
657  */
/* Also recorded in a lost request (nfs4_lost_rqst_t's lr_ctype). */
658 typedef enum nfs4_lock_call_type {
659 	NFS4_LCK_CTYPE_NORM,
660 	NFS4_LCK_CTYPE_RECLAIM,
661 	NFS4_LCK_CTYPE_RESEND,
662 	NFS4_LCK_CTYPE_REINSTATE
663 } nfs4_lock_call_type_t;
664 
665 /*
666  * This structure holds the information for a lost open/close/open downgrade/
667  * lock/locku request.  It is also used for requests that are queued up so
668  * that the recovery thread can release server state after a forced
669  * unmount.
670  * "lr_op" is 0 if the struct is uninitialized.  Otherwise, it is set to
671  * the proper OP_* nfs_opnum4 number.  The other fields contain information
672  * to reconstruct the call.
673  *
674  * lr_dvp is used for OPENs with CREATE, so that we can do a PUTFH of the
675  * parent directory without relying on vtodv (since we may not have a vp
676  * for the file we wish to create).
677  *
678  * lr_putfirst means that the request should go to the front of the resend
679  * queue, rather than the end.
680  */
681 typedef struct nfs4_lost_rqst {
682 	list_node_t			lr_node;
683 	nfs_opnum4			lr_op;	/* 0 if uninitialized, else OP_* */
684 	vnode_t				*lr_vp;
685 	vnode_t				*lr_dvp; /* parent dir, OPEN with CREATE */
686 	nfs4_open_owner_t		*lr_oop;
687 	struct nfs4_open_stream		*lr_osp;
688 	struct nfs4_lock_owner		*lr_lop;
689 	cred_t				*lr_cr;
690 	flock64_t			*lr_flk;
691 	bool_t				lr_putfirst; /* queue at front, not end */
692 	union {
693 		struct {
694 			nfs4_lock_call_type_t lru_ctype;
695 			nfs_lock_type4	lru_locktype;
696 		} lru_lockargs;		/* LOCK, LOCKU */
697 		struct {
698 			uint32_t		lru_oaccess;
699 			uint32_t		lru_odeny;
700 			enum open_claim_type4	lru_oclaim;
701 			stateid4		lru_ostateid; /* reopen only */
702 			component4		lru_ofile;
703 		} lru_open_args;
704 		struct {
705 			uint32_t	lru_dg_access;
706 			uint32_t	lru_dg_deny;
707 		} lru_open_dg_args;
708 	} nfs4_lr_u;	/* presumably selected by lr_op -- confirm */
709 } nfs4_lost_rqst_t;
710 
/* Shorthand names for the members of nfs4_lost_rqst_t's nfs4_lr_u union. */
711 #define	lr_oacc		nfs4_lr_u.lru_open_args.lru_oaccess
712 #define	lr_odeny	nfs4_lr_u.lru_open_args.lru_odeny
713 #define	lr_oclaim	nfs4_lr_u.lru_open_args.lru_oclaim
714 #define	lr_ostateid	nfs4_lr_u.lru_open_args.lru_ostateid
715 #define	lr_ofile	nfs4_lr_u.lru_open_args.lru_ofile
716 #define	lr_dg_acc	nfs4_lr_u.lru_open_dg_args.lru_dg_access
717 #define	lr_dg_deny	nfs4_lr_u.lru_open_dg_args.lru_dg_deny
718 #define	lr_ctype	nfs4_lr_u.lru_lockargs.lru_ctype
719 #define	lr_locktype	nfs4_lr_u.lru_lockargs.lru_locktype
720 
721 /*
722  * Recovery actions.  Some actions can imply further recovery using a
723  * different recovery action (e.g., recovering the clientid leads to
724  * recovering open files and locks).
725  */
726 
727 typedef enum {		/* also recorded in nfs4_rfact_t's rf_action */
728 	NR_UNUSED,
729 	NR_CLIENTID,
730 	NR_OPENFILES,
731 	NR_FHEXPIRED,
732 	NR_FAILOVER,
733 	NR_WRONGSEC,
734 	NR_EXPIRED,
735 	NR_BAD_STATEID,
736 	NR_BADHANDLE,
737 	NR_BAD_SEQID,
738 	NR_OLDSTATEID,
739 	NR_GRACE,
740 	NR_DELAY,
741 	NR_LOST_LOCK,
742 	NR_LOST_STATE_RQST,
743 	NR_STALE,
744 	NR_MOVED
745 } nfs4_recov_t;
746 
747 /*
748  * Administrative and debug message framework.
749  */
750 
751 #define	NFS4_MSG_MAX	100
752 extern int nfs4_msg_max;
753 
754 #define	NFS4_REFERRAL_LOOP_MAX	20
755 
/* Event kinds; the type of nfs4_revent_t's re_type. */
756 typedef enum {
757 	RE_BAD_SEQID,
758 	RE_BADHANDLE,
759 	RE_CLIENTID,
760 	RE_DEAD_FILE,
761 	RE_END,
762 	RE_FAIL_RELOCK,
763 	RE_FAIL_REMAP_LEN,
764 	RE_FAIL_REMAP_OP,
765 	RE_FAILOVER,
766 	RE_FILE_DIFF,
767 	RE_LOST_STATE,
768 	RE_OPENS_CHANGED,
769 	RE_SIGLOST,
770 	RE_SIGLOST_NO_DUMP,
771 	RE_START,
772 	RE_UNEXPECTED_ACTION,
773 	RE_UNEXPECTED_ERRNO,
774 	RE_UNEXPECTED_STATUS,
775 	RE_WRONGSEC,
776 	RE_LOST_STATE_BAD_OP,
777 	RE_REFERRAL
778 } nfs4_event_type_t;
779 
/* The type of nfs4_rfact_t's rf_status. */
780 typedef enum {
781 	RFS_NO_INSPECT,
782 	RFS_INSPECT
783 } nfs4_fact_status_t;
784 
/* Fact kinds; the type of nfs4_rfact_t's rf_type. */
785 typedef enum {
786 	RF_BADOWNER,
787 	RF_ERR,
788 	RF_RENEW_EXPIRED,
789 	RF_SRV_NOT_RESPOND,
790 	RF_SRV_OK,
791 	RF_SRVS_NOT_RESPOND,
792 	RF_SRVS_OK,
793 	RF_DELMAP_CB_ERR,
794 	RF_SENDQ_FULL
795 } nfs4_fact_type_t;
796 
/* The type of nfs4_debug_msg_t's msg_status. */
797 typedef enum {
798 	NFS4_MS_DUMP,
799 	NFS4_MS_NO_DUMP
800 } nfs4_msg_status_t;
801 
/* A "fact" record; carried in nfs4_debug_msg_t's rmsg_u.msg_fact. */
802 typedef struct nfs4_rfact {
803 	nfs4_fact_type_t	rf_type;	/* RF_* */
804 	nfs4_fact_status_t	rf_status;	/* RFS_* */
805 	bool_t			rf_reboot;
806 	nfs4_recov_t		rf_action;	/* NR_* */
807 	nfs_opnum4		rf_op;
808 	nfsstat4		rf_stat4;
809 	timespec_t		rf_time;
810 	int			rf_error;
811 	struct rnode4		*rf_rp1;
812 	char			*rf_char1;
813 } nfs4_rfact_t;
814 
/* An "event" record; carried in nfs4_debug_msg_t's rmsg_u.msg_event. */
815 typedef struct nfs4_revent {
816 	nfs4_event_type_t	re_type;	/* RE_* */
817 	nfsstat4		re_stat4;
818 	uint_t			re_uint;
819 	pid_t			re_pid;
820 	struct mntinfo4		*re_mi;
821 	struct rnode4		*re_rp1;
822 	struct rnode4		*re_rp2;
823 	char			*re_char1;
824 	char			*re_char2;
825 	nfs4_tag_type_t		re_tag1;
826 	nfs4_tag_type_t		re_tag2;
827 	seqid4			re_seqid1;
828 	seqid4			re_seqid2;
829 } nfs4_revent_t;
830 
/* The type of nfs4_debug_msg_t's msg_type. */
831 typedef enum {
832 	RM_EVENT,
833 	RM_FACT
834 } nfs4_msg_type_t;
835 
/* One message of the administrative and debug message framework. */
836 typedef struct nfs4_debug_msg {
837 	timespec_t		msg_time;
838 	nfs4_msg_type_t		msg_type;	/* RM_EVENT or RM_FACT */
839 	char			*msg_srv;
840 	char			*msg_mntpt;
841 	union {			/* presumably selected by msg_type -- confirm */
842 		nfs4_rfact_t	msg_fact;
843 		nfs4_revent_t	msg_event;
844 	} rmsg_u;
845 	nfs4_msg_status_t	msg_status;	/* NFS4_MS_* */
846 	list_node_t		msg_node;	/* presumably mi_msg_list linkage */
847 } nfs4_debug_msg_t;
848 
849 /*
850  * NFS private data per mounted file system
851  *	The mi_lock mutex protects the following fields:
852  *		mi_flags
853  *		mi_in_recovery
854  *		mi_recovflags
855  *		mi_recovthread
856  *		mi_error
857  *		mi_printed
858  *		mi_down
859  *		mi_stsize
860  *		mi_curread
861  *		mi_curwrite
862  *		mi_timers
863  *		mi_curr_serv
864  *		mi_klmconfig
865  *		mi_oo_list
866  *		mi_foo_list
867  *		mi_foo_num
868  *		mi_foo_max
869  *		mi_lost_state
870  *		mi_bseqid_list
871  *		mi_ephemeral
872  *		mi_ephemeral_tree
873  *
874  *	Normally the netconfig information for the mount comes from
875  *	mi_curr_serv and mi_klmconfig is NULL.  If NLM calls need to use a
876  *	different transport, mi_klmconfig contains the necessary netconfig
877  *	information.
878  *
879  *	The mi_async_lock mutex protects the following fields:
880  *		mi_async_reqs
881  *		mi_async_req_count
882  *		mi_async_tail
883  *		mi_async_curr[NFS4_MAX_ASYNC_QUEUES]
884  *		mi_async_clusters
885  *		mi_async_init_clusters
886  *		mi_threads[NFS4_MAX_ASYNC_QUEUES]
887  *		mi_inactive_thread
888  *		mi_manager_thread
889  *
890  *	The nfs4_server_t::s_lock protects the following fields:
891  *		mi_clientid
892  *		mi_clientid_next
893  *		mi_clientid_prev
894  *		mi_open_files
895  *
896  *	The mntinfo4_t::mi_recovlock protects the following fields:
897  *		mi_srvsettime
898  *		mi_srvset_cnt
899  *		mi_srv
900  *
901  * Changing mi_srv from one nfs4_server_t to a different one requires
902  * holding the mi_recovlock as RW_WRITER.
903  * Exception: setting mi_srv the first time in mount/mountroot is done
904  * holding the mi_recovlock as RW_READER.
905  *
906  *	Locking order:
907  *	  mi4_globals::mig_lock > mi_async_lock
908  *	  mi_async_lock > nfs4_server_t::s_lock > mi_lock
909  *	  mi_recovlock > mi_rename_lock > nfs_rtable4_lock
910  *	  nfs4_server_t::s_recovlock > mi_recovlock
911  *	  rnode4_t::r_rwlock > mi_rename_lock
912  *	  nfs_rtable4_lock > mi_lock
913  *	  nfs4_server_t::s_lock > mi_msg_list_lock
914  *	  mi_recovlock > nfs4_server_t::s_lock
915  *	  mi_recovlock > nfs4_server_lst_lock
916  *
917  * The 'mi_oo_list' represents the hash buckets that contain the
918  * nfs4_open_owners for this particular mntinfo4.
919  *
920  * The 'mi_foo_list' represents the freed nfs4_open_owners for this mntinfo4.
921  * 'mi_foo_num' is the current number of freed open owners on the list,
922  * 'mi_foo_max' is the maximum number of freed open owners that are allowable
923  * on the list.
924  *
925  * mi_rootfh and mi_srvparentfh are read-only once created, but that just
926  * refers to the pointer.  The contents must be updated to keep in sync
927  * with mi_curr_serv.
928  *
929  * The mi_msg_list_lock protects against adding/deleting entries to the
930  * mi_msg_list, and also the updating/retrieving of mi_lease_period;
931  *
932  * 'mi_zone' is initialized at structure creation time, and never
933  * changes; it may be read without a lock.
934  *
935  * mi_zone_node is linkage into the mi4_globals.mig_list, and is
936  * protected by mi4_globals.mig_list_lock.
937  *
938  * If MI4_EPHEMERAL is set in mi_flags, then mi_ephemeral points to an
939  * ephemeral structure for this ephemeral mount point. It can not be
940  * NULL. Also, mi_ephemeral_tree points to the root of the ephemeral
941  * tree.
942  *
943  * If MI4_EPHEMERAL is not set in mi_flags, then mi_ephemeral has
944  * to be NULL. If mi_ephemeral_tree is non-NULL, then this node
945  * is the enclosing mntinfo4 for the ephemeral tree.
946  */
947 struct zone;
948 struct nfs4_ephemeral;
949 struct nfs4_ephemeral_tree;
950 struct nfs4_server;
951 typedef struct mntinfo4 {
952 	kmutex_t	mi_lock;	/* protects mntinfo4 fields */
953 	struct servinfo4 *mi_servers;   /* server list */
954 	struct servinfo4 *mi_curr_serv; /* current server */
955 	struct nfs4_sharedfh *mi_rootfh; /* root filehandle */
956 	struct nfs4_sharedfh *mi_srvparentfh; /* root's parent on server */
957 	kcondvar_t	mi_failover_cv;	/* failover synchronization */
958 	struct vfs	*mi_vfsp;	/* back pointer to vfs */
959 	enum vtype	mi_type;	/* file type of the root vnode */
960 	uint_t		mi_flags;	/* MI4_* flags; see below */
961 	uint_t		mi_recovflags;	/* MI4R_* if recovery active; see below */
962 	kthread_t	*mi_recovthread; /* active recov thread or NULL */
963 	uint_t		mi_error;	/* only set/valid when MI4_RECOV_FAIL */
964 					/* is set in mi_flags */
965 	int		mi_tsize;	/* transfer size (bytes) */
966 					/* really read size */
967 	int		mi_stsize;	/* server's max transfer size (bytes) */
968 					/* really write size */
969 	int		mi_timeo;	/* initial timeout in 10th sec */
970 	int		mi_retrans;	/* times to retry request */
971 	hrtime_t	mi_acregmin;	/* min time to hold cached file attr */
972 	hrtime_t	mi_acregmax;	/* max time to hold cached file attr */
973 	hrtime_t	mi_acdirmin;	/* min time to hold cached dir attr */
974 	hrtime_t	mi_acdirmax;	/* max time to hold cached dir attr */
975 	len_t		mi_maxfilesize; /* for pathconf _PC_FILESIZEBITS */
976 	int		mi_curread;	/* current read size */
977 	int		mi_curwrite;	/* current write size */
978 	uint_t 		mi_count; 	/* ref count */
979 	/*
980 	 * Async I/O management
981 	 * We have 2 pools of threads working on async I/O:
982 	 * 	(i) Threads which work on all async queues. Default number of
983 	 *	threads in this queue is 8. Threads in this pool work on async
984 	 *	queue pointed by mi_async_curr[NFS4_ASYNC_QUEUE]. Number of
985 	 *	active threads in this pool is tracked by
986 	 *	mi_threads[NFS4_ASYNC_QUEUE].
987 	 * 	(ii) Threads which work only on page op async queues.
988 	 *	The page ops queue comprises NFS4_PUTAPAGE, NFS4_PAGEIO &
989 	 *	NFS4_COMMIT. Default number of threads in this queue is 2
990 	 *	(NUM_ASYNC_PGOPS_THREADS). Threads in this pool work on async
991 	 *	queue pointed by mi_async_curr[NFS4_ASYNC_PGOPS_QUEUE]. Number
992 	 *	of active threads in this pool is tracked by
993 	 *	mi_threads[NFS4_ASYNC_PGOPS_QUEUE].
994 	 *
995 	 * In addition to the above two pools, there is always one thread that
996 	 * handles over-the-wire requests for VOP_INACTIVE.
997 	 */
998 	struct nfs4_async_reqs *mi_async_reqs[NFS4_ASYNC_TYPES];
999 	struct nfs4_async_reqs *mi_async_tail[NFS4_ASYNC_TYPES];
1000 	struct nfs4_async_reqs **mi_async_curr[NFS4_MAX_ASYNC_QUEUES];
1001 						/* current async queue */
1002 	uint_t		mi_async_clusters[NFS4_ASYNC_TYPES];
1003 	uint_t		mi_async_init_clusters;
1004 	uint_t		mi_async_req_count; /* # outstanding work requests */
1005 	kcondvar_t	mi_async_reqs_cv; /* signaled when there's work */
1006 	ushort_t	mi_threads[NFS4_MAX_ASYNC_QUEUES];
1007 					/* number of active async threads */
1008 	ushort_t	mi_max_threads;	/* max number of async threads */
1009 	kthread_t	*mi_manager_thread; /* async manager thread id */
1010 	kthread_t	*mi_inactive_thread; /* inactive thread id */
1011 	kcondvar_t	mi_inact_req_cv; /* notify VOP_INACTIVE thread */
1012 	kcondvar_t	mi_async_work_cv[NFS4_MAX_ASYNC_QUEUES];
1013 					/* tell workers to work */
1014 	kcondvar_t	mi_async_cv;	/* all pool threads exited */
1015 	kmutex_t	mi_async_lock;	/* presumably guards async state; confirm */
1016 	/*
1017 	 * Other stuff
1018 	 */
1019 	struct pathcnf	*mi_pathconf;	/* static pathconf kludge */
1020 	rpcprog_t	mi_prog;	/* RPC program number */
1021 	rpcvers_t	mi_vers;	/* RPC program version number */
1022 	char		**mi_rfsnames;	/* mapping to proc names */
1023 	kstat_named_t	*mi_reqs;	/* count of requests */
1024 	clock_t		mi_printftime;	/* last error printf time */
1025 	nfs_rwlock_t	mi_recovlock;	/* separate ops from recovery (v4) */
1026 	time_t		mi_grace_wait;	/* non-zero represents time to wait */
1027 	/* when we switched nfs4_server_t - only for observability purposes */
1028 	time_t		mi_srvsettime;
1029 	nfs_rwlock_t	mi_rename_lock;	/* atomic volfh rename  */
1030 	struct nfs4_fname *mi_fname;	/* root fname */
1031 	list_t		mi_lost_state;	/* resend list */
1032 	list_t		mi_bseqid_list; /* bad seqid list */
1033 	/*
1034 	 * Client Side Failover stats
1035 	 */
1036 	uint_t		mi_noresponse;	/* server not responding count */
1037 	uint_t		mi_failover; 	/* failover to new server count */
1038 	uint_t		mi_remap;	/* remap to new server count */
1039 	/*
1040 	 * Kstat statistics
1041 	 */
1042 	struct kstat	*mi_io_kstats;
1043 	struct kstat	*mi_ro_kstats;
1044 	kstat_t		*mi_recov_ksp;	/* ptr to the recovery kstat */
1045 
1046 	/*
1047 	 * Volatile fh flags (nfsv4)
1048 	 */
1049 	uint32_t	mi_fh_expire_type;
1050 	/*
1051 	 * Lease Management
1052 	 */
1053 	struct mntinfo4	*mi_clientid_next;
1054 	struct mntinfo4	*mi_clientid_prev;
1055 	clientid4	mi_clientid; /* redundant info found in nfs4_server */
1056 	int		mi_open_files;	/* count of open files */
1057 	int		mi_in_recovery;	/* count of recovery instances */
1058 	kcondvar_t	mi_cv_in_recov; /* cv for recovery threads */
1059 	/*
1060 	 * Open owner stuff.
1061 	 */
1062 	struct nfs4_oo_hash_bucket	mi_oo_list[NFS4_NUM_OO_BUCKETS];
1063 	list_t				mi_foo_list;	/* freed open owners */
1064 	int				mi_foo_num;	/* # currently on mi_foo_list */
1065 	int				mi_foo_max;	/* max allowed on mi_foo_list */
1066 	/*
1067 	 * Shared filehandle pool.
1068 	 */
1069 	nfs_rwlock_t			mi_fh_lock;
1070 	avl_tree_t			mi_filehandles;
1071 
1072 	/*
1073 	 * Debug message queue.
1074 	 */
1075 	list_t			mi_msg_list;
1076 	int			mi_msg_count;
1077 	time_t			mi_lease_period;
1078 					/*
1079 					 * not guaranteed to be accurate.
1080 					 * only should be used by debug queue.
1081 					 */
1082 	kmutex_t		mi_msg_list_lock; /* mi_msg_list, mi_lease_period */
1083 	/*
1084 	 * Zones support.
1085 	 */
1086 	struct zone	*mi_zone;	/* Zone in which FS is mounted */
1087 	zone_ref_t	mi_zone_ref;	/* Reference to aforementioned zone */
1088 	list_node_t	mi_zone_node;  /* linkage into per-zone mi list */
1089 
1090 	/*
1091 	 * Links for unmounting ephemeral mounts.
1092 	 */
1093 	struct nfs4_ephemeral		*mi_ephemeral;
1094 	struct nfs4_ephemeral_tree	*mi_ephemeral_tree;
1095 
1096 	uint_t mi_srvset_cnt; /* increment when changing the nfs4_server_t */
1097 	struct nfs4_server *mi_srv; /* backpointer to nfs4_server_t */
1098 	/*
1099 	 * Referral related info.
1100 	 */
1101 	int		mi_vfs_referral_loop_cnt;
1102 	/*
1103 	 * List of rnode4_t structures that belong to this mntinfo4
1104 	 */
1105 	kmutex_t	mi_rnodes_lock;	/* protects the mi_rnodes list */
1106 	list_t		mi_rnodes;	/* the list */
1107 } mntinfo4_t;
1108 
1109 /*
1110  * The values for mi_flags.
1111  *
1112  *	MI4_HARD		 hard or soft mount
1113  *	MI4_PRINTED		 responding message printed
1114  *	MI4_INT			 allow INTR on hard mount
1115  * 	MI4_DOWN		 server is down
1116  *	MI4_NOAC		 don't cache attributes
1117  *	MI4_NOCTO		 no close-to-open consistency
1118  *	MI4_LLOCK		 local locking only (no lockmgr)
1119  *	MI4_GRPID		 System V group id inheritance
1120  *	MI4_SHUTDOWN		 System is rebooting or shutting down
1121  *	MI4_LINK		 server supports link
1122  *	MI4_SYMLINK		 server supports symlink
1123  *	MI4_EPHEMERAL_RECURSED	 an ephemeral mount being unmounted
1124  *				 due to a recursive call - no need
1125  *				 for additional recursion
1126  *	MI4_ACL			 server supports NFSv4 ACLs
1127  *	MI4_MIRRORMOUNT		 is a mirrormount
1128  *	MI4_NOPRINT		 don't print messages
1129  *	MI4_DIRECTIO		 do direct I/O
1130  *	MI4_RECOV_ACTIV		 filesystem has a recovery thread
1131  *	MI4_REMOVE_ON_LAST_CLOSE remove from server's list
1132  *	MI4_RECOV_FAIL		 client recovery failed
1133  *	MI4_PUBLIC		 public/url option used
1134  *	MI4_MOUNTING		 mount in progress, don't failover
1135  *	MI4_POSIX_LOCK		 if server is using POSIX locking
1136  *	MI4_LOCK_DEBUG		 cmn_err'd posix lock err msg
1137  *	MI4_DEAD		 zone has released it
1138  *	MI4_INACTIVE_IDLE	 inactive thread idle
1139  *	MI4_BADOWNER_DEBUG	 badowner error msg per mount
1140  *	MI4_ASYNC_MGR_STOP	 tell async manager to die
1141  *	MI4_TIMEDOUT		 saw a timeout during zone shutdown
1142  *	MI4_EPHEMERAL		 is an ephemeral mount
1143  */
1144 #define	MI4_HARD		 0x1
1145 #define	MI4_PRINTED		 0x2
1146 #define	MI4_INT			 0x4
1147 #define	MI4_DOWN		 0x8
1148 #define	MI4_NOAC		 0x10
1149 #define	MI4_NOCTO		 0x20
1150 #define	MI4_LLOCK		 0x80
1151 #define	MI4_GRPID		 0x100
1152 #define	MI4_SHUTDOWN		 0x200
1153 #define	MI4_LINK		 0x400
1154 #define	MI4_SYMLINK		 0x800
1155 #define	MI4_EPHEMERAL_RECURSED	 0x1000
1156 #define	MI4_ACL			 0x2000
1157 /* MI4_MIRRORMOUNT is also defined in nfsstat.c */
1158 #define	MI4_MIRRORMOUNT		 0x4000
1159 #define	MI4_REFERRAL		 0x8000	/* is a referral mount */
1160 /* 0x10000 is available */
1161 #define	MI4_NOPRINT		 0x20000
1162 #define	MI4_DIRECTIO		 0x40000
1163 /* 0x80000 is available */
1164 #define	MI4_RECOV_ACTIV		 0x100000
1165 #define	MI4_REMOVE_ON_LAST_CLOSE 0x200000
1166 #define	MI4_RECOV_FAIL		 0x400000
1167 #define	MI4_PUBLIC		 0x800000
1168 #define	MI4_MOUNTING		 0x1000000
1169 #define	MI4_POSIX_LOCK		 0x2000000
1170 #define	MI4_LOCK_DEBUG		 0x4000000
1171 #define	MI4_DEAD		 0x8000000
1172 #define	MI4_INACTIVE_IDLE	 0x10000000
1173 #define	MI4_BADOWNER_DEBUG	 0x20000000
1174 #define	MI4_ASYNC_MGR_STOP	 0x40000000
1175 #define	MI4_TIMEDOUT		 0x80000000
1176 
1177 #define	MI4_EPHEMERAL		(MI4_MIRRORMOUNT | MI4_REFERRAL)
1178 
1179 #define	INTR4(vp)	(VTOMI4(vp)->mi_flags & MI4_INT)
1180 
/*
 * Evaluates to the entry following the head of the mount's server list
 * (mi_servers->sv_next); a non-NULL result means the list holds more than
 * one server.  The argument is parenthesized so that any pointer-valued
 * expression (e.g. "mip + 1") may be passed safely.
 */
#define	FAILOVER_MOUNT4(mi)	((mi)->mi_servers->sv_next)
1182 
1183 /*
1184  * Recovery flags.
1185  *
1186  * MI4R_NEED_CLIENTID is sort of redundant (it's the nfs4_server_t flag
1187  * that's important), but some flag is needed to indicate that recovery is
1188  * going on for the filesystem.
1189  */
1190 #define	MI4R_NEED_CLIENTID	0x1
1191 #define	MI4R_REOPEN_FILES	0x2
1192 #define	MI4R_NEED_SECINFO	0x4
1193 #define	MI4R_NEED_NEW_SERVER	0x8
1194 #define	MI4R_REMAP_FILES	0x10
1195 #define	MI4R_SRV_REBOOT		0x20	/* server has rebooted */
1196 #define	MI4R_LOST_STATE		0x40
1197 #define	MI4R_BAD_SEQID		0x80
1198 #define	MI4R_MOVED		0x100
1199 
/*
 * Wrappers around mi_hold() and mi_rele() for a mntinfo4.
 *
 * Each body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement; "MI4_HOLD(mi);" is then safe as the unbraced body of an
 * if that is followed by an else.
 */
#define	MI4_HOLD(mi) do {	\
	mi_hold(mi);		\
} while (0)

#define	MI4_RELE(mi) do {	\
	mi_rele(mi);		\
} while (0)
1207 
1208 /*
1209  * vfs pointer to mount info (the vfs's vfs_data, cast to mntinfo4_t *)
1210  */
1211 #define	VFTOMI4(vfsp)	((mntinfo4_t *)((vfsp)->vfs_data))
1212 
1213 /*
1214  * vnode pointer to mount info (via the vnode's v_vfsp->vfs_data)
1215  */
1216 #define	VTOMI4(vp)	((mntinfo4_t *)(((vp)->v_vfsp)->vfs_data))
1217 
1218 /*
1219  * Lease Management
1220  *
1221  * lease_valid is initially set to NFS4_LEASE_NOT_STARTED.  This is when the
1222  * nfs4_server is first created.  lease_valid is then set to
1223  * NFS4_LEASE_UNINITIALIZED when the renew thread is started.  The extra state
1224  * of NFS4_LEASE_NOT_STARTED is needed for client recovery (so we know if a
1225  * thread already exists when we do SETCLIENTID).  lease_valid is then set to
1226  * NFS4_LEASE_VALID (if it is at NFS4_LEASE_UNINITIALIZED) when a state
1227  * creating operation (OPEN) is done.  lease_valid stays at NFS4_LEASE_VALID as
1228  * long as the lease is renewed.  It is set to NFS4_LEASE_INVALID when the
1229  * lease expires.  Client recovery is needed to set the lease back to
1230  * NFS4_LEASE_VALID from NFS4_LEASE_INVALID.
1231  *
1232  * The s_cred is the credential used to mount the first file system for this
1233  * server.  It is used as the credential for the renew thread's calls to the
1234  * server.
1235  *
1236  * The renew thread waits on the condition variable cv_thread_exit.  If the cv
1237  * is signalled, then the thread knows it must check s_thread_exit to see if
1238  * it should exit.  The cv is signaled when the last file system is unmounted
1239  * from a particular server.  s_thread_exit is set to 0 upon thread startup,
1240  * and set to NFS4_THREAD_EXIT, when the last file system is unmounted thereby
1241  * telling the thread to exit.  s_thread_exit is needed to avoid spurious
1242  * wakeups.
1243  *
1244  * state_ref_count is incremented every time a new file is opened and
1245  * decremented every time a file is closed otw.  This keeps track of whether
1246  * the nfs4_server has state associated with it or not.
1247  *
1248  * s_refcnt is the reference count for storage management of the struct
1249  * itself.
1250  *
1251  * mntinfo4_list points to the doubly linked list of mntinfo4s that share
1252  * this nfs4_server (ie: <clientid, saddr> pair) in the current zone.  This is
1253  * needed for a nfs4_server to get a mntinfo4 for use in rfs4call.
1254  *
1255  * s_recovlock is used to synchronize recovery operations.  The thread
1256  * that is recovering the client must acquire it as a writer.  If the
1257  * thread is using the clientid (including recovery operations on other
1258  * state), acquire it as a reader.
1259  *
1260  * The 's_otw_call_count' keeps track of the number of outstanding over the
1261  * wire requests for this structure.  The struct will not go away as long
1262  * as this is non-zero (or s_refcnt is non-zero).
1263  *
1264  * The 's_cv_otw_count' is used in conjunction with the 's_otw_call_count'
1265  * variable to let the renew thread know when an outstanding otw request has
1266  * finished.
1267  *
1268  * 'zoneid' and 'zone_globals' are set at creation of this structure
1269  * and are read-only after that; no lock is required to read them.
1270  *
1271  * s_lock protects: everything except cv_thread_exit and s_recovlock.
1272  *
1273  * s_program is used as the index into the nfs4_callback_globals's
1274  * nfs4prog2server table.  When a callback request comes in, we can
1275  * use that request's program number (minus NFS4_CALLBACK) as an index
1276  * into the nfs4prog2server.  That entry will hold the nfs4_server_t ptr.
1277  * We can then access that nfs4_server_t and its 's_deleg_list' (its list of
1278  * delegated rnode4_ts).
1279  *
1280  * Lock order:
1281  * nfs4_server::s_lock > mntinfo4::mi_lock
1282  * nfs_rtable4_lock > s_lock
1283  * nfs4_server_lst_lock > s_lock
1284  * s_recovlock > s_lock
1285  */
1286 struct nfs4_callback_globals;
1287 
1288 typedef struct nfs4_server {
1289 	struct nfs4_server	*forw;	/* list linkage; presumably nfs4_server_lst */
1290 	struct nfs4_server	*back;
1291 	struct netbuf		saddr;	/* the saddr of <clientid, saddr> */
1292 	uint_t			s_flags; /* N4S_* flags; see below */
1293 	uint_t			s_refcnt; /* storage-management ref count */
1294 	clientid4		clientid;	/* what we get from server */
1295 	nfs_client_id4		clidtosend;	/* what we send to server */
1296 	mntinfo4_t		*mntinfo4_list;	/* mntinfo4s sharing this server */
1297 	int			lease_valid;	/* one of the NFS4_LEASE_* states */
1298 	time_t			s_lease_time;
1299 	time_t			last_renewal_time;
1300 	timespec_t		propagation_delay;
1301 	cred_t			*s_cred;	/* first mount's cred; used by renew */
1302 	kcondvar_t		cv_thread_exit;	/* renew thread waits on this */
1303 	int			s_thread_exit;	/* NFS4_THREAD_EXIT => renew exits */
1304 	int			state_ref_count; /* files opened, not yet closed otw */
1305 	int			s_otw_call_count; /* outstanding otw requests */
1306 	kcondvar_t		s_cv_otw_count;	/* an otw request has finished */
1307 	kcondvar_t		s_clientid_pend;
1308 	kmutex_t		s_lock;	/* all but cv_thread_exit, s_recovlock */
1309 	list_t			s_deleg_list;	/* delegated rnode4_ts */
1310 	rpcprog_t		s_program;	/* index into nfs4prog2server */
1311 	nfs_rwlock_t		s_recovlock;	/* synchronizes recovery operations */
1312 	kcondvar_t		wait_cb_null; /* used to wait for CB_NULL */
1313 	zoneid_t		zoneid;	/* zone using this nfs4_server_t */
1314 	struct nfs4_callback_globals *zone_globals;	/* globals */
1315 } nfs4_server_t;
1316 
1317 /* nfs4_server flags */
1318 #define	N4S_CLIENTID_SET	1	/* server has our clientid */
1319 #define	N4S_CLIENTID_PEND	0x2	/* server doesn't have clientid */
1320 #define	N4S_CB_PINGED		0x4	/* server has sent us a CB_NULL */
1321 #define	N4S_CB_WAITER		0x8	/* is/has wait{ing/ed} for cb_null */
1322 #define	N4S_INSERTED		0x10	/* list has reference for server */
1323 #define	N4S_BADOWNER_DEBUG	0x20	/* bad owner err msg per client */
1324 
1325 #define	N4S_CB_PAUSE_TIME	10000	/* Amount of time to pause (10ms) */
1326 
1327 struct lease_time_arg {
1328 	time_t	lease_time;
1329 };
1330 
1331 enum nfs4_delegreturn_policy {
1332 	IMMEDIATE,
1333 	FIRSTCLOSE,
1334 	LASTCLOSE,
1335 	INACTIVE
1336 };
1337 
1338 /*
1339  * Operation hints for the recovery framework (mostly).
1340  *
1341  * EXCEPTIONS:
1342  * OH_ACCESS, OH_GETACL, OH_GETATTR, OH_LOOKUP, OH_READDIR
1343  *	These hints exist to allow a user to visit/readdir a R4SRVSTUB dir.
1344  *	(dir represents the root of a server fs that has not yet been
1345  *	mounted at client)
1346  */
1347 typedef enum {
1348 	OH_OTHER,
1349 	OH_READ,
1350 	OH_WRITE,
1351 	OH_COMMIT,
1352 	OH_VFH_RENAME,
1353 	OH_MOUNT,
1354 	OH_CLOSE,
1355 	OH_LOCKU,
1356 	OH_DELEGRETURN,
1357 	OH_ACCESS,
1358 	OH_GETACL,
1359 	OH_GETATTR,
1360 	OH_LOOKUP,
1361 	OH_READDIR
1362 } nfs4_op_hint_t;
1363 
1364 /*
1365  * This data structure is used to track ephemeral mounts for both
1366  * mirror mounts and referrals.
1367  *
1368  * Note that each nfs4_ephemeral can only have one other nfs4_ephemeral
1369  * pointing at it. So we don't need two backpointers to walk
1370  * back up the tree.
1371  *
1372  * An ephemeral tree is pointed to by an enclosing non-ephemeral
1373  * mntinfo4. The root is also pointed to by its ephemeral
1374  * mntinfo4. ne_child will get us back to it, while ne_prior
1375  * will get us back to the non-ephemeral mntinfo4. This is an
1376  * edge case we will need to be wary of when walking back up the
1377  * tree.
1378  *
1379  * The way we handle this edge case is to have ne_prior be NULL
1380  * for the root nfs4_ephemeral node.
1381  */
1382 typedef struct nfs4_ephemeral {
1383 	mntinfo4_t		*ne_mount;	/* who encloses us */
1384 	struct nfs4_ephemeral	*ne_child;	/* first child node */
1385 	struct nfs4_ephemeral	*ne_peer;	/* next sibling */
1386 	struct nfs4_ephemeral	*ne_prior;	/* who points at us */
1387 	time_t			ne_ref_time;	/* time last referenced */
1388 	uint_t			ne_mount_to;	/* timeout at */
1389 	int			ne_state;	/* used to traverse */
1390 } nfs4_ephemeral_t;
1391 
1392 /*
1393  * State for the node (set in ne_state):
1394  */
1395 #define	NFS4_EPHEMERAL_OK		0x0
1396 #define	NFS4_EPHEMERAL_VISIT_CHILD	0x1
1397 #define	NFS4_EPHEMERAL_VISIT_SIBLING	0x2
1398 #define	NFS4_EPHEMERAL_PROCESS_ME	0x4
1399 #define	NFS4_EPHEMERAL_CHILD_ERROR	0x8
1400 #define	NFS4_EPHEMERAL_PEER_ERROR	0x10
1401 
1402 /*
1403  * These are the locks used in processing ephemeral data:
1404  *
1405  * mi->mi_lock
1406  *
1407  * net->net_tree_lock
1408  *     This lock is used to gate all tree operations.
1409  *     If it is held, then no other process may
1410  *     traverse the tree. This allows us to not
1411  *     throw a hold on each vfs_t in the tree.
1412  *     Can be held for a "long" time.
1413  *
1414  * net->net_cnt_lock
1415  *     Used to protect refcnt and status.
1416  *     Must be held for a really short time.
1417  *
1418  * nfs4_ephemeral_thread_lock
1419  *     Is only held to create the harvester for the zone.
1420  *     There is no ordering imposed on it.
1421  *     Held for a really short time.
1422  *
1423  * Some further detail on the interactions:
1424  *
1425  * net_tree_lock controls access to net_root. Access needs to first be
1426  * attempted in a non-blocking check.
1427  *
1428  * net_cnt_lock controls access to net_refcnt and net_status. It must only be
1429  * held for very short periods of time, unless the refcnt is 0 and the status
1430  * is INVALID.
1431  *
1432  * Before a caller can grab net_tree_lock, it must first grab net_cnt_lock
1433  * to bump the net_refcnt. It then releases it and does the action specific
1434  * algorithm to get the net_tree_lock. Once it has that, then it is okay to
1435  * grab the net_cnt_lock and change the status. The status can only be
1436  * changed if the caller has the net_tree_lock held as well.
1437  *
1438  * Note that the initial grab of net_cnt_lock must occur whilst
1439  * mi_lock is being held. This prevents stale data in that if the
1440  * ephemeral tree is non-NULL, then the harvester can not remove
1441  * the tree from the mntinfo node until it grabs that lock. I.e.,
1442  * we get the pointer to the tree and hold the lock atomically
1443  * with respect to being in mi_lock.
1444  *
1445  * When a caller is done with net_tree_lock, it can decrement the net_refcnt
1446  * either before it releases net_tree_lock or after.
1447  *
1448  * In either event, to decrement net_refcnt, it must hold net_cnt_lock.
1449  *
1450  * Note that the overall locking scheme for the nodes is to control access
1451  * via the tree. The current scheme could easily be extended such that
1452  * the enclosing root referenced a "forest" of trees. The underlying trees
1453  * would be autonomous with respect to locks.
1454  *
1455  * Note that net_next is controlled by external locks
1456  * particular to the data structure that the tree is being added to.
1457  */
1458 typedef struct nfs4_ephemeral_tree {
1459 	mntinfo4_t			*net_mount; /* presumably enclosing mntinfo4; confirm */
1460 	nfs4_ephemeral_t		*net_root; /* access gated by net_tree_lock */
1461 	struct nfs4_ephemeral_tree	*net_next; /* guarded by locks external to tree */
1462 	kmutex_t			net_tree_lock; /* gates all tree operations */
1463 	kmutex_t			net_cnt_lock; /* protects net_status, net_refcnt */
1464 	uint_t				net_status; /* NFS4_EPHEMERAL_TREE_* state */
1465 	uint_t				net_refcnt; /* bumped before taking net_tree_lock */
1466 } nfs4_ephemeral_tree_t;
1467 
1468 /*
1469  * State for the tree (set in net_status):
1470  */
1471 #define	NFS4_EPHEMERAL_TREE_OK		0x0
1472 #define	NFS4_EPHEMERAL_TREE_BUILDING	0x1
1473 #define	NFS4_EPHEMERAL_TREE_DEROOTING	0x2
1474 #define	NFS4_EPHEMERAL_TREE_INVALID	0x4
1475 #define	NFS4_EPHEMERAL_TREE_MOUNTING	0x8
1476 #define	NFS4_EPHEMERAL_TREE_UMOUNTING	0x10
1477 #define	NFS4_EPHEMERAL_TREE_LOCKED	0x20
1478 
1479 #define	NFS4_EPHEMERAL_TREE_PROCESSING	(NFS4_EPHEMERAL_TREE_DEROOTING | \
1480 	NFS4_EPHEMERAL_TREE_INVALID | NFS4_EPHEMERAL_TREE_UMOUNTING | \
1481 	NFS4_EPHEMERAL_TREE_LOCKED)
1482 
1483 /*
1484  * This macro evaluates to non-zero if the given op hint (nfs4_op_hint_t)
1485  * releases state at the server.  Note that op is evaluated more than once.
1486  */
1487 #define	OH_IS_STATE_RELE(op)	((op) == OH_CLOSE || (op) == OH_LOCKU || \
1488 				(op) == OH_DELEGRETURN)
1489 
1490 #ifdef _KERNEL
1491 
1492 extern void	nfs4_async_manager(struct vfs *);
1493 extern void	nfs4_async_manager_stop(struct vfs *);
1494 extern void	nfs4_async_stop(struct vfs *);
1495 extern int	nfs4_async_stop_sig(struct vfs *);
1496 extern int	nfs4_async_readahead(vnode_t *, u_offset_t, caddr_t,
1497 				struct seg *, cred_t *,
1498 				void (*)(vnode_t *, u_offset_t,
1499 				caddr_t, struct seg *, cred_t *));
1500 extern int	nfs4_async_putapage(vnode_t *, page_t *, u_offset_t, size_t,
1501 				int, cred_t *, int (*)(vnode_t *, page_t *,
1502 				u_offset_t, size_t, int, cred_t *));
1503 extern int	nfs4_async_pageio(vnode_t *, page_t *, u_offset_t, size_t,
1504 				int, cred_t *, int (*)(vnode_t *, page_t *,
1505 				u_offset_t, size_t, int, cred_t *));
1506 extern void	nfs4_async_commit(vnode_t *, page_t *, offset3, count3,
1507 				cred_t *, void (*)(vnode_t *, page_t *,
1508 				offset3, count3, cred_t *));
1509 extern void	nfs4_async_inactive(vnode_t *, cred_t *);
1510 extern void	nfs4_inactive_thread(mntinfo4_t *mi);
1511 extern void	nfs4_inactive_otw(vnode_t *, cred_t *);
1512 extern int	nfs4_putpages(vnode_t *, u_offset_t, size_t, int, cred_t *);
1513 
1514 extern int	nfs4_setopts(vnode_t *, model_t, struct nfs_args *);
1515 extern void	nfs4_mnt_kstat_init(struct vfs *);
1516 
1517 extern void	rfs4call(struct mntinfo4 *, struct COMPOUND4args_clnt *,
1518 			struct COMPOUND4res_clnt *, cred_t *, int *, int,
1519 			nfs4_error_t *);
1520 extern void	nfs4_acl_fill_cache(struct rnode4 *, vsecattr_t *);
1521 extern int	nfs4_attr_otw(vnode_t *, nfs4_tag_type_t,
1522 				nfs4_ga_res_t *, bitmap4, cred_t *);
1523 
1524 extern void	nfs4_attrcache_noinval(vnode_t *, nfs4_ga_res_t *, hrtime_t);
1525 extern void	nfs4_attr_cache(vnode_t *, nfs4_ga_res_t *,
1526 				hrtime_t, cred_t *, int,
1527 				change_info4 *);
1528 extern void	nfs4_purge_rddir_cache(vnode_t *);
1529 extern void	nfs4_invalidate_pages(vnode_t *, u_offset_t, cred_t *);
1530 extern void	nfs4_purge_caches(vnode_t *, int, cred_t *, int);
1531 extern void	nfs4_purge_stale_fh(int, vnode_t *, cred_t *);
1532 extern void	nfs4_flush_pages(vnode_t *vp, cred_t *cr);
1533 
1534 extern void	nfs4rename_update(vnode_t *, vnode_t *, nfs_fh4 *, char *);
1535 extern void	nfs4_update_paths(vnode_t *, char *, vnode_t *, char *,
1536 			vnode_t *);
1537 
1538 extern void	nfs4args_lookup_free(nfs_argop4 *, int);
1539 extern void	nfs4args_copen_free(OPEN4cargs *);
1540 
1541 extern void	nfs4_printfhandle(nfs4_fhandle_t *);
1542 
1543 extern void	nfs_free_mi4(mntinfo4_t *);
1544 extern void	sv4_free(servinfo4_t *);
1545 extern void	nfs4_mi_zonelist_add(mntinfo4_t *);
1546 extern int	nfs4_mi_zonelist_remove(mntinfo4_t *);
1547 extern int 	nfs4_secinfo_recov(mntinfo4_t *, vnode_t *, vnode_t *);
1548 extern void	nfs4_secinfo_init(void);
1549 extern void	nfs4_secinfo_fini(void);
1550 extern int	nfs4_secinfo_path(mntinfo4_t *, cred_t *, int);
1551 extern int 	nfs4_secinfo_vnode_otw(vnode_t *, char *, cred_t *);
1552 extern void	secinfo_free(sv_secinfo_t *);
1553 extern void	save_mnt_secinfo(servinfo4_t *);
1554 extern void	check_mnt_secinfo(servinfo4_t *, vnode_t *);
1555 extern int	vattr_to_fattr4(vattr_t *, vsecattr_t *, fattr4 *, int,
1556 				enum nfs_opnum4, bitmap4 supp_mask);
1557 extern int	nfs4_putapage(vnode_t *, page_t *, u_offset_t *, size_t *,
1558 			int, cred_t *);
1559 extern void	nfs4_write_error(vnode_t *, int, cred_t *);
1560 extern void	nfs4_lockcompletion(vnode_t *, int);
1561 extern bool_t	nfs4_map_lost_lock_conflict(vnode_t *);
1562 extern int	vtodv(vnode_t *, vnode_t **, cred_t *, bool_t);
1563 extern int	vtoname(vnode_t *, char *, ssize_t);
1564 extern void	nfs4open_confirm(vnode_t *, seqid4*, stateid4 *, cred_t *,
1565 		    bool_t, bool_t *, nfs4_open_owner_t *, bool_t,
1566 		    nfs4_error_t *, int *);
1567 extern void	nfs4_error_zinit(nfs4_error_t *);
1568 extern void	nfs4_error_init(nfs4_error_t *, int);
1569 extern void	nfs4_free_args(struct nfs_args *);
1570 
1571 extern void 	mi_hold(mntinfo4_t *);
1572 extern void	mi_rele(mntinfo4_t *);
1573 
1574 extern vnode_t	*find_referral_stubvp(vnode_t *, char *, cred_t *);
1575 extern int	 nfs4_setup_referral(vnode_t *, char *, vnode_t **, cred_t *);
1576 
1577 extern sec_data_t	*copy_sec_data(sec_data_t *);
1578 extern gss_clntdata_t	*copy_sec_data_gss(gss_clntdata_t *);
1579 
1580 #ifdef DEBUG
1581 extern int	nfs4_consistent_type(vnode_t *);
1582 #endif
1583 
1584 extern void	nfs4_init_dot_entries(void);
1585 extern void	nfs4_destroy_dot_entries(void);
1586 extern struct nfs4_callback_globals	*nfs4_get_callback_globals(void);
1587 
1588 extern struct nfs4_server nfs4_server_lst;
1589 
1590 extern clock_t nfs_write_error_interval;
1591 
1592 #endif /* _KERNEL */
1593 
1594 /*
1595  * Flags for nfs4getfh_otw.
1596  */
1597 
1598 #define	NFS4_GETFH_PUBLIC	0x01
1599 #define	NFS4_GETFH_NEEDSOP	0x02
1600 
1601 /*
1602  * Found through rnodes.
1603  *
1604  * The os_open_ref_count keeps track of the number of open file descriptor
1605  * references on this data structure.  It will be bumped for any successful
1606  * OTW OPEN call and any OPEN call that determines the OTW call is not
1607  * necessary and the open stream hasn't just been created (see
1608  * nfs4_is_otw_open_necessary).
1609  *
1610  * os_mapcnt is a count of the number of mmapped pages for a particular
1611  * open stream; this in conjunction w/ os_open_ref_count is used to
1612  * determine when to do a close to the server.  This is necessary because
1613  * of the semantics of doing open, mmap, close; the OTW close must wait
1614  * until all open and mmap references have vanished.
1615  *
1616  * 'os_valid' tells us whether this structure is about to be freed or not,
1617  * if it is then don't return it in find_open_stream().
1618  *
1619  * 'os_final_close' is set when a CLOSE OTW was attempted.  This is needed
1620  * so we can properly count the os_open_ref_count in cases where we VOP_CLOSE
1621  * without a VOP_OPEN, and have nfs4_inactive() drive the OTW CLOSE.  It
1622  * also helps differentiate the VOP_OPEN/VN_RELE case from the VOP_CLOSE
1623  * that tried to close OTW but failed, and left the state cleanup to
1624  * nfs4_inactive/CLOSE_FORCE.
1625  *
1626  * 'os_force_close' is used to let us know if an intervening thread came
1627  * and reopened the open stream after we decided to issue a CLOSE_FORCE,
1628  * but before we could actually process the CLOSE_FORCE.
1629  *
1630  * 'os_pending_close' is set when an over-the-wire CLOSE is deferred to the
1631  * lost state queue.
1632  *
1633  * 'open_stateid' is set to the last open stateid returned by the server unless
1634  * 'os_delegation' is 1, in which case 'open_stateid' refers to the
1635  * delegation stateid returned by the server.  This is used in cases where the
1636  * client tries to OPEN a file but already has a suitable delegation, so we
1637  * just stick the delegation stateid in the open stream.
1638  *
1639  * os_dc_openacc are open access bits which have been granted to the
1640  * open stream by virtue of a delegation, but which have not been seen
1641  * by the server.  This applies even if the open stream does not have
1642  * os_delegation set.  These bits are used when setting file locks to
1643  * determine whether an open with CLAIM_DELEGATE_CUR needs to be done
1644  * before the lock request can be sent to the server.  See
1645  * nfs4frlock_check_deleg().
1646  *
1647  * 'os_mmap_read/write' keep track of the read and write access our memory
1648  * maps require.  We need to keep track of this so we can provide the proper
1649  * access bits in the open/mmap/close/reboot/reopen case.
1650  *
1651  * 'os_failed_reopen' tells us that we failed to successfully reopen this
1652  * open stream; therefore, we should not use this open stateid as it is
1653  * not valid anymore. This flag is also used to indicate an unsuccessful
1654  * attempt to reopen a delegation open stream with CLAIM_DELEGATE_CUR.
1655  *
1656  * If 'os_orig_oo_name' is different than os_open_owner's oo_name
1657  * then this tells us that this open stream's open owner used a
1658  * bad seqid (that is, got NFS4ERR_BAD_SEQID).  If different, this open
1659  * stream will no longer be used for future OTW state releasing calls.
1660  *
1661  * Lock ordering:
1662  * rnode4_t::r_os_lock > os_sync_lock
1663  * os_sync_lock > rnode4_t::r_statelock
1664  * os_sync_lock > rnode4_t::r_statev4_lock
1665  * os_sync_lock > mntinfo4_t::mi_lock (via hold over rfs4call)
1666  *
1667  * The 'os_sync_lock' protects:
1668  *	open_stateid
1669  *	os_dc_openacc
1670  *	os_delegation
1671  *	os_failed_reopen
1672  *	os_final_close
1673  *	os_force_close
1674  *	os_mapcnt
1675  *	os_mmap_read
1676  *	os_mmap_write
1677  *	os_open_ref_count
1678  *	os_pending_close
1679  *	os_share_acc_read
1680  *	os_share_acc_write
1681  *	os_share_deny_none
1682  *	os_share_deny_read
1683  *	os_share_deny_write
1684  *	os_ref_count
1685  *	os_valid
1686  *
1687  * The rnode4_t::r_os_lock protects:
1688  *	os_node
1689  *
1690  * These fields are set at creation time and
1691  * read only after that:
1692  *	os_open_owner
1693  *	os_orig_oo_name
1694  */
1695 typedef struct nfs4_open_stream {
1696 	uint64_t		os_share_acc_read;
1697 	uint64_t		os_share_acc_write;
1698 	uint64_t		os_mmap_read;	/* read access our mmaps require */
1699 	uint64_t		os_mmap_write;	/* write access our mmaps require */
1700 	uint32_t		os_share_deny_none;
1701 	uint32_t		os_share_deny_read;
1702 	uint32_t		os_share_deny_write;
1703 	stateid4		open_stateid;	/* last open (or deleg) stateid */
1704 	int			os_dc_openacc;	/* access granted via delegation */
1705 	int			os_ref_count;
1706 	unsigned		os_valid:1;	/* says if about to be freed */
1707 	unsigned 		os_delegation:1; /* open_stateid is deleg stateid */
1708 	unsigned		os_final_close:1; /* a CLOSE otw was attempted */
1709 	unsigned 		os_pending_close:1; /* CLOSE deferred to lost state q */
1710 	unsigned 		os_failed_reopen:1; /* reopen failed; stateid invalid */
1711 	unsigned		os_force_close:1; /* detects reopen before CLOSE_FORCE */
1712 	int			os_open_ref_count; /* open file descriptor refs */
1713 	long			os_mapcnt;	/* # of mmapped pages */
1714 	list_node_t		os_node;	/* protected by rnode4_t::r_os_lock */
1715 	struct nfs4_open_owner	*os_open_owner;	/* read-only after creation */
1716 	uint64_t		os_orig_oo_name; /* read-only after creation */
1717 	kmutex_t		os_sync_lock;	/* see protected-field list above */
1718 } nfs4_open_stream_t;
1719 
1720 /*
1721  * This structure describes the format of the lock_owner_name
1722  * field of the lock owner.
1723  */
1724 
1725 typedef struct nfs4_lo_name {
1726 	uint64_t	ln_seq_num;
1727 	pid_t		ln_pid;
1728 } nfs4_lo_name_t;
1729 
1730 /*
1731  * Flags for lo_flags.
1732  */
1733 #define	NFS4_LOCK_SEQID_INUSE	0x1
1734 #define	NFS4_BAD_SEQID_LOCK	0x2
1735 
1736 /*
1737  * The lo_prev_rnode and lo_next_rnode are for a circular list that hangs
1738  * off the rnode.  If the links are NULL it means this object is not on the
1739  * list.
1740  *
1741  * 'lo_pending_rqsts' is non-zero if we ever tried to send a request and
1742  * didn't get a response back.  This is used to figure out if we have
1743  * possible remote v4 locks, so that we can clean up at process exit.  In
1744  * theory, the client should be able to figure out if the server received
1745  * the request (based on what seqid works), so maybe we can get rid of this
1746  * flag someday.
1747  *
1748  * 'lo_ref_count' tells us how many processes/threads are using this data
1749  * structure.  The rnode's list accounts for one reference.
1750  *
1751  * 'lo_just_created' is set to NFS4_JUST_CREATED when we first create the
1752  * data structure.  It is then set to NFS4_PERM_CREATED when a lock request
1753  * is successful using this lock owner structure.  We need to keep 'temporary'
1754  * lock owners around so we can properly keep the lock seqid synchronization
1755  * when multiple processes/threads are trying to create the lock owner for the
1756  * first time (especially with the DENIED error case).  Once
1757  * 'lo_just_created' is set to NFS4_PERM_CREATED, it doesn't change.
1758  *
1759  * 'lo_valid' tells us whether this structure is about to be freed or not,
1760  * if it is then don't return it from find_lock_owner().
1761  *
1762  * Retrieving and setting of 'lock_seqid' is protected by the
1763  * NFS4_LOCK_SEQID_INUSE flag.  Waiters for NFS4_LOCK_SEQID_INUSE should
1764  * use 'lo_cv_seqid_sync'.
1765  *
1766  * The setting of 'lock_stateid' is protected by the
1767  * NFS4_LOCK_SEQID_INUSE flag and 'lo_lock'.  The retrieving of the
1768  * 'lock_stateid' is protected by 'lo_lock', with the additional
1769  * requirement that the calling function can handle NFS4ERR_OLD_STATEID and
1770  * NFS4ERR_BAD_STATEID as appropriate.
1771  *
1772  * The setting of NFS4_BAD_SEQID_LOCK in lo_flags tells us whether this lock
1773  * owner used a bad seqid (that is, got NFS4ERR_BAD_SEQID).  With this set,
1774  * this lock owner will no longer be used for future OTW calls.  Once set,
1775  * it is never unset.
1776  *
1777  * Lock ordering:
1778  * rnode4_t::r_statev4_lock > lo_lock
1779  */
1780 typedef struct nfs4_lock_owner {
1781 	struct nfs4_lock_owner	*lo_next_rnode;	/* rnode list; NULL if off */
1782 	struct nfs4_lock_owner	*lo_prev_rnode;	/* rnode list; NULL if off */
1783 	int			lo_pid;
1784 	stateid4		lock_stateid;	/* read under lo_lock */
1785 	seqid4			lock_seqid;	/* NFS4_LOCK_SEQID_INUSE */
1786 	/*
1787 	 * Fix this to always be 12 bytes
1788 	 */
1789 	nfs4_lo_name_t		lock_owner_name;
1790 	int			lo_ref_count;	/* users; rnode list holds 1 */
1791 	int			lo_valid;	/* see find_lock_owner() */
1792 	int			lo_pending_rqsts; /* != 0: request w/o reply */
1793 	int			lo_just_created; /* NFS4_{JUST,PERM}_CREATED */
1794 	int			lo_flags;	/* NFS4_LOCK_SEQID_INUSE, */
						/* NFS4_BAD_SEQID_LOCK */
1795 	kcondvar_t		lo_cv_seqid_sync; /* wait: SEQID_INUSE clear */
1796 	kmutex_t		lo_lock;	/* after r_statev4_lock */
1797 	kthread_t		*lo_seqid_holder; /* debugging aid */
1798 } nfs4_lock_owner_t;
1799 
1800 /* for nfs4_lock_owner_t lookups */
1801 typedef enum {LOWN_ANY, LOWN_VALID_STATEID} lown_which_t;
1802 
1803 /* Number of times to retry a call that fails with state independent error */
1804 #define	NFS4_NUM_RECOV_RETRIES	3
1805 
1806 typedef enum {
1807 	NO_SID,
1808 	DEL_SID,
1809 	LOCK_SID,
1810 	OPEN_SID,
1811 	SPEC_SID
1812 } nfs4_stateid_type_t;
1813 
1814 typedef struct nfs4_stateid_types {
1815 	stateid4 d_sid;
1816 	stateid4 l_sid;
1817 	stateid4 o_sid;
1818 	nfs4_stateid_type_t cur_sid_type;
1819 } nfs4_stateid_types_t;
1820 
1821 /*
1822  * Per-zone data for dealing with callbacks.  Included here solely for the
1823  * benefit of MDB.
1824  */
1825 struct nfs4_callback_stats {	/* per-zone callback kstat_named_t entries */
1826 	kstat_named_t	delegations;
1827 	kstat_named_t	cb_getattr;
1828 	kstat_named_t	cb_recall;
1829 	kstat_named_t	cb_null;
1830 	kstat_named_t	cb_dispatch;
1831 	kstat_named_t	delegaccept_r;
1832 	kstat_named_t	delegaccept_rw;
1833 	kstat_named_t	delegreturn;
1834 	kstat_named_t	callbacks;
1835 	kstat_named_t	claim_cur;
1836 	kstat_named_t	claim_cur_ok;
1837 	kstat_named_t	recall_trunc;
1838 	kstat_named_t	recall_failed;
1839 	kstat_named_t	return_limit_write;
1840 	kstat_named_t	return_limit_addmap;
1841 	kstat_named_t	deleg_recover;
1842 	kstat_named_t	cb_illegal;
1843 };
1844 
1845 struct nfs4_callback_globals {	/* per-zone callback state */
1846 	kmutex_t nfs4_cb_lock;
1847 	kmutex_t nfs4_dlist_lock;	/* presumably guards nfs4_dlist; confirm */
1848 	int nfs4_program_hint;
1849 	/* this table maps the program number to the nfs4_server structure */
1850 	struct nfs4_server **nfs4prog2server;
1851 	list_t nfs4_dlist;
1852 	list_t nfs4_cb_ports;
1853 	struct nfs4_callback_stats nfs4_callback_stats;	/* kstat entries */
1854 #ifdef DEBUG
1855 	int nfs4_dlistadd_c;		/* present in DEBUG builds only */
1856 	int nfs4_dlistclean_c;		/* present in DEBUG builds only */
1857 #endif
1858 };
1859 
1860 typedef enum {
1861 	CLOSE_NORM,
1862 	CLOSE_DELMAP,
1863 	CLOSE_FORCE,
1864 	CLOSE_RESEND,
1865 	CLOSE_AFTER_RESEND
1866 } nfs4_close_type_t;
1867 
1868 /*
1869  * Structure to hold the bad seqid information that is passed
1870  * to the recovery framework.
1871  */
1872 typedef struct nfs4_bseqid_entry {	/* see nfs4_create_bseqid_entry() */
1873 	nfs4_open_owner_t	*bs_oop;	/* open owner */
1874 	nfs4_lock_owner_t	*bs_lop;	/* lock owner */
1875 	vnode_t			*bs_vp;
1876 	pid_t			bs_pid;
1877 	nfs4_tag_type_t		bs_tag;
1878 	seqid4			bs_seqid;
1879 	list_node_t		bs_node;	/* list linkage */
1880 } nfs4_bseqid_entry_t;
1881 
1882 #ifdef _KERNEL
1883 
1884 extern void	nfs4close_one(vnode_t *, nfs4_open_stream_t *, cred_t *, int,
1885 		    nfs4_lost_rqst_t *, nfs4_error_t *, nfs4_close_type_t,
1886 		    size_t, uint_t, uint_t);
1887 extern void	nfs4close_notw(vnode_t *, nfs4_open_stream_t *, int *);
1888 extern void	nfs4_set_lock_stateid(nfs4_lock_owner_t *, stateid4);
1889 extern void	open_owner_hold(nfs4_open_owner_t *);
1890 extern void	open_owner_rele(nfs4_open_owner_t *);
1891 extern nfs4_open_stream_t	*find_or_create_open_stream(nfs4_open_owner_t *,
1892 					struct rnode4 *, int *);
1893 extern nfs4_open_stream_t *find_open_stream(nfs4_open_owner_t *,
1894 				struct rnode4 *);
1895 extern nfs4_open_stream_t *create_open_stream(nfs4_open_owner_t *oop,
1896 				struct rnode4 *rp);
1897 extern void	open_stream_hold(nfs4_open_stream_t *);
1898 extern void	open_stream_rele(nfs4_open_stream_t *, struct rnode4 *);
1899 extern int	nfs4close_all(vnode_t *, cred_t *);
1900 extern void	lock_owner_hold(nfs4_lock_owner_t *);
1901 extern void	lock_owner_rele(nfs4_lock_owner_t *);
1902 extern nfs4_lock_owner_t *create_lock_owner(struct rnode4 *, pid_t);
1903 extern nfs4_lock_owner_t *find_lock_owner(struct rnode4 *, pid_t, lown_which_t);
1904 extern void	nfs4_rnode_remove_lock_owner(struct rnode4 *,
1905 			nfs4_lock_owner_t *);
1906 extern void	nfs4_flush_lock_owners(struct rnode4 *);
1907 extern void nfs4_setlockowner_args(lock_owner4 *, struct rnode4 *, pid_t);
1908 extern void	nfs4_set_open_seqid(seqid4, nfs4_open_owner_t *,
1909 		    nfs4_tag_type_t);
1910 extern void	nfs4_set_lock_seqid(seqid4, nfs4_lock_owner_t *);
1911 extern void	nfs4_get_and_set_next_open_seqid(nfs4_open_owner_t *,
1912 		    nfs4_tag_type_t);
1913 extern void	nfs4_end_open_seqid_sync(nfs4_open_owner_t *);
1914 extern int	nfs4_start_open_seqid_sync(nfs4_open_owner_t *, mntinfo4_t *);
1915 extern void	nfs4_end_lock_seqid_sync(nfs4_lock_owner_t *);
1916 extern int	nfs4_start_lock_seqid_sync(nfs4_lock_owner_t *, mntinfo4_t *);
1917 extern void	nfs4_setup_lock_args(nfs4_lock_owner_t *, nfs4_open_owner_t *,
1918 			nfs4_open_stream_t *, clientid4, locker4 *);
1919 extern void	nfs4_destroy_open_owner(nfs4_open_owner_t *);
1920 
1921 extern void		nfs4_renew_lease_thread(nfs4_server_t *);
1922 extern nfs4_server_t	*find_nfs4_server(mntinfo4_t *);
1923 extern nfs4_server_t	*find_nfs4_server_all(mntinfo4_t *, int all);
1924 extern nfs4_server_t	*new_nfs4_server(servinfo4_t *,	cred_t *);
1925 extern void		nfs4_mark_srv_dead(nfs4_server_t *);
1926 extern nfs4_server_t	*servinfo4_to_nfs4_server(servinfo4_t *);
1927 extern void		nfs4_inc_state_ref_count(mntinfo4_t *);
1928 extern void		nfs4_inc_state_ref_count_nolock(nfs4_server_t *,
1929 				mntinfo4_t *);
1930 extern void		nfs4_dec_state_ref_count(mntinfo4_t *);
1931 extern void		nfs4_dec_state_ref_count_nolock(nfs4_server_t *,
1932 				mntinfo4_t *);
1933 extern clientid4	mi2clientid(mntinfo4_t *);
1934 extern int		nfs4_server_in_recovery(nfs4_server_t *);
1935 extern bool_t		nfs4_server_vlock(nfs4_server_t *, int);
1936 extern nfs4_open_owner_t *create_open_owner(cred_t *, mntinfo4_t *);
1937 extern uint64_t		nfs4_get_new_oo_name(void);
1938 extern nfs4_open_owner_t *find_open_owner(cred_t *, int, mntinfo4_t *);
1939 extern nfs4_open_owner_t *find_open_owner_nolock(cred_t *, int, mntinfo4_t *);
1940 extern void	nfs4frlock(nfs4_lock_call_type_t, vnode_t *, int, flock64_t *,
1941 			int, u_offset_t, cred_t *, nfs4_error_t *,
1942 			nfs4_lost_rqst_t *, int *);
1943 extern void	nfs4open_dg_save_lost_rqst(int, nfs4_lost_rqst_t *,
1944 		    nfs4_open_owner_t *, nfs4_open_stream_t *, cred_t *,
1945 		    vnode_t *, int, int);
1946 extern void	nfs4_open_downgrade(int, int, nfs4_open_owner_t *,
1947 		    nfs4_open_stream_t *, vnode_t *, cred_t *,
1948 		    nfs4_lost_rqst_t *, nfs4_error_t *, cred_t **, seqid4 *);
1949 extern seqid4	nfs4_get_open_seqid(nfs4_open_owner_t *);
1950 extern cred_t	*nfs4_get_otw_cred(cred_t *, mntinfo4_t *, nfs4_open_owner_t *);
1951 extern void	nfs4_init_stateid_types(nfs4_stateid_types_t *);
1952 extern void	nfs4_save_stateid(stateid4 *, nfs4_stateid_types_t *);
1953 
1954 extern kmutex_t nfs4_server_lst_lock;
1955 
1956 extern void	nfs4callback_destroy(nfs4_server_t *);
1957 extern void	nfs4_callback_init(void);
1958 extern void	nfs4_callback_fini(void);
1959 extern void	nfs4_cb_args(nfs4_server_t *, struct knetconfig *,
1960 			SETCLIENTID4args *);
1961 extern void	nfs4delegreturn_async(struct rnode4 *, int, bool_t);
1962 
1963 extern enum nfs4_delegreturn_policy nfs4_delegreturn_policy;
1964 
1965 extern void	nfs4_add_mi_to_server(nfs4_server_t *, mntinfo4_t *);
1966 extern void	nfs4_remove_mi_from_server(mntinfo4_t *, nfs4_server_t *);
1967 extern nfs4_server_t *nfs4_move_mi(mntinfo4_t *, servinfo4_t *, servinfo4_t *);
1968 extern bool_t	nfs4_fs_active(nfs4_server_t *);
1969 extern void	nfs4_server_rele(nfs4_server_t *);
1970 extern bool_t	inlease(nfs4_server_t *);
1971 extern bool_t	nfs4_has_pages(vnode_t *);
1972 extern void	nfs4_log_badowner(mntinfo4_t *, nfs_opnum4);
1973 
1974 #endif /* _KERNEL */
1975 
1976 /*
1977  * Client State Recovery
1978  */
1979 
1980 /*
1981  * The following defines are used for rs_flags in
1982  * a nfs4_recov_state_t structure.
1983  *
1984  * NFS4_RS_RENAME_HELD		Indicates that the mi_rename_lock was held.
1985  * NFS4_RS_GRACE_MSG		Set once we have uprintf'ed a grace message.
1986  * NFS4_RS_DELAY_MSG		Set once we have uprintf'ed a delay message.
1987  * NFS4_RS_RECALL_HELD1		r_deleg_recall_lock for vp1 was held.
1988  * NFS4_RS_RECALL_HELD2		r_deleg_recall_lock for vp2 was held.
1989  */
1990 #define	NFS4_RS_RENAME_HELD	0x000000001
1991 #define	NFS4_RS_GRACE_MSG	0x000000002
1992 #define	NFS4_RS_DELAY_MSG	0x000000004
1993 #define	NFS4_RS_RECALL_HELD1	0x000000008
1994 #define	NFS4_RS_RECALL_HELD2	0x000000010
1995 
1996 /*
1997  * Information that is retrieved from nfs4_start_op() and that is
1998  * passed into nfs4_end_op().
1999  *
2000  * rs_sp is a reference to the nfs4_server that was found, or NULL.
2001  *
2002  * rs_num_retry_despite_err is the number of times the client retried an
2003  * OTW op despite a recovery error.  It is only incremented for hints
2004  * exempt from normal R4RECOVERR processing
2005  * (OH_CLOSE/OH_LOCKU/OH_DELEGRETURN).  (XXX this special-case code
2006  * needs review for possible removal.)
2007  * It is initialized wherever nfs4_recov_state_t is declared -- usually
2008  * very near initialization of rs_flags.
2009  */
2010 typedef struct {	/* filled by nfs4_start_op(), passed to nfs4_end_op() */
2011 	nfs4_server_t	*rs_sp;		/* nfs4_server found, or NULL */
2012 	int		rs_flags;	/* NFS4_RS_* bits */
2013 	int		rs_num_retry_despite_err; /* OTW retries after error */
2014 } nfs4_recov_state_t;
2015 
2016 /*
2017  * Flags for nfs4_check_remap, nfs4_remap_file and nfs4_remap_root.
2018  */
2019 
2020 #define	NFS4_REMAP_CKATTRS	1
2021 #define	NFS4_REMAP_NEEDSOP	2
2022 
2023 #ifdef _KERNEL
2024 
2025 extern int	nfs4_is_otw_open_necessary(nfs4_open_owner_t *, int,
2026 			vnode_t *, int, int *, int, nfs4_recov_state_t *);
2027 extern void	nfs4setclientid(struct mntinfo4 *, struct cred *, bool_t,
2028 			nfs4_error_t *);
2029 extern void	nfs4_reopen(vnode_t *, nfs4_open_stream_t *, nfs4_error_t *,
2030 			open_claim_type4, bool_t, bool_t);
2031 extern void	nfs4_remap_root(struct mntinfo4 *, nfs4_error_t *, int);
2032 extern void	nfs4_check_remap(mntinfo4_t *mi, vnode_t *vp, int,
2033 			nfs4_error_t *);
2034 extern void	nfs4_remap_file(mntinfo4_t *mi, vnode_t *vp, int,
2035 			nfs4_error_t *);
2036 extern int	nfs4_make_dotdot(struct nfs4_sharedfh *, hrtime_t,
2037 			vnode_t *, cred_t *, vnode_t **, int);
2038 extern void	nfs4_fail_recov(vnode_t *, char *, int, nfsstat4);
2039 
2040 extern int	nfs4_needs_recovery(nfs4_error_t *, bool_t, vfs_t *);
2041 extern int	nfs4_recov_marks_dead(nfsstat4);
2042 extern bool_t	nfs4_start_recovery(nfs4_error_t *, struct mntinfo4 *,
2043 			vnode_t *, vnode_t *, stateid4 *,
2044 			nfs4_lost_rqst_t *, nfs_opnum4, nfs4_bseqid_entry_t *,
2045 			vnode_t *, char *);
2046 extern int	nfs4_start_op(struct mntinfo4 *, vnode_t *, vnode_t *,
2047 			nfs4_recov_state_t *);
2048 extern void	nfs4_end_op(struct mntinfo4 *, vnode_t *, vnode_t *,
2049 			nfs4_recov_state_t *, bool_t);
2050 extern int	nfs4_start_fop(struct mntinfo4 *, vnode_t *, vnode_t *,
2051 			nfs4_op_hint_t, nfs4_recov_state_t *, bool_t *);
2052 extern void	nfs4_end_fop(struct mntinfo4 *, vnode_t *, vnode_t *,
2053 				nfs4_op_hint_t, nfs4_recov_state_t *, bool_t);
2054 extern char	*nfs4_recov_action_to_str(nfs4_recov_t);
2055 
2056 /*
2057  * In sequence, code desiring to unmount an ephemeral tree must
2058  * call nfs4_ephemeral_umount, nfs4_ephemeral_umount_activate,
2059  * and nfs4_ephemeral_umount_unlock. The _unlock must also be
2060  * called on all error paths that occur before it would naturally
2061  * be invoked.
2062  *
2063  * The caller must also provide a pointer to a boolean to keep track
2064  * of whether or not the code in _unlock is to be run.
2065  */
2066 extern void	nfs4_ephemeral_umount_activate(mntinfo4_t *,
2067     bool_t *, nfs4_ephemeral_tree_t **);
2068 extern int	nfs4_ephemeral_umount(mntinfo4_t *, int, cred_t *,
2069     bool_t *, nfs4_ephemeral_tree_t **);
2070 extern void	nfs4_ephemeral_umount_unlock(bool_t *,
2071     nfs4_ephemeral_tree_t **);
2072 
2073 extern int	nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp);
2074 
2075 extern int	nfs4_callmapid(utf8string *, struct nfs_fsl_info *);
2076 extern int	nfs4_fetch_locations(mntinfo4_t *, struct nfs4_sharedfh *,
2077     char *, cred_t *, nfs4_ga_res_t *, COMPOUND4res_clnt *, bool_t);
2078 
2079 extern int	wait_for_recall(vnode_t *, vnode_t *, nfs4_op_hint_t,
2080 			nfs4_recov_state_t *);
2081 extern void	nfs4_end_op_recall(vnode_t *, vnode_t *, nfs4_recov_state_t *);
2082 extern void	nfs4_send_siglost(pid_t, mntinfo4_t *mi, vnode_t *vp, bool_t,
2083 		    int, nfsstat4);
2084 extern time_t	nfs4err_delay_time;
2085 extern void	nfs4_set_grace_wait(mntinfo4_t *);
2086 extern void	nfs4_set_delay_wait(vnode_t *);
2087 extern int	nfs4_wait_for_grace(mntinfo4_t *, nfs4_recov_state_t *);
2088 extern int	nfs4_wait_for_delay(vnode_t *, nfs4_recov_state_t *);
2089 extern nfs4_bseqid_entry_t *nfs4_create_bseqid_entry(nfs4_open_owner_t *,
2090 		    nfs4_lock_owner_t *, vnode_t *, pid_t, nfs4_tag_type_t,
2091 		    seqid4);
2092 
2093 extern void	nfs4_resend_open_otw(vnode_t **, nfs4_lost_rqst_t *,
2094 			nfs4_error_t *);
2095 extern void	nfs4_resend_delegreturn(nfs4_lost_rqst_t *, nfs4_error_t *,
2096 			nfs4_server_t *);
2097 extern int	nfs4_rpc_retry_error(int);
2098 extern int	nfs4_try_failover(nfs4_error_t *);
2099 extern void	nfs4_free_msg(nfs4_debug_msg_t *);
2100 extern void	nfs4_mnt_recov_kstat_init(vfs_t *);
2101 extern void	nfs4_mi_kstat_inc_delay(mntinfo4_t *);
2102 extern void	nfs4_mi_kstat_inc_no_grace(mntinfo4_t *);
2103 extern char	*nfs4_stat_to_str(nfsstat4);
2104 extern char	*nfs4_op_to_str(nfs_opnum4);
2105 
2106 extern void	nfs4_queue_event(nfs4_event_type_t, mntinfo4_t *, char *,
2107 		    uint_t, vnode_t *, vnode_t *, nfsstat4, char *, pid_t,
2108 		    nfs4_tag_type_t, nfs4_tag_type_t, seqid4, seqid4);
2109 extern void	nfs4_queue_fact(nfs4_fact_type_t, mntinfo4_t *, nfsstat4,
2110 		    nfs4_recov_t, nfs_opnum4, bool_t, char *, int, vnode_t *);
2111 #pragma	rarely_called(nfs4_queue_event)
2112 #pragma	rarely_called(nfs4_queue_fact)
2113 
2114 /* Used for preformed "." and ".." dirents */
2115 extern char	*nfs4_dot_entries;
2116 extern char	*nfs4_dot_dot_entry;
2117 
2118 #ifdef	DEBUG
2119 extern uint_t	nfs4_tsd_key;
2120 #endif
2121 
2122 #endif /* _KERNEL */
2123 
2124 /*
2125  * Filehandle management.
2126  *
2127  * Filehandles can change in v4, so rather than storing the filehandle
2128  * directly in the rnode, etc., we manage the filehandle through one of
2129  * these objects.
2130  * Locking: sfh_fh and sfh_tree are protected by the filesystem's
2131  * mi_fh_lock.  The reference count and flags are protected by sfh_lock.
2132  * sfh_mi is read-only.
2133  *
2134  * mntinfo4_t::mi_fh_lock > sfh_lock.
2135  */
2136 
2137 typedef struct nfs4_sharedfh {
2138 	nfs_fh4 sfh_fh;			/* key, current fh; mi_fh_lock */
2139 	kmutex_t sfh_lock;		/* protects sfh_refcnt, sfh_flags */
2140 	uint_t sfh_refcnt;		/* reference count; sfh_lock */
2141 	uint_t sfh_flags;		/* flags; sfh_lock */
2142 	mntinfo4_t *sfh_mi;		/* backptr to filesystem; read-only */
2143 	avl_node_t sfh_tree;		/* used by avl package; mi_fh_lock */
2144 } nfs4_sharedfh_t;
2145 
2146 #define	SFH4_SAME(sfh1, sfh2)	((sfh1) == (sfh2))
2147 
2148 /*
2149  * Flags.
2150  */
2151 #define	SFH4_IN_TREE	0x1		/* currently in an AVL tree */
2152 
2153 #ifdef _KERNEL
2154 
2155 extern void sfh4_createtab(avl_tree_t *);
2156 extern nfs4_sharedfh_t *sfh4_get(const nfs_fh4 *, mntinfo4_t *);
2157 extern nfs4_sharedfh_t *sfh4_put(const nfs_fh4 *, mntinfo4_t *,
2158 				nfs4_sharedfh_t *);
2159 extern void sfh4_update(nfs4_sharedfh_t *, const nfs_fh4 *);
2160 extern void sfh4_copyval(const nfs4_sharedfh_t *, nfs4_fhandle_t *);
2161 extern void sfh4_hold(nfs4_sharedfh_t *);
2162 extern void sfh4_rele(nfs4_sharedfh_t **);
2163 extern void sfh4_printfhandle(const nfs4_sharedfh_t *);
2164 
2165 #endif
2166 
2167 /*
2168  * Path and file name management.
2169  *
2170  * This type stores the name of an entry in the filesystem and keeps enough
2171  * information that it can provide a complete path.  All fields are
2172  * protected by fn_lock, except for the reference count, which is managed
2173  * using atomic add/subtract.
2174  *
2175  * Additionally shared filehandle for this fname is stored.
2176  * Normally, fn_get() when it creates this fname stores the passed in
2177  * shared fh in fn_sfh by doing sfh_hold. Similarly the path which
2178  * destroys this fname releases the reference on this fh by doing sfh_rele.
2179  *
2180  * fn_get uses the fn_sfh to refine the comparison in cases
2181  * where we have matched the name but have differing file handles,
2182  * this normally happens due to
2183  *
2184  *	1. Server side rename of a file/directory.
2185  *	2. Another client renaming a file/directory on the server.
2186  *
2187  * Differing names but same filehandle is possible as in the case of hardlinks,
2188  * but differing filehandles with same name component will later confuse
2189  * the client and can cause various panics.
2190  *
2191  * Lock order: child and then parent.
2192  */
2193 
2194 typedef struct nfs4_fname {
2195 	struct nfs4_fname *fn_parent;	/* parent name; null if fs root */
2196 	char *fn_name;			/* the actual name */
2197 	ssize_t fn_len;			/* strlen(fn_name) */
2198 	uint32_t fn_refcnt;		/* ref count; atomic, not fn_lock */
2199 	kmutex_t fn_lock;		/* protects all but fn_refcnt */
2200 	avl_node_t fn_tree;		/* used by avl package */
2201 	avl_tree_t fn_children;		/* children, if any */
2202 	nfs4_sharedfh_t *fn_sfh;	/* The fh for this fname; held */
2203 } nfs4_fname_t;
2204 
2205 #ifdef _KERNEL
2206 
2207 extern vnode_t	nfs4_xattr_notsupp_vnode;	/* used via macro below */
2208 #define	NFS4_XATTR_DIR_NOTSUPP	(&nfs4_xattr_notsupp_vnode)
2209 
2210 extern nfs4_fname_t *fn_get(nfs4_fname_t *, char *, nfs4_sharedfh_t *);
2211 extern void fn_hold(nfs4_fname_t *);
2212 extern void fn_rele(nfs4_fname_t **);
2213 extern char *fn_name(nfs4_fname_t *);
2214 extern char *fn_path(nfs4_fname_t *);
2215 extern void fn_move(nfs4_fname_t *, nfs4_fname_t *, char *);
2216 extern nfs4_fname_t *fn_parent(nfs4_fname_t *);
2217 
2218 /* Referral Support */
2219 extern int nfs4_process_referral(mntinfo4_t *, nfs4_sharedfh_t *, char *,
2220     cred_t *, nfs4_ga_res_t *, COMPOUND4res_clnt *, struct nfs_fsl_info *);
2221 
2222 #endif
2223 
2224 /*
2225  * Per-zone data for managing client handles, included in this file for the
2226  * benefit of MDB.
2227  */
2228 struct nfs4_clnt {	/* per-zone client handle data */
2229 	struct chhead	*nfscl_chtable4;
2230 	kmutex_t	nfscl_chtable4_lock; /* presumably guards table; confirm */
2231 	zoneid_t	nfscl_zoneid;
2232 	list_node_t	nfscl_node;	/* list linkage */
2233 	struct clstat4	nfscl_stat;
2234 };
2235 
2236 #ifdef	__cplusplus
2237 }
2238 #endif
2239 
2240 #endif /* _NFS4_CLNT_H */
2241