xref: /illumos-gate/usr/src/uts/common/nfs/nfs4.h (revision a57549b4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Copyright 2018 Nexenta Systems, Inc.
29  * Copyright 2019 Nexenta by DDN, Inc.
30  */
31 
32 #ifndef _NFS4_H
33 #define	_NFS4_H
34 
35 #include <sys/types.h>
36 #include <sys/vnode.h>
37 #include <sys/fem.h>
38 #include <rpc/rpc.h>
39 #include <nfs/nfs.h>
40 
41 #ifdef _KERNEL
42 #include <nfs/nfs4_kprot.h>
43 #include <sys/nvpair.h>
44 #else
45 #include <rpcsvc/nfs4_prot.h>
46 #endif
47 #include <nfs/nfs4_attr.h>
48 #include <sys/acl.h>
49 #include <sys/list.h>
50 #include <nfs/nfs4x.h>
51 
52 #ifdef	__cplusplus
53 extern "C" {
54 #endif
55 
/*
 * Size limits, one per variable-length NFSv4 item.
 * NOTE(review): the units (presumably bytes) and the exact XDR types these
 * bound (sec_oid4, utf8string, linktext4, pathname4 going by the names) are
 * not visible in this header -- confirm against the users.
 */
#define	NFS4_MAX_SECOID4	65536
#define	NFS4_MAX_UTF8STRING	65536
#define	NFS4_MAX_LINKTEXT4	65536
#define	NFS4_MAX_PATHNAME4	65536
60 
61 struct nfs_fsl_info {
62 	uint_t netbuf_len;
63 	uint_t netnm_len;
64 	uint_t knconf_len;
65 	char *netname;
66 	struct netbuf *addr;
67 	struct knetconfig *knconf;
68 };
69 
70 #ifdef _KERNEL
71 
72 typedef struct nfs4_fhandle {
73 	int fh_len;
74 	char fh_buf[NFS4_FHSIZE];
75 } nfs4_fhandle_t;
76 
/* Minor version numbers for the forward and the callback program. */
#define	NFS4_MINORVERSION 0
#define	CB4_MINORVERSION 0

/*
 * First and last operation numbers, per minor version.  LAST_NFS41_OP and
 * LAST_NFS42_OP currently name the same operation, and LAST_NFS4_OP
 * follows LAST_NFS42_OP.
 */
#define	FIRST_NFS4_OP   OP_ACCESS
#define	LAST_NFS40_OP   OP_RELEASE_LOCKOWNER
#define	LAST_NFS41_OP   OP_RECLAIM_COMPLETE
#define	LAST_NFS42_OP   OP_RECLAIM_COMPLETE
#define	LAST_NFS4_OP    LAST_NFS42_OP
85 
/*
 * Set the fattr4_change variable using a time struct. Note that change
 * is 64 bits, but timestruc_t is 128 bits in a 64-bit kernel.
 *
 * The seconds end up in the upper 32 bits and the nanoseconds, truncated
 * to 32 bits, in the lower 32 bits.  "change" is expanded three times and
 * "ts" twice, so neither argument may carry side effects.
 *
 * The expansion is deliberately left as a bare compound statement so that
 * call sites compile with or without a trailing semicolon.
 */
#define	NFS4_SET_FATTR4_CHANGE(change, ts)			\
{								\
	(change) = (ts).tv_sec;					\
	(change) <<= 32;					\
	(change) |= (uint32_t)((ts).tv_nsec);			\
}
96 
/*
 * Server lease period.  Value is in seconds; also used for the grace
 * period.
 */
extern time_t rfs4_lease_time;
101 
/*
 * This set of typedefs and interfaces represents the core or base set
 * of functionality that backs the NFSv4 server's state related data
 * structures.  Since the NFSv4 server needs inter-RPC state to be
 * available that is unrelated to the filesystem (in other words,
 * soft-state), this functionality is needed to maintain that and is
 * written to be somewhat flexible to adapt to the various types of
 * data structures contained within the server.
 *
 * The basic structure at this level is that the server maintains a
 * global "database" which consists of a set of tables.  Each table
 * contains a set of like data structures.  Each table is indexed by
 * at least one hash function and in most cases two hashes.  Each
 * table's characteristics are set when it is created at run-time via
 * rfs4_table_create().  All table creation and related functions are
 * located in nfs4_state.c.  The generic database functionality is
 * located in nfs4_db.c.
 */

typedef struct rfs4_dbe rfs4_dbe_t;		/* basic opaque db entry */
typedef struct rfs4_table rfs4_table_t;		/* basic table type */
typedef struct rfs4_index rfs4_index_t;		/* index */
typedef struct rfs4_database rfs4_database_t;	/* and database */

/*
 * Opaque entry type for later use.  Note that rfs4_entry_t is itself a
 * pointer type: it points at an anonymous struct whose only member is
 * the rfs4_dbe_t pointer.
 */
typedef struct {
	rfs4_dbe_t *dbe;
} *rfs4_entry_t;
129 
130 /*
131  * NFSv4 server state databases
132  *
133  * Initialized when the module is loaded and used by NFSv4 state tables.
134  * These kmem_cache free pools are used globally, the NFSv4 state tables
135  * which make use of these kmem_cache free pools are per zone.
136  */
137 extern kmem_cache_t *rfs4_client_mem_cache;
138 extern kmem_cache_t *rfs4_clntIP_mem_cache;
139 extern kmem_cache_t *rfs4_openown_mem_cache;
140 extern kmem_cache_t *rfs4_openstID_mem_cache;
141 extern kmem_cache_t *rfs4_lockstID_mem_cache;
142 extern kmem_cache_t *rfs4_lockown_mem_cache;
143 extern kmem_cache_t *rfs4_file_mem_cache;
144 extern kmem_cache_t *rfs4_delegstID_mem_cache;
145 extern kmem_cache_t *rfs4_session_mem_cache;
146 
147 /* database, table, index creation entry points */
148 extern rfs4_database_t *rfs4_database_create(uint32_t);
149 extern void		rfs4_database_shutdown(rfs4_database_t *);
150 extern void		rfs4_database_destroy(rfs4_database_t *);
151 
152 extern void		rfs4_database_destroy(rfs4_database_t *);
153 
154 extern kmem_cache_t	*nfs4_init_mem_cache(char *, uint32_t, uint32_t,
155 				uint32_t);
156 extern rfs4_table_t	*rfs4_table_create(rfs4_database_t *, char *,
157 				time_t, uint32_t,
158 				bool_t (*create)(rfs4_entry_t, void *),
159 				void (*destroy)(rfs4_entry_t),
160 				bool_t (*expiry)(rfs4_entry_t),
161 				uint32_t, uint32_t, uint32_t, id_t);
162 extern void		rfs4_table_destroy(rfs4_database_t *, rfs4_table_t *);
163 extern rfs4_index_t	*rfs4_index_create(rfs4_table_t *, char *,
164 				uint32_t (*hash)(void *),
165 				bool_t (compare)(rfs4_entry_t, void *),
166 				void *(*mkkey)(rfs4_entry_t), bool_t);
167 extern void		rfs4_index_destroy(rfs4_index_t *);
168 
/* Type used to direct rfs4_dbsearch() in what types of records to inspect */
typedef enum {
	RFS4_DBS_VALID,
	RFS4_DBS_INVALID
} rfs4_dbsearch_type_t;
171 /* search and db entry manipulation entry points */
172 extern rfs4_entry_t	rfs4_dbsearch(rfs4_index_t *, void *,
173 				bool_t *, void *, rfs4_dbsearch_type_t);
174 extern void		rfs4_dbe_lock(rfs4_dbe_t *);
175 extern void		rfs4_dbe_unlock(rfs4_dbe_t *);
176 extern clock_t		rfs4_dbe_twait(rfs4_dbe_t *, clock_t);
177 extern void		rfs4_dbe_cv_broadcast(rfs4_dbe_t *);
178 extern void		rfs4_dbe_hold(rfs4_dbe_t *);
179 extern void		rfs4_dbe_hold_nolock(rfs4_dbe_t *);
180 extern void		rfs4_dbe_rele_nolock(rfs4_dbe_t *);
181 extern void		rfs4_dbe_rele(rfs4_dbe_t *);
182 extern uint32_t	rfs4_dbe_refcnt(rfs4_dbe_t *);
183 extern id_t		rfs4_dbe_getid(rfs4_dbe_t *);
184 extern void		rfs4_dbe_invalidate(rfs4_dbe_t *);
185 extern bool_t		rfs4_dbe_is_invalid(rfs4_dbe_t *);
186 extern time_t		rfs4_dbe_get_timerele(rfs4_dbe_t *);
187 extern void		rfs4_dbe_hide(rfs4_dbe_t *);
188 extern void		rfs4_dbe_unhide(rfs4_dbe_t *);
189 #ifdef DEBUG
190 extern bool_t		rfs4_dbe_islocked(rfs4_dbe_t *);
191 #endif
192 extern void		rfs4_dbe_walk(rfs4_table_t *,
193 			void (*callout)(rfs4_entry_t, void *), void *);
194 
/*
 * Minimal server stable storage.
 *
 * Currently the NFSv4 server will only save the client
 * ID (the long version) so that it will be able to
 * grant possible reclaim requests during the infamous
 * grace_period.
 */

/*
 * The product is parenthesized so the macro keeps its value when it is
 * used inside a larger expression (e.g. as a divisor).
 */
#define	RFS4_SS_DIRSIZE	(64 * 1024)
#define	NFS4_SS_VERSION 1
206 
/*
 * Handy pathname structure: a MAXPATHLEN-sized buffer plus a "leaf"
 * pointer.
 * NOTE(review): leaf presumably points at the final component inside
 * pn -- confirm against the code that fills it in.
 */
typedef struct ss_pn {
	char *leaf;
	char pn[MAXPATHLEN];
} rfs4_ss_pn_t;
212 
213 /*
214  * The server will build this link list on startup. It represents the
215  * clients that have had valid state on the server in a prior instance.
216  *
217  */
218 typedef struct rfs4_oldstate {
219 	struct rfs4_oldstate	*next;
220 	struct rfs4_oldstate	*prev;
221 	rfs4_ss_pn_t		*ss_pn;
222 	nfs_client_id4		cl_id4;
223 } rfs4_oldstate_t;
224 
225 /*
226  * This union is used to overlay the server's internal treatment of
227  * the protocols stateid4 datatype.  Therefore, "bits" must not exceed
228  * the size of stateid4 and more importantly should match the size of
229  * stateid4.  The chgseq field must the first entry since it overlays
230  * stateid4.seqid.
231  */
232 typedef union {
233 	stateid4 stateid;
234 	struct {
235 		uint32_t chgseq;	/* State changes / protocol's seqid */
236 		uint32_t boottime;	/* boot time  */
237 		uint32_t type:2;	/* stateid_type_t as define below */
238 		uint32_t clnodeid:8;	/* cluster server nodeid */
239 		uint32_t ident:22;	/* 2^22-1 openowner x fhs */
240 		pid_t	 pid;		/* pid of corresponding lock owner */
241 	} bits;
242 } stateid_t;
/*
 * Note that the way the type field of stateid_t is defined (a 2-bit
 * bit-field), this enum must not have more than 4 members.
 */
typedef enum {
	OPENID,
	LOCKID,
	DELEGID
} stateid_type_t;
248 
249 /*
250  * "wait" struct for use in the open open and lock owner state
251  * structures to provide serialization between server threads that are
252  * handling requests for the same open owner or lock stateid.  This
253  * way only one thread will be updating things like sequence ids,
254  * replay cache and stateid at a time.
255  */
256 typedef struct rfs4_state_wait {
257 	uint32_t		sw_active;
258 	uint32_t		sw_wait_count;
259 	kmutex_t		sw_cv_lock[1];
260 	kcondvar_t		sw_cv[1];
261 } rfs4_state_wait_t;
262 
263 extern void	rfs4_sw_enter(rfs4_state_wait_t *);
264 extern void	rfs4_sw_exit(rfs4_state_wait_t *);
265 
/*
 * This enum and the following rfs4_cbinfo_t struct are used to
 * maintain information about the callback path used from the server
 * to client for operations like CB_GETATTR and CB_RECALL.  The
 * rfs4_cbinfo_t struct is meant to be encompassed in the client
 * struct and managed within that structure's locking scheme.
 *
 * The various states of the callback path are used by the server to
 * determine if delegations should initially be provided to a client
 * and then later on if connectivity has been lost and delegations
 * should be revoked.
 */

/*
 * CB_NOCHANGE	- Special value used for interfaces within the delegation
 *		  code to signify that "no change" has occurred to the
 *		  callback path
 * CB_UNINIT	- No callback info provided by the client
 * CB_NONE	- Callback info provided but CB_NULL call
 *		  has yet to be attempted
 * CB_OK	- Callback path tested with CB_NULL with success
 * CB_INPROG	- Callback path currently being tested with CB_NULL
 * CB_FAILED	- Callback path was == CB_OK but has failed
 *		  with timeout/rpc error
 * CB_BAD	- Callback info provided but CB_NULL failed
 */
typedef enum {
	CB_NOCHANGE = 0,
	CB_UNINIT = 1,
	CB_NONE = 2,
	CB_OK = 3,
	CB_INPROG = 4,
	CB_FAILED = 5,
	CB_BAD = 6
} rfs4_cbstate_t;
301 
#define	RFS4_CBCH_MAX	10	/* size of callback client handle cache */
303 /*
304  * Callback info for a client.
305  * Client only provides: cb_client4 and cb_ident
306  * The rest of the information is used to track callback path status
307  * and usage.
308  *
309  * cb_state - used as comments for the rfs4_cbstate_t enum indicate
310  * cb_notified_of_cb_path_down - if the callback path was once CB_OK and
311  *	has hence CB_FAILED, the client needs to be notified via RENEW.
312  * cb_timefailed - current time when cb_state transitioned from
313  *	CB_OK -> CB_FAILED.  Meant for observability.  When did that happen?
314  * cb_chc_free/cb_chc - cache of client handles for the callback path
315  * cb_ident - SETCLIENTID provided callback_ident value
316  * callback - SETCLIENTID provided cb_client4 value
317  * cb_refcnt - current number of users of this structure's content
318  *	protected by cb_lock
319  * cb_badbehavior - how many times did a client do something we didn't like?
320  * cb_lock - lock for contents of cbinfo
321  * cb_cv - used to allow threads to wait on CB_NULL completion
322  * cb_nullcaller - is there a thread currently taking care of
323  *	new callback information?
324  * cb_cv_nullcaller - used by the thread doing CB_NULL to wait on
325  *	threads that may be using client handles of the current
326  *	client handle cache.
327  * newer - new callback info provided by a client and awaiting
328  *	CB_NULL testing and move to regular cbinfo.
329  */
330 typedef struct {
331 	rfs4_cbstate_t	cb_state;
332 	unsigned	cb_notified_of_cb_path_down:1;
333 	time_t		cb_timefailed;
334 	int		cb_chc_free;
335 	CLIENT		*cb_chc[RFS4_CBCH_MAX];
336 	uint32_t	cb_ident;
337 	cb_client4	cb_callback;
338 	uint32_t	cb_refcnt;
339 	uint32_t	cb_badbehavior;
340 	kmutex_t	cb_lock[1];
341 	kcondvar_t	cb_cv[1];
342 	bool_t		cb_nullcaller;
343 	kcondvar_t	cb_cv_nullcaller[1];
344 	struct {
345 		bool_t		cb_new;
346 		bool_t		cb_confirmed;
347 		uint32_t	cb_ident;
348 		cb_client4	cb_callback;
349 	} cb_newer;
350 } rfs4_cbinfo_t;
351 
352 /*
353  * A server instance. We can associate sets of clients - via a pointer in
354  * rfs4_client_t - with a given server instance, allowing us to treat clients
355  * in the set differently to clients in other sets.
356  *
357  * Currently used only for Sun Cluster HA-NFS support, to group clients
358  * on NFS resource failover so each set of clients gets its own dedicated
359  * grace period and distributed stable storage data.
360  */
361 typedef struct rfs4_servinst {
362 	int			dss_npaths;
363 	krwlock_t		rwlock;
364 	krwlock_t		oldstate_lock;
365 	time_t			start_time;
366 	time_t			grace_period;
367 	uint_t			nreclaim;	/* number reclaim clients  */
368 	rfs4_oldstate_t		*oldstate;
369 	struct rfs4_dss_path	**dss_paths;
370 	struct rfs4_servinst	*next;
371 	struct rfs4_servinst	*prev;
372 } rfs4_servinst_t;
373 
/*
 * DSS: distributed stable storage
 *
 * One path entry.  The next/prev members come first because the entries
 * are linked with insque/remque.
 */

typedef struct rfs4_dss_path {
	struct rfs4_dss_path	*next; /* for insque/remque */
	struct rfs4_dss_path	*prev; /* for insque/remque */
	char			*path;
	struct rfs4_servinst	*sip;
	unsigned		index; /* offset in servinst's array */
} rfs4_dss_path_t;
385 
386 /* array of paths passed-in from nfsd command-line; stored in nvlist */
387 extern char		**rfs4_dss_newpaths;
388 extern uint_t		rfs4_dss_numnewpaths;
389 
390 /* nvlists of all DSS paths: current, and before last warmstart */
391 extern nvlist_t *rfs4_dss_paths, *rfs4_dss_oldpaths;
392 
393 /*
394  * The server maintains a set of state on a per client basis that
395  * matches that of the protocol requirements.  A client's state is
396  * rooted with the rfs4_client_t struct of which there is one per
397  * client and is created when SETCLIENTID/SETCLIENTID_CONFIRM are
398  * received.  From there, the server then creates rfs4_openowner_t
399  * structs for each new open owner from that client and are initiated
400  * at OPEN/OPEN_CONFIRM (when the open owner is new to the server).
401  * At OPEN, at least two other structures are created, and potentially a
402  * third.  rfs4_state_t is created to track the association between an
403  * open owner and a particular file. An rfs4_file_t struct may be
404  * created (if the file is not already open) at OPEN as well.  The
405  * rfs4_file_t struct is the only one that is per server and not per
406  * client.  The rfs4_deleg_state_t struct is created in the
407  * instance that the server is going to provide a delegation for the
408  * file being OPENed.  Finally, the rfs4_lockowner_t is created at the
409  * first use of a lock owner at the server and is a result of the LOCK
410  * operation.  The rfs4_lo_state_t struct is then created to represent
411  * the relation between the lock owner and the file.
412  *
413  */
414 /*
415  * The following ascii art represents each of these data structs and
416  * their references to each other.  Note: "<-(x)->" represents the
417  * doubly link lists (list_t).
418  *
419  *                          ____________________
420  *                         |                    |
421  *                         |    rfs4_client_t   |
422  *                       ->|         (1)        |<-
423  *                      /  |____________________|  \
424  *                     /              ^             \
425  *                    /               |              \
426  *  ____________________    ____________________    ____________________
427  * |                    |  |                    |  |                    |
428  * |  rfs4_lockowner_t  |  |  rfs4_openowner_t  |  | rfs4_deleg_state_t |
429  * |                    |  |     (3)    <-(1)-> |  |            <-(2)-> |
430  * |____________________|  |____________________|  |____________________|
431  *           ^                        ^                       |
432  *           |                        |                       V
433  *  ____________________    ____________________    ____________________
434  * |                    |  |                    |  |                    |
435  * |  rfs4_lo_state_t   |->|    rfs4_state_t    |->|     rfs4_file_t    |
436  * |            <-(4)-> |  |     (4)    <-(3)-> |  |        (2)         |
437  * |____________________|  |____________________|  |____________________|
438  */
439 /*
440  * Each of these data types are kept in a separate rfs4_table_t and is
441  * actually encapsulated within a rfs4_dbe_t struct.  The various
442  * tables and their construction is done in nfs4_state.c but
443  * documented here to completeness.
444  *
445  * Table		Data struct stored	Indexed by
446  * -----		------------------	----------
447  * rfs4_client_tab	rfs4_client_t		nfs_client_id4
448  *						clientid4
449  *
450  * rfs4_openowner_tab	rfs4_openowner_t	open_owner4
451  *
452  * rfs4_state_tab	rfs4_state_t		open_owner4 | file
453  *						stateid
454  *
455  * rfs4_lo_state_tab	rfs4_lo_state_t		lockowner | stateid
456  *						lock_stateid
457  *
458  * rfs4_lockowner_tab	rfs4_lockowner_t	lockowner
459  *						pid
460  *
461  * rfs4_file_tab	rfs4_file_t		filehandle
462  *
463  * rfs4_deleg_state_tab	rfs4_deleg_state_t	clientid4 | file
464  *						deleg_stateid
465  */
466 
467 /*
468  * The client struct, it is the root of all state for a particular
469  * client.  The client is identified by the nfs_client_id4 via
470  * SETCLIENTID and the server returns the clientid4 as short hand reference
471  */
472 /*
473  * Client struct - as mentioned above it is the root of all state for
474  * a single client as identified by the client supplied nfs_client_id4
475  *
476  * dbe - encapsulation struct
477  * clientid - server assigned short hand reference to client
478  * nfs_client - client supplied identifier for itself
479  * confirm_verf - the value provided to the client for SETCLIENTID_CONFIRM
480  * need_confirm - does this client need to be SETCLIENTID_CONFIRMed?
481  *
482  * unlksys_completed - has an F_UNLKSYS been done for this client which
483  *		says that the use of cleanlocks() on individual files
484  *		is not required?
485  * can_reclaim - indicates if client is allowed to reclaim after server
486  *		start-up (client had previous state at server)
487  * ss_remove - indicates that the rfs4_client_destroy function should
488  *		clean up stable storage file.
489  * forced_expire - set if the sysadmin has used clear_locks for this client.
490  * no_referrals - set if the client is Solaris and pre-dates referrals
491  * deleg_revoked - how many delegations have been revoked for this client?
492  *
493  * cp_confirmed - this refers to a confirmed client struct that has
494  * the same nfs_client_id4 as this client struct.  When/if this client
495  * struct is confirmed via SETCLINETID_CONFIRM, the previously
496  * confirmed client struct will be "closed" and hence this reference.
497  *
498  * last_access - used to determine if the client has let its lease expire
499  * cbinfo - struct containing all callback related information
500  * cr_set - credentials used for the SETCLIENTID/SETCLIENTID_CONFIRM pair
501  * sysid - the lock manager sysid allocated for this client's file locks
502  * openownerlist - root of openowners list associated with this client
503  * ss_pn - Pathname to the stable storage file.
504  * cl_addr - Clients network address.
505  * server_instance - pointer to the currently associated server instance
506  */
507 typedef struct rfs4_client {
508 	rfs4_dbe_t		*rc_dbe;
509 	clientid4		rc_clientid;
510 	nfs_client_id4		rc_nfs_client;
511 	verifier4		rc_confirm_verf;
512 	unsigned		rc_need_confirm:1;
513 	unsigned		rc_unlksys_completed:1;
514 	unsigned		rc_can_reclaim:1;
515 	unsigned		rc_ss_remove:1;
516 	unsigned		rc_forced_expire:1;
517 	unsigned		rc_reclaim_completed:1;
518 	uint_t			rc_deleg_revoked;
519 	struct rfs4_client	*rc_cp_confirmed;
520 	time_t			rc_last_access;
521 	rfs4_cbinfo_t		rc_cbinfo;
522 	cred_set_t		rc_cr_set;
523 	sysid_t			rc_sysidt;
524 	list_t			rc_openownerlist;
525 	rfs4_ss_pn_t		*rc_ss_pn;
526 	struct sockaddr_storage rc_addr;
527 	rfs4_servinst_t		*rc_server_instance;
528 
529 	/* nfsv4.1 */
530 	rfs41_csr_t		rc_contrived;
531 	rfs41_sprot_t		rc_state_prot;
532 	list_t			rc_sessions;
533 	unsigned		rc_destroying:1;    /* flag: going to destroy */
534 } rfs4_client_t;
535 
536 /*
537  * ClntIP struct - holds the diagnosis about whether the client
538  * cannot support referrals.  Set to true for old Solaris clients.
539  */
540 
541 typedef struct rfs4_clntip {
542 	rfs4_dbe_t		*ri_dbe;
543 	struct sockaddr_storage ri_addr;
544 	unsigned		ri_no_referrals:1;
545 } rfs4_clntip_t;
546 
547 /*
548  * The openowner contains the client supplied open_owner4 as well as
549  * the matching sequence id and is used to track the client's usage of
550  * the open_owner4.  Note that a reply is saved here as well for
551  * processing of retransmissions.
552  *
553  * dbe - encapsulation struct
554  * client - reference to rfs4_client_t for this openowner
555  * owner - actual client supplied open_owner4
556  * need_confirm - does this openowner need to be OPEN_CONFIRMed
557  * postpone_confirm - set if error received on first use of open_owner
558  * state2confirm - what stateid4 should be used on the OPEN_CONFIRM
559  * open_seqid - what is the next open_seqid expected for this openowner
560  * oo_sw - used to serialize access to the open seqid/reply handling
561  * statelist - root of state struct list associated with this openowner
562  * node - node for client struct list of openowners
563  * reply_fh - open replay processing needs the filehandle so that it is
564  *	able to reset the current filehandle for appropriate compound
565  *	processing and reply.
566  * reply - last reply sent in relation to this openowner
567  */
568 typedef struct rfs4_openowner {
569 	rfs4_dbe_t		*ro_dbe;
570 	rfs4_client_t		*ro_client;
571 	open_owner4		ro_owner;
572 	unsigned		ro_need_confirm:1;
573 	unsigned		ro_postpone_confirm:1;
574 	seqid4			ro_open_seqid;
575 	rfs4_state_wait_t	ro_sw;
576 	list_t			ro_statelist;
577 	list_node_t		ro_node;
578 	nfs_fh4			ro_reply_fh;
579 	nfs_resop4		ro_reply;
580 } rfs4_openowner_t;
581 
582 /*
583  * This state struct represents the association between an openowner
584  * and a file that has been OPENed by that openowner.
585  *
586  * dbe - encapsulation struct
587  * stateid - server provided stateid
588  * owner - reference back to the openowner for this state
589  * finfo - reference to the open file for this state
590  * open_access - how did the openowner OPEN the file (access)
591  * open_deny - how did the openowner OPEN the file (deny)
592  * share_access - what share reservation is on the file (access)
593  * share_deny - what share reservation is on the file (deny)
594  * closed - has this file been closed?
595  * lostatelist - root of list of lo_state associated with this state/file
596  * node - node for state struct list of states
597  */
598 typedef struct rfs4_state {
599 	rfs4_dbe_t		*rs_dbe;
600 	stateid_t		rs_stateid;
601 	rfs4_openowner_t	*rs_owner;
602 	struct rfs4_file	*rs_finfo;
603 	uint32_t		rs_open_access;
604 	uint32_t		rs_open_deny;
605 	uint32_t		rs_share_access;
606 	uint32_t		rs_share_deny;
607 	unsigned		rs_closed:1;
608 	list_t			rs_lostatelist;
609 	list_node_t		rs_node;
610 } rfs4_state_t;
611 
612 /*
613  * Lockowner - track the lockowner and its related info
614  *
615  * dbe - encapsulation struct
616  * client - reference to the client
617  * owner - lockowner supplied by the client
618  * pid - local identifier used for file locking
619  */
620 typedef struct rfs4_lockowner {
621 	rfs4_dbe_t		*rl_dbe;
622 	rfs4_client_t		*rl_client;
623 	lock_owner4		rl_owner;
624 	pid_t			rl_pid;
625 } rfs4_lockowner_t;
626 
627 /*
628  * Lockowner_state associated with a state struct and lockowner
629  *
630  * dbe - encapsulation struct
631  * state - reference back to state struct for open file
632  * lockid - stateid for this lockowner/state
633  * locker - reference to lockowner
634  * seqid - sequence id for this lockowner/state
635  * skip_seqid_check - used on initialization of struct
636  * locks_cleaned - have all locks been released for this lockowner/file?
637  * lock_completed - successful LOCK with lockowner/file?
638  * ls_sw - used to serialize update seqid/reply/stateid handling
639  * node - node for state struct list of lo_states
640  * reply - last reply sent in relation to this lockowner/state
641  */
642 typedef struct rfs4_lo_state {
643 	rfs4_dbe_t		*rls_dbe;
644 	rfs4_state_t		*rls_state;
645 	stateid_t		rls_lockid;
646 	rfs4_lockowner_t	*rls_locker;
647 	seqid4			rls_seqid;
648 	unsigned		rls_skip_seqid_check:1;
649 	unsigned		rls_locks_cleaned:1;
650 	unsigned		rls_lock_completed:1;
651 	rfs4_state_wait_t	rls_sw;
652 	list_node_t		rls_node;
653 	nfs_resop4		rls_reply;
654 } rfs4_lo_state_t;
655 
656 /*
657  * Delegation state - per client
658  *
659  * dbe - encapsulation struct
660  * dtype - type of delegation (NONE, READ, WRITE)
661  * delegid - stateid for this delegation
662  * time_granted - time this delegation was assigned to client
663  * time_recalled - time when the server started recall process
664  * time_revoked - if revoked, time that the revoke occurred
665  * finfo - reference to the file associated with this delegation
666  * client - reference to client for which this delegation is associated
667  * node - list of delegations for the file (WRITE == 1, READ == )
668  */
669 typedef struct rfs4_deleg_state {
670 	rfs4_dbe_t		*rds_dbe;
671 	open_delegation_type4	rds_dtype;
672 	stateid_t		rds_delegid;
673 	time_t			rds_time_granted;
674 	time_t			rds_time_recalled;
675 	time_t			rds_time_revoked;
676 	struct rfs4_file	*rds_finfo;
677 	rfs4_client_t		*rds_client;
678 	list_node_t		rds_node;
679 } rfs4_deleg_state_t;
680 
681 /*
682  * Delegation info associated with the file
683  *
684  * dtype - type of delegation for file (NONE, READ, WRITE)
685  * time_returned - time that last delegation was returned for file
686  * time_recalled - time that recall sequence started
687  * time_lastgrant - time that last delegation was provided to a client
688  * time_lastwrite - time of last write to use the delegation stateid
689  * time_rm_delayed - time of last remove/rename which was DELAYed
690  * rdgrants - how many read delegations have been provided for this file
691  * wrgrants - how many write delegations provided (can only be one)
692  * recall_count - how many recall threads are outstanding
693  * recall_lock - lock to protect contents of this struct
694  * recall_cv - condition var for the "parent" thread to wait upon
695  * deleg_change_grant - value for change attribute at time of write grant
696  * deleg_change - most recent value of change obtained from client
697  * deleg_change_ts - time of last deleg_change update
698  * ever_recalled - has this particular delegation ever been recalled?
699  * dont_grant - file deletion is impending, don't grant a delegation
700  * conflicted_client - clientid of the client that caused a CB_RECALL
701  *	to occur. This is used for delegation policy (should a delegation
702  *	be granted shortly after it has been returned?)
703  */
704 typedef struct rfs4_dinfo {
705 	open_delegation_type4 rd_dtype;
706 	time_t		rd_time_returned;
707 	time_t		rd_time_recalled;
708 	time_t		rd_time_lastgrant;
709 	time_t		rd_time_lastwrite;
710 	time_t		rd_time_rm_delayed;
711 	uint32_t	rd_rdgrants;
712 	uint32_t	rd_wrgrants;
713 	int32_t		rd_recall_count;
714 	kmutex_t	rd_recall_lock[1];
715 	kcondvar_t	rd_recall_cv[1];
716 	bool_t		rd_ever_recalled;
717 	uint32_t	rd_hold_grant;
718 	clientid4	rd_conflicted_client;
719 } rfs4_dinfo_t;
720 
721 /*
722  * File
723  *
724  * dbe - encapsulation struct
725  * vp - vnode for the file that is open or has a delegation
726  * filehandle - the filehandle generated by the server for this file
727  * delegstatelist - root of delegation list for this file
728  * dinfo - see struct definition above
729  * share_deny - union of all deny modes on file
730  * share_access - union of all access modes on file
731  * access_read - count of read access
732  * access_write - count of write access
733  * deny_read - count of deny reads
734  * deny_write - count of deny writes
735  * file_rwlock - lock for serializing the removal of a file while
736  *	the state structures are active within the server
737  *
738  *	The only requirement for locking file_rwlock is that the
739  *	caller have a reference to the containing rfs4_file.  The dbe
740  *	lock may or may not be held for lock/unlock of file_rwlock.
741  *	As mentioned above, the file_rwlock is used for serialization
742  *	of file removal and more specifically reference to the held
743  *	vnode (e.g. vp).
744  */
745 typedef struct rfs4_file {
746 	rfs4_dbe_t	*rf_dbe;
747 	vnode_t		*rf_vp;
748 	nfs_fh4		rf_filehandle;
749 	list_t		rf_delegstatelist;
750 	rfs4_dinfo_t	rf_dinfo;
751 	uint32_t	rf_share_deny;
752 	uint32_t	rf_share_access;
753 	uint32_t	rf_access_read;
754 	uint32_t	rf_access_write;
755 	uint32_t	rf_deny_read;
756 	uint32_t	rf_deny_write;
757 	krwlock_t	rf_file_rwlock;
758 } rfs4_file_t;
759 
/*
 * nfs4_deleg_policy is used to signify the server's global delegation
 * policy.  The default is to NEVER delegate files and the
 * administrator must configure the server to enable delegations.
 *
 * The disable/enable delegation functions are used to eliminate a
 * race with exclusive creates.
 */
typedef enum {
	SRV_NEVER_DELEGATE = 0,
	SRV_NORMAL_DELEGATE = 1
} srv_deleg_policy_t;

extern void rfs4_disable_delegation(void);
extern void rfs4_enable_delegation(void);
774 
/*
 * Request types for delegation. These correspond with
 * open_delegation_type4 with the addition of a new value, DELEG_ANY,
 * to request any delegation.
 */
typedef enum {
	DELEG_NONE = 0,		/* Corresponds to OPEN_DELEG_NONE */
	DELEG_READ = 1,		/* Corresponds to OPEN_DELEG_READ */
	DELEG_WRITE = 2,	/* Corresponds to OPEN_DELEG_WRITE */
	DELEG_ANY = -1		/* New value to request any delegation type */
} delegreq_t;

/*
 * Convert an open_delegation_type4 value to the matching delegreq_t.
 * The whole expansion is parenthesized so it behaves as a single operand
 * (e.g. under sizeof or next to a postfix operator).
 */
#define	NFS4_DELEG4TYPE2REQTYPE(x) ((delegreq_t)(x))
788 
789 /*
790  * Zone global variables of NFSv4 server
791  */
792 typedef struct nfs4_srv {
793 	/* Unique write verifier */
794 	verifier4	write4verf;
795 	/* Delegation lock */
796 	kmutex_t	deleg_lock;
797 	/* Used to serialize create/destroy of nfs4_server_state database */
798 	kmutex_t	state_lock;
799 	rfs4_database_t *nfs4_server_state;
800 	/* Used to manage access to server instance linked list */
801 	kmutex_t	servinst_lock;
802 	rfs4_servinst_t *nfs4_cur_servinst;
803 	/* Used to manage access to nfs4_deleg_policy */
804 	krwlock_t	deleg_policy_lock;
805 	srv_deleg_policy_t nfs4_deleg_policy;
806 	/* Set first time we see one */
807 	int		seen_first_compound;
808 	/*
809 	 * Circular double-linked list of paths for currently-served RGs.
810 	 * No locking required -- only changed on server start.
811 	 * Managed with insque/remque.
812 	 */
813 	rfs4_dss_path_t	*dss_pathlist;
814 	/* Duplicate request cache */
815 	struct rfs4_drc	*nfs4_drc;
816 	/* nfsv4 server start time */
817 	time_t rfs4_start_time;
818 	/* Used to serialize lookups of clientids */
819 	krwlock_t rfs4_findclient_lock;
820 
821 	/* NFSv4 server state client tables */
822 	/* table expiry times */
823 	time_t rfs4_client_cache_time;
824 	time_t rfs4_openowner_cache_time;
825 	time_t rfs4_state_cache_time;
826 	time_t rfs4_lo_state_cache_time;
827 	time_t rfs4_lockowner_cache_time;
828 	time_t rfs4_file_cache_time;
829 	time_t rfs4_deleg_state_cache_time;
830 	time_t rfs4_clntip_cache_time;
831 	/* tables and indexes */
832 	/* client table */
833 	rfs4_table_t *rfs4_client_tab;
834 	rfs4_index_t *rfs4_clientid_idx;
835 	rfs4_index_t *rfs4_nfsclnt_idx;
836 	/* client IP table */
837 	rfs4_table_t *rfs4_clntip_tab;
838 	rfs4_index_t *rfs4_clntip_idx;
839 	/* Open Owner table */
840 	rfs4_table_t *rfs4_openowner_tab;
841 	rfs4_index_t *rfs4_openowner_idx;
842 	/* Open State ID table */
843 	rfs4_table_t *rfs4_state_tab;
844 	rfs4_index_t *rfs4_state_idx;
845 	rfs4_index_t *rfs4_state_owner_file_idx;
846 	rfs4_index_t *rfs4_state_file_idx;
847 	/* Lock State ID table */
848 	rfs4_table_t *rfs4_lo_state_tab;
849 	rfs4_index_t *rfs4_lo_state_idx;
850 	rfs4_index_t *rfs4_lo_state_owner_idx;
851 	/* Lock owner table */
852 	rfs4_table_t *rfs4_lockowner_tab;
853 	rfs4_index_t *rfs4_lockowner_idx;
854 	rfs4_index_t *rfs4_lockowner_pid_idx;
855 	/* File table */
856 	rfs4_table_t *rfs4_file_tab;
857 	rfs4_index_t *rfs4_file_idx;
858 	/* Deleg State table */
859 	rfs4_table_t *rfs4_deleg_state_tab;
860 	rfs4_index_t *rfs4_deleg_idx;
861 	rfs4_index_t *rfs4_deleg_state_idx;
862 
863 	/* nfs4.x */
864 	rfs4_table_t	*rfs4_session_tab;
865 	rfs4_index_t	*rfs4_session_idx;
866 
867 	/* client stable storage */
868 	int rfs4_ss_enabled;
869 } nfs4_srv_t;
870 
871 /*
872  * max length of the NFSv4 server database name
873  */
874 #define	RFS4_MAX_MEM_CACHE_NAME 48
875 
876 /*
877  * global NFSv4 server kmem caches
878  * r_db_name - The name of the state database and the table that will use it
879  *             These tables are defined in nfs4_srv_t
880  * r_db_mem_cache - The kmem cache associated with the state database name
881  */
882 typedef struct rfs4_db_mem_cache {
883 	char		r_db_name[RFS4_MAX_MEM_CACHE_NAME];
884 	kmem_cache_t	*r_db_mem_cache;
885 } rfs4_db_mem_cache_t;
886 
887 #define	RFS4_DB_MEM_CACHE_NUM 9
888 
889 extern rfs4_db_mem_cache_t rfs4_db_mem_cache_table[RFS4_DB_MEM_CACHE_NUM];
890 
891 extern srv_deleg_policy_t nfs4_get_deleg_policy();
892 
893 extern void		rfs4_servinst_create(nfs4_srv_t *, int, int, char **);
894 extern void		rfs4_servinst_destroy_all(nfs4_srv_t *);
895 extern void		rfs4_servinst_assign(nfs4_srv_t *, rfs4_client_t *,
896 			    rfs4_servinst_t *);
897 extern rfs4_servinst_t	*rfs4_servinst(rfs4_client_t *);
898 extern int		rfs4_clnt_in_grace(rfs4_client_t *);
899 extern int		rfs4_servinst_in_grace(rfs4_servinst_t *);
900 extern int		rfs4_servinst_grace_new(rfs4_servinst_t *);
901 extern void		rfs4_grace_start(rfs4_servinst_t *);
902 extern void		rfs4_grace_start_new(nfs4_srv_t *);
903 extern void		rfs4_grace_reset_all(nfs4_srv_t *);
904 extern void		rfs4_dss_readstate(nfs4_srv_t *, int, char **);
905 
906 /*
907  * Various interfaces to manipulate the state structures introduced
908  * above
909  */
910 extern	void		rfs4_free_reply(nfs_resop4 *);
911 extern	void		rfs4_copy_reply(nfs_resop4 *, nfs_resop4 *);
912 
913 /* rfs4_client_t handling */
914 extern	rfs4_client_t	*rfs4_findclient(nfs_client_id4 *,
915 					bool_t *, rfs4_client_t *);
916 extern	rfs4_client_t	*rfs4_findclient_by_id(clientid4, bool_t);
917 extern	rfs4_client_t	*rfs4_findclient_by_addr(struct sockaddr *);
918 extern	void		rfs4_client_rele(rfs4_client_t *);
919 extern	void		rfs4_client_close(rfs4_client_t *);
920 extern	void		rfs4_client_state_remove(rfs4_client_t *);
921 extern	void		rfs4_client_scv_next(rfs4_client_t *);
922 extern	void		rfs4_update_lease(rfs4_client_t *);
923 extern	bool_t		rfs4_lease_expired(rfs4_client_t *);
924 extern	nfsstat4	rfs4_check_clientid(clientid4 *, int);
925 
926 /* rfs4_clntip_t handling */
927 extern	rfs4_clntip_t	*rfs4_find_clntip(struct sockaddr *, bool_t *);
928 extern	void		rfs4_invalidate_clntip(struct sockaddr *);
929 
930 /* rfs4_openowner_t handling */
931 extern	rfs4_openowner_t *rfs4_findopenowner(open_owner4 *, bool_t *, seqid4);
932 extern	void		rfs4_update_open_sequence(rfs4_openowner_t *);
933 extern	void		rfs4_update_open_resp(rfs4_openowner_t *,
934 					nfs_resop4 *, nfs_fh4 *);
935 extern	void		rfs4_openowner_rele(rfs4_openowner_t *);
936 extern	void		rfs4_free_opens(rfs4_openowner_t *, bool_t, bool_t);
937 
938 /* rfs4_lockowner_t handling */
939 extern	rfs4_lockowner_t *rfs4_findlockowner(lock_owner4 *, bool_t *);
940 extern	rfs4_lockowner_t *rfs4_findlockowner_by_pid(pid_t);
941 extern	void		rfs4_lockowner_rele(rfs4_lockowner_t *);
942 
943 /* rfs4_state_t handling */
944 extern	rfs4_state_t	*rfs4_findstate_by_owner_file(rfs4_openowner_t *,
945 					rfs4_file_t *, bool_t *);
946 extern	void		rfs4_state_rele(rfs4_state_t *);
947 extern	void		rfs4_state_close(rfs4_state_t *, bool_t,
948 					bool_t, cred_t *);
949 extern	void		rfs4_release_share_lock_state(rfs4_state_t *,
950 					cred_t *, bool_t);
951 extern	void		rfs4_close_all_state(rfs4_file_t *);
952 
953 /* rfs4_lo_state_t handling */
954 extern	rfs4_lo_state_t *rfs4_findlo_state_by_owner(rfs4_lockowner_t *,
955 						rfs4_state_t *, bool_t *);
956 extern	void		rfs4_lo_state_rele(rfs4_lo_state_t *, bool_t);
957 extern	void		rfs4_update_lock_sequence(rfs4_lo_state_t *);
958 extern	void		rfs4_update_lock_resp(rfs4_lo_state_t *,
959 					nfs_resop4 *);
960 
961 /* rfs4_file_t handling */
962 extern	rfs4_file_t	*rfs4_findfile(vnode_t *, nfs_fh4 *, bool_t *);
963 extern	rfs4_file_t	*rfs4_findfile_withlock(vnode_t *, nfs_fh4 *,
964 						bool_t *);
965 extern	void		rfs4_file_rele(rfs4_file_t *);
966 
967 /* General collection of "get state" functions */
968 extern	nfsstat4	rfs4_get_state(stateid4 *, rfs4_state_t **,
969 					rfs4_dbsearch_type_t);
970 extern	nfsstat4	rfs4_get_deleg_state(stateid4 *,
971 					rfs4_deleg_state_t **);
972 extern	nfsstat4	rfs4_get_lo_state(stateid4 *, rfs4_lo_state_t **,
973 					bool_t);
974 struct compound_state;
975 extern	nfsstat4	rfs4_check_stateid(int, vnode_t *, stateid4 *,
976 					bool_t, bool_t *, bool_t,
977 					caller_context_t *,
978 					struct compound_state *);
979 extern	int		rfs4_check_stateid_seqid(rfs4_state_t *, stateid4 *,
980 					const struct compound_state *);
981 extern	int		rfs4_check_lo_stateid_seqid(rfs4_lo_state_t *,
982 					stateid4 *,
983 					const struct compound_state *);
984 
985 /* return values for rfs4_check_stateid_seqid() */
986 #define	NFS4_CHECK_STATEID_OKAY	1
987 #define	NFS4_CHECK_STATEID_OLD	2
988 #define	NFS4_CHECK_STATEID_BAD	3
989 #define	NFS4_CHECK_STATEID_EXPIRED	4
990 #define	NFS4_CHECK_STATEID_REPLAY	5
991 #define	NFS4_CHECK_STATEID_CLOSED	6
992 #define	NFS4_CHECK_STATEID_UNCONFIRMED	7
993 
994 /* delay() time that server is willing to briefly wait for a delegreturn */
995 #define	NFS4_DELEGATION_CONFLICT_DELAY	(hz/10)
996 
997 /*
998  * Interfaces for handling of callback's client handle cache and
999  * callback interfaces themselves.
1000  */
1001 extern	void		rfs4_cbinfo_free(rfs4_cbinfo_t *);
1002 extern	void		rfs4_client_setcb(rfs4_client_t *, cb_client4 *,
1003 					uint32_t);
1004 extern	void		rfs4_deleg_cb_check(rfs4_client_t *);
1005 extern	nfsstat4	rfs4_vop_getattr(vnode_t *, vattr_t *, int, cred_t *);
1006 
1007 /* rfs4_deleg_state_t handling and other delegation interfaces */
1008 extern	rfs4_deleg_state_t *rfs4_finddeleg(rfs4_state_t *, bool_t *);
1009 extern	rfs4_deleg_state_t *rfs4_finddelegstate(stateid_t *);
1010 extern	bool_t		rfs4_check_recall(rfs4_state_t *, uint32_t);
1011 extern	void		rfs4_recall_deleg(rfs4_file_t *,
1012 				bool_t, rfs4_client_t *);
1013 extern	int		rfs4_get_deleg(rfs4_state_t *,  open_delegation_type4,
1014 			open_delegation_type4 (*policy)(rfs4_state_t *,
1015 				open_delegation_type4 dtype));
1016 extern	rfs4_deleg_state_t *rfs4_grant_delegation(delegreq_t, rfs4_state_t *,
1017 				int *);
1018 extern	void		rfs4_set_deleg_response(rfs4_deleg_state_t *,
1019 				open_delegation4 *, nfsace4 *, int);
1020 extern	void		rfs4_return_deleg(rfs4_deleg_state_t *, bool_t);
1021 extern	bool_t		rfs4_is_deleg(rfs4_state_t *);
1022 extern	void		rfs4_deleg_state_rele(rfs4_deleg_state_t *);
1023 extern	bool_t		rfs4_check_delegated_byfp(int, rfs4_file_t *,
1024 					bool_t, bool_t, bool_t, clientid4 *);
1025 extern	void		rfs4_clear_dont_grant(rfs4_file_t *);
1026 
1027 /*
1028  * nfs4 monitored operations.
1029  */
1030 extern int deleg_rd_open(femarg_t *, int, cred_t *, caller_context_t *);
1031 extern int deleg_wr_open(femarg_t *, int, cred_t *, caller_context_t *);
1032 extern int deleg_wr_read(femarg_t *, uio_t *, int, cred_t *,
1033 	    caller_context_t *);
1034 extern int deleg_rd_write(femarg_t *, uio_t *, int, cred_t *,
1035 	    caller_context_t *);
1036 extern int deleg_wr_write(femarg_t *, uio_t *, int, cred_t *,
1037 	    caller_context_t *);
1038 extern int deleg_rd_setattr(femarg_t *, vattr_t *, int, cred_t *,
1039 		caller_context_t *);
1040 extern int deleg_wr_setattr(femarg_t *, vattr_t *, int, cred_t *,
1041 		caller_context_t *);
1042 extern int deleg_rd_rwlock(femarg_t *, int, caller_context_t *);
1043 extern int deleg_wr_rwlock(femarg_t *, int, caller_context_t *);
1044 extern int deleg_rd_space(femarg_t *, int, flock64_t *, int, offset_t, cred_t *,
1045 		caller_context_t *);
1046 extern int deleg_wr_space(femarg_t *, int, flock64_t *, int, offset_t, cred_t *,
1047 		caller_context_t *);
1048 extern int deleg_rd_setsecattr(femarg_t *, vsecattr_t *, int, cred_t *,
1049 		caller_context_t *);
1050 extern int deleg_wr_setsecattr(femarg_t *, vsecattr_t *, int, cred_t *,
1051 		caller_context_t *);
1052 extern int deleg_rd_vnevent(femarg_t *, vnevent_t, vnode_t *, char *,
1053 		caller_context_t *);
1054 extern int deleg_wr_vnevent(femarg_t *, vnevent_t, vnode_t *, char *,
1055 		caller_context_t *);
1056 
1057 extern void rfs4_mon_hold(void *);
1058 extern void rfs4_mon_rele(void *);
1059 
1060 extern fem_t	*deleg_rdops;
1061 extern fem_t	*deleg_wrops;
1062 
1063 extern int rfs4_share(rfs4_state_t *, uint32_t, uint32_t);
1064 extern int rfs4_unshare(rfs4_state_t *);
1065 extern void rfs4_set_deleg_policy(nfs4_srv_t *, srv_deleg_policy_t);
1066 extern void rfs4_hold_deleg_policy(nfs4_srv_t *);
1067 extern void rfs4_rele_deleg_policy(nfs4_srv_t *);
1068 
1069 #ifdef DEBUG
/*
 * Conditionally emit a cmn_err() message.  `args' is the complete,
 * parenthesized cmn_err() argument list and is only used when `var' is
 * non-zero.  The do/while (0) wrapper makes the expansion a single
 * statement, so a following `else' cannot bind to the macro's own `if'.
 */
#define	NFS4_DEBUG(var, args)		\
	do {				\
		if (var)		\
			cmn_err args;	\
	} while (0)
1071 
1072 extern int rfs4_debug;
1073 extern int nfs4_client_attr_debug;
1074 extern int nfs4_client_state_debug;
1075 extern int nfs4_client_shadow_debug;
1076 extern int nfs4_client_lock_debug;
1077 extern int nfs4_client_lease_debug;
1078 extern int nfs4_seqid_sync;
1079 extern int nfs4_client_map_debug;
1080 extern int nfs4_client_inactive_debug;
1081 extern int nfs4_client_recov_debug;
1082 extern int nfs4_client_failover_debug;
1083 extern int nfs4_client_call_debug;
1084 extern int nfs4_client_foo_debug;
1085 extern int nfs4_client_zone_debug;
1086 extern int nfs4_lost_rqst_debug;
1087 extern int nfs4_open_stream_debug;
1088 extern int nfs4_client_open_dg;
1089 extern int nfs4_srvmnt_debug;
1090 extern int nfs4_utf8_debug;
1091 
1092 void rfs4_dbe_debug(rfs4_dbe_t *e);
1093 
1094 #ifdef NFS4_DEBUG_MUTEX
1095 void nfs4_debug_mutex_enter(kmutex_t *, char *, int);
1096 void nfs4_debug_mutex_exit(kmutex_t *, char *, int);
1097 
1098 #define	mutex_enter(m) nfs4_debug_mutex_enter((m), __FILE__, __LINE__)
1099 #define	mutex_exit(m) nfs4_debug_mutex_exit((m), __FILE__, __LINE__)
1100 #endif /* NFS4_DEBUG_MUTEX */
1101 
1102 #else  /* ! DEBUG */
1103 #define	NFS4_DEBUG(var, args)
1104 #endif /* DEBUG */
1105 
1106 /*
1107  * XXX - temporary for testing of volatile fh
1108  */
1109 
/*
 * Layout of struct nfs_fh4_fmt.  Two variants exist; the one built with
 * VOLATILE_FH_TEST carries an extra fh4_volatile_id word.
 */
1110 #ifdef VOLATILE_FH_TEST
1111 
1112 struct nfs_fh4_fmt {
1113 	fhandle4_t	fh4_i;
1114 	uint32_t	fh4_flag;	/* see FH4_NAMEDATTR, FH4_ATTRDIR */
1115 	uint32_t	fh4_volatile_id; /* only with VOLATILE_FH_TEST */
1116 };
1117 
1118 #else /* VOLATILE_FH_TEST */
1119 
1120 struct nfs_fh4_fmt {
1121 	fhandle4_t	fh4_i;
1122 	uint32_t	fh4_flag;	/* see FH4_NAMEDATTR, FH4_ATTRDIR */
1123 };
1124 
1125 #endif /* VOLATILE_FH_TEST */
1126 
1127 #define	FH4_NAMEDATTR	1
1128 #define	FH4_ATTRDIR	2
1129 
1130 #define	fh4_fsid	fh4_i.fhx_fsid
1131 #define	fh4_len		fh4_i.fhx_len	/* fid length */
1132 #define	fh4_data	fh4_i.fhx_data	/* fid bytes */
1133 #define	fh4_xlen	fh4_i.fhx_xlen
1134 #define	fh4_xdata	fh4_i.fhx_xdata
1135 typedef struct nfs_fh4_fmt nfs_fh4_fmt_t;
1136 
/*
 * fh4_to_fmt4 views the nfs_fh4_val buffer of an nfs_fh4 as an
 * nfs_fh4_fmt_t; the get/set/clr macros test, set and clear bits in
 * that structure's fh4_flag word.
 */
#define	fh4_to_fmt4(fh4p) ((nfs_fh4_fmt_t *)((fh4p)->nfs_fh4_val))
#define	get_fh4_flag(fh4p, flag) ((flag) & fh4_to_fmt4(fh4p)->fh4_flag)
#define	set_fh4_flag(fh4p, flag) (fh4_to_fmt4(fh4p)->fh4_flag |= (flag))
#define	clr_fh4_flag(fh4p, flag) (fh4_to_fmt4(fh4p)->fh4_flag &= ~(flag))
1141 
1142 #define	NFS_FH4_LEN	sizeof (nfs_fh4_fmt_t)
1143 
/*
 * Copy fields from external (fhandle_t) to in-memory (nfs_fh4_fmt_t)
 * format to support export info checking.  It does not copy over
 * the complete filehandle, just the fsid, xlen and xdata.  It may
 * need to be changed to be used in other places.
 *
 * NOTE: The macro expects the space to be pre-allocated for
 * the contents of nfs_fh4_fmt_t.  Both arguments are evaluated more
 * than once, so they must be free of side effects.
 *
 * The body is wrapped in do/while (0) so that the macro expands to a
 * single statement and can be used safely in an unbraced if/else.
 * It must be invoked with a trailing semicolon.
 */
#define	FH_TO_FMT4(exifh, nfs_fmt) do {				\
	bzero((nfs_fmt), NFS_FH4_LEN);				\
	(nfs_fmt)->fh4_fsid = (exifh)->fh_fsid;			\
	(nfs_fmt)->fh4_xlen = (exifh)->fh_xlen;			\
	bcopy((exifh)->fh_xdata, (nfs_fmt)->fh4_xdata,		\
	    (exifh)->fh_xlen);					\
} while (0)
1160 
1161 /*
1162  * A few definitions of repeatedly used constructs for nfsv4
1163  */
/*
 * Free the buffer of a utf8string and reset it to the empty state
 * (NULL value pointer, zero length).  `str' is evaluated several times,
 * so it must be an lvalue expression without side effects.  The
 * do/while (0) wrapper makes the expansion a single statement that is
 * safe in an unbraced if/else; invoke it with a trailing semicolon.
 */
#define	UTF8STRING_FREE(str)	do {				\
	kmem_free((str).utf8string_val,	(str).utf8string_len);	\
	(str).utf8string_val = NULL;				\
	(str).utf8string_len = 0;				\
} while (0)
1169 
/*
 * NFS4_VOLATILE_FH masks mi_fh_expire_type with the volatile filehandle
 * bits (FH4_VOLATILE_ANY, FH4_VOL_MIGRATION, FH4_VOL_RENAME); the result
 * is non-zero if the filesystem uses non-persistent filehandles.
 */
#define	NFS4_VOLATILE_FH(mi)					\
	((FH4_VOL_RENAME | FH4_VOL_MIGRATION | FH4_VOLATILE_ANY) &	\
	    (mi)->mi_fh_expire_type)
1177 
/*
 * NFS_IS_DOTNAME evaluates to 1 when name is exactly "." or "..",
 * and to 0 otherwise.
 */
#define	NFS_IS_DOTNAME(name)					\
	((name)[0] != '.' ? 0 :					\
	    ((name)[1] == '\0' ? 1 :				\
	    ((name)[1] == '.' && (name)[2] == '\0')))
1184 
1185 /*
1186  * Define the number of bits in a bitmap word (uint32)
1187  */
1188 #define	NFS4_BITMAP4_BITSPERWORD	(sizeof (uint32_t) * 8)
1189 
1190 /*
1191  * Define the value for the access field of the compound_state structure
1192  * based on the result of nfsauth access checking.
1193  */
1194 #define	CS_ACCESS_OK		0x1
1195 #define	CS_ACCESS_DENIED	0x2
1196 #define	CS_ACCESS_LIMITED	0x4
1197 
1198 /*
1199  * compound state in nfsv4 server
1200  */
/*
 * Per-request state for one COMPOUND: current and saved vnode/filehandle,
 * credentials, and (under "NFSv4.1") the session, slot and client fields.
 */
1201 struct compound_state {
1202 	struct exportinfo *exi;		/* presumably export struct for vp */
1203 	struct exportinfo *saved_exi;	/* export struct for saved_vp */
1204 	cred_t		*basecr;	/* UNIX cred:  only RPC request */
1205 	caddr_t		principal;
1206 	int		nfsflavor;
1207 	cred_t		*cr;		/* UNIX cred: RPC request and */
1208 					/* target export */
1209 	bool_t		cont;
1210 	uint_t		access;		/* access perm on vp per request */
					/* (one of the CS_ACCESS_* values) */
1211 	bool_t		deleg;		/* TRUE if current fh has */
1212 					/* write delegated */
1213 	vnode_t		*vp;		/* modified by PUTFH, and by ops that */
1214 					/* input to GETFH */
1215 	bool_t		mandlock;	/* Is mandatory locking in effect */
1216 					/* for vp */
1217 	vnode_t		*saved_vp;	/* modified by SAVEFH, copied to */
1218 					/* vp by RESTOREFH */
1219 	nfsstat4	*statusp;
1220 	nfs_fh4		fh;		/* presumably the filehandle for vp; */
					/* valid only if vp != NULL */
1221 	nfs_fh4		saved_fh;	/* presumably the filehandle for */
1222 					/* saved_vp; valid only if */
					/* saved_vp != NULL */
1223 	struct svc_req	*req;
1224 	char		fhbuf[NFS4_FHSIZE];
1225 
1226 	/* NFSv4.1 */
1227 	uint8_t		minorversion;	/* NFS4 minor version */
1228 	rfs4_session_t	*sp;		/* OP_SEQUENCE set it */
1229 	slotid4		slotno;
1230 	rfs4_slot_t	*slot;		/* tested by rfs4_has_session() */
1231 	rfs4_client_t	*client;
1232 	uint16_t	op_pos;
1233 	uint16_t	op_len;		/* number operations in compound req */
1234 #define	RFS4_DISPATCH_DONE	(1 << 0)
1235 	uint8_t		cs_flags;	/* presumably RFS4_DISPATCH_DONE bits */
1236 	bool_t		cachethis;
1237 	COMPOUND4res	*cmpresp;
1238 };
1239 
1240 typedef struct compound_state compound_state_t;
1241 
1242 static inline bool_t
1243 rfs4_has_session(const compound_state_t *cs)
1244 {
1245 	return (cs->slot != NULL);
1246 }
1247 
1248 /*
1249  * Conversion commands for nfsv4 server attr checking
1250  */
1251 enum nfs4_attr_cmd {
1252 	NFS4ATTR_SUPPORTED = 0,		/* check which attrs supported */
1253 	NFS4ATTR_GETIT = 1,		/* getattr - sys to fattr4 (r) */
1254 	NFS4ATTR_SETIT = 2,		/* setattr - fattr4 to sys (w) */
1255 	NFS4ATTR_VERIT = 3,		/* verify - fattr4 to sys (r) */
1256 	NFS4ATTR_FREEIT = 4		/* free any alloc'd space for attr */
1257 };
1258 
1259 typedef enum nfs4_attr_cmd nfs4_attr_cmd_t;
1260 
1261 struct nfs4_svgetit_arg {
1262 	nfs4_attr_cmd_t op;		/* getit or setit */
1263 	struct compound_state *cs;
1264 	struct statvfs64 *sbp;
1265 	uint_t		flag;		/* VOP_GETATTR/VOP_SETATTR flag */
1266 	uint_t		xattr;		/* object is xattr */
1267 	bool_t		rdattr_error_req; /* if readdir & client wants */
1268 						/* rdattr_error */
1269 	nfsstat4	rdattr_error;	/* used for per-entry status */
1270 					/* (if rdattr_err) */
1271 	bool_t		is_referral;	/* because sometimes we tell lies */
1272 	bool_t		mntdfid_set;
1273 
1274 	fattr4_mounted_on_fileid
1275 			mounted_on_fileid;
1276 					/* readdir op can always return	*/
1277 					/* d_ino from server fs dirent  */
1278 					/* for mounted_on_fileid attr.	*/
1279 					/* This field holds d_ino so	*/
1280 					/* srv attr conv code can avoid */
1281 					/* doing an untraverse.		*/
1282 	vattr_t		vap[1];
1283 };
1284 
1285 struct nfs4_ntov_map {
1286 	bitmap4		fbit;		/* FATTR4_XXX_MASKY */
1287 	uint_t		vbit;		/* AT_XXX */
1288 	bool_t		vfsstat;
1289 	bool_t		mandatory;	/* attribute mandatory to implement? */
1290 	uint_t		nval;
1291 	int		xdr_size;	/* Size of XDR'd attr */
1292 	xdrproc_t	xfunc;
1293 	int (*sv_getit)(nfs4_attr_cmd_t, struct nfs4_svgetit_arg *,
1294 		union nfs4_attr_u *);	/* subroutine for getting attr. */
1295 	char		*prtstr;	/* string attr for printing */
1296 };
1297 
1298 struct nfs4attr_to_vattr {
1299 	vnode_t		*vp;
1300 	vattr_t		*vap;
1301 	nfs_fh4		*fhp;
1302 	nfsstat4	rdattr_error;
1303 	uint32_t	flag;
1304 	fattr4_change	change;
1305 	fattr4_fsid	srv_fsid;
1306 	fattr4_mounted_on_fileid	mntd_fid;
1307 };
1308 
1309 typedef struct nfs4attr_to_vattr ntov4_t;
1310 
1311 /*
1312  * nfs4attr_to_vattr flags
1313  */
1314 #define	NTOV_FHP_VALID			0x01
1315 #define	NTOV_RDATTR_ERROR_VALID		0x02
1316 #define	NTOV_CHANGE_VALID		0x04
1317 #define	NTOV_SUPP_VALID			0x08
1318 #define	NTOV_SRV_FSID_VALID		0x10
1319 #define	NTOV_MOUNTED_ON_FILEID_VALID	0x20
1320 
1321 
1322 #define	FATTR4_MANDATTR_MASK (		\
1323 	FATTR4_SUPPORTED_ATTRS_MASK |	\
1324 	FATTR4_TYPE_MASK |		\
1325 	FATTR4_FH_EXPIRE_TYPE_MASK |	\
1326 	FATTR4_CHANGE_MASK |		\
1327 	FATTR4_SIZE_MASK |		\
1328 	FATTR4_LINK_SUPPORT_MASK |	\
1329 	FATTR4_SYMLINK_SUPPORT_MASK |	\
1330 	FATTR4_NAMED_ATTR_MASK |	\
1331 	FATTR4_FSID_MASK |		\
1332 	FATTR4_UNIQUE_HANDLES_MASK |	\
1333 	FATTR4_LEASE_TIME_MASK |	\
1334 	FATTR4_RDATTR_ERROR_MASK |	\
1335 	FATTR4_FILEHANDLE_MASK)
1336 
1337 
1338 struct nfs4attr_to_osattr {
1339 	void *attrconv_arg;
1340 	uint_t mask;
1341 };
1342 
1343 struct mntinfo4;
1344 
1345 /*
1346  * lkp4_attr_setup lists the different options for attributes when calling
1347  * nfs4lookup_setup - either no attributes (just lookups - e.g., secinfo),
1348  * one component only (normal component lookup), get attributes for the
1349  * last component (e.g., mount), attributes for each component (e.g.,
1350  * failovers later), just the filehandle for the last component (e.g.,
1351  * volatile filehandle recovery), or stuff that needs OPENATTR (e.g.
1352  * looking up a named attribute or its hidden directory).
1353  */
1354 enum lkp4_attr_setup {
1355 	LKP4_NO_ATTRIBUTES = 0,		/* no attrs or filehandles */
1356 	LKP4_ALL_ATTRIBUTES = 3,	/* multi-comp: attrs for all comps */
1357 	LKP4_LAST_NAMED_ATTR = 5,	/* multi-comp: named attr & attrdir */
1358 	LKP4_LAST_ATTRDIR = 6,		/* multi-comp: just attrdir */
1359 	LKP4_ALL_ATTR_SECINFO = 7	/* multi-comp: attrs for all comp and */
1360 					/*	secinfo for last comp */
1361 };
1362 
1363 /*
1364  * lookup4_param a set of parameters to nfs4lookup_setup -
1365  * used to setup a path lookup compound request.
1366  */
1367 typedef struct lookup4_param {
1368 	enum lkp4_attr_setup l4_getattrs; /* (in) get attrs in the lookup? */
1369 	int		header_len;	/* (in) num ops before first lookup  */
1370 	int		trailer_len;	/* (in) num ops after last	*/
1371 					/*	Lookup/Getattr		*/
1372 	bitmap4		ga_bits;	/* (in) Which attributes for Getattr */
1373 	COMPOUND4args_clnt *argsp;	/* (in/out) args for compound struct */
1374 	COMPOUND4res_clnt  *resp;	/* (in/out) res for compound  struct */
1375 	int		arglen;		/* (out) argop buffer alloc'd length */
1376 	struct mntinfo4 *mi;
1377 } lookup4_param_t;
1378 
1379 
1380 #define	NFS4_FATTR4_FINISH	-1	/* fattr4 index indicating finish */
1381 
1382 typedef int (*nfs4attr_to_os_t)(int, union nfs4_attr_u *,
1383 		struct nfs4attr_to_osattr *);
1384 
1385 /*
1386  * The nfs4_error_t is the basic structure to return error values
1387  * from rfs4call.  It encapsulates the unix errno
1388  * value, the nfsstat4 value and the rpc status value into a single
1389  * structure.
1390  *
1391  * If error is set, then stat is ignored and rpc_status may be
1392  * set if the error occurred as the result of a CLNT_CALL.  If
1393  * stat is set, then rpc request succeeded, error and
1394  * rpc_status are set to 0 and stat contains the result of
1395  * operation, NFS4_OK or one of the NFS4ERR_* values.
1396  *
1397  * Functions which want to generate errors independently from
1398  * rfs4call should set error to the desired errno value and
1399  * set stat and rpc_status to 0.  nfs4_error_init() is a
1400  * convenient function to do this.
1401  */
1402 typedef struct {
1403 	int		error;
1404 	nfsstat4	stat;
1405 	enum clnt_stat	rpc_status;
1406 } nfs4_error_t;
1407 
1408 /*
1409  * Shared functions
1410  */
1411 extern void	rfs4_op_readdir(nfs_argop4 *, nfs_resop4 *,
1412 			struct svc_req *, struct compound_state *);
1413 extern void	nfs_fh4_copy(nfs_fh4 *, nfs_fh4 *);
1414 
1415 extern void	nfs4_fattr4_free(fattr4 *);
1416 
1417 extern int	nfs4lookup_setup(char *, lookup4_param_t *, int);
1418 extern void	nfs4_getattr_otw_norecovery(vnode_t *,
1419 			nfs4_ga_res_t *, nfs4_error_t *, cred_t *, int);
1420 extern int	nfs4_getattr_otw(vnode_t *, nfs4_ga_res_t *, cred_t *, int);
1421 extern int	nfs4cmpfh(const nfs_fh4 *, const nfs_fh4 *);
1422 extern int	nfs4cmpfhandle(nfs4_fhandle_t *, nfs4_fhandle_t *);
1423 extern int	nfs4getattr(vnode_t *, struct vattr *, cred_t *);
1424 extern int	nfs4_waitfor_purge_complete(vnode_t *);
1425 extern int	nfs4_validate_caches(vnode_t *, cred_t *);
1426 extern int	nfs4init(int, char *);
1427 extern void	nfs4fini(void);
1428 extern int	nfs4_vfsinit(void);
1429 extern void	nfs4_vfsfini(void);
1430 
1431 extern void	nfs4_vnops_init(void);
1432 extern void	nfs4_vnops_fini(void);
1433 extern void	nfs_idmap_init(void);
1434 extern void	nfs_idmap_flush(int);
1435 extern void	nfs_idmap_fini(void);
1436 extern int	nfs4_rnode_init(void);
1437 extern int	nfs4_rnode_fini(void);
1438 extern int	nfs4_shadow_init(void);
1439 extern int	nfs4_shadow_fini(void);
1440 extern int	nfs4_acache_init(void);
1441 extern int	nfs4_acache_fini(void);
1442 extern int	nfs4_subr_init(void);
1443 extern int	nfs4_subr_fini(void);
1444 extern void	nfs4_acl_init(void);
1445 extern void	nfs4_acl_free_cache(vsecattr_t *);
1446 
1447 extern int	geterrno4(nfsstat4);
1448 extern nfsstat4	puterrno4(int);
1449 extern int	nfs4_need_to_bump_seqid(COMPOUND4res_clnt *);
1450 extern int	nfs4tsize(void);
1451 extern int	checkauth4(struct compound_state *, struct svc_req *);
1452 extern nfsstat4 call_checkauth4(struct compound_state *, struct svc_req *);
1453 extern int	is_exported_sec(int, struct exportinfo *);
1454 extern void	nfs4_vmask_to_nmask(uint_t, bitmap4 *);
1455 extern void	nfs4_vmask_to_nmask_set(uint_t, bitmap4 *);
1456 extern int	nfs_idmap_str_uid(utf8string *u8s, uid_t *, bool_t);
1457 extern int	nfs_idmap_str_gid(utf8string *u8s, gid_t *, bool_t);
1458 extern int	nfs_idmap_uid_str(uid_t, utf8string *u8s, bool_t);
1459 extern int	nfs_idmap_gid_str(gid_t gid, utf8string *u8s, bool_t);
1460 extern int	nfs4_time_ntov(nfstime4 *, timestruc_t *);
1461 extern int	nfs4_time_vton(timestruc_t *, nfstime4 *);
1462 extern char	*utf8_to_str(utf8string *, uint_t *, char *);
1463 extern char	*utf8_to_fn(utf8string *, uint_t *, char *);
1464 extern utf8string *str_to_utf8(char *, utf8string *);
1465 extern utf8string *utf8_copy(utf8string *, utf8string *);
1466 extern int	utf8_compare(const utf8string *, const utf8string *);
1467 extern nfsstat4	utf8_dir_verify(utf8string *);
1468 extern char	*utf8_strchr(utf8string *, const char);
1469 extern int	ln_ace4_cmp(nfsace4 *, nfsace4 *, int);
1470 extern int	vs_aent_to_ace4(vsecattr_t *, vsecattr_t *, int, int);
1471 extern int	vs_ace4_to_aent(vsecattr_t *, vsecattr_t *, uid_t, gid_t,
1472     int, int);
1473 extern int	vs_ace4_to_acet(vsecattr_t *, vsecattr_t *, uid_t, gid_t,
1474     int);
1475 extern int	vs_acet_to_ace4(vsecattr_t *, vsecattr_t *, int);
1476 extern void	vs_acet_destroy(vsecattr_t *);
1477 extern void	vs_ace4_destroy(vsecattr_t *);
1478 extern void	vs_aent_destroy(vsecattr_t *);
1479 
1480 extern int	vn_find_nfs_record(vnode_t *, nvlist_t **, char **, char **);
1481 extern int	vn_is_nfs_reparse(vnode_t *, cred_t *);
1482 extern fs_locations4 *fetch_referral(vnode_t *, cred_t *);
1483 extern char	*build_symlink(vnode_t *, cred_t *, size_t *);
1484 
1485 extern int	stateid4_cmp(stateid4 *, stateid4 *);
1486 
1487 extern vtype_t	nf4_to_vt[];
1488 
1489 extern struct nfs4_ntov_map nfs4_ntov_map[];
1490 extern uint_t nfs4_ntov_map_size;
1491 
1492 extern struct vfsops	*nfs4_vfsops;
1493 extern struct vnodeops	*nfs4_vnodeops;
1494 extern const struct	fs_operation_def nfs4_vnodeops_template[];
1495 extern vnodeops_t	*nfs4_trigger_vnodeops;
1496 extern const struct	fs_operation_def nfs4_trigger_vnodeops_template[];
1497 
1498 extern uint_t nfs4_tsize(struct knetconfig *);
1499 extern uint_t rfs4_tsize(struct svc_req *);
1500 
1501 extern bool_t	xdr_inline_decode_nfs_fh4(uint32_t *, nfs_fh4_fmt_t *,
1502 			uint32_t);
1503 extern bool_t	xdr_inline_encode_nfs_fh4(uint32_t **, uint32_t *,
1504 			nfs_fh4_fmt_t *);
1505 
1506 #ifdef DEBUG
1507 extern int		rfs4_do_pre_op_attr;
1508 extern int		rfs4_do_post_op_attr;
1509 #endif
1510 
1511 extern stateid4 clnt_special0;
1512 extern stateid4 clnt_special1;
/*
 * Evaluates to 1 when stateid4_cmp() returns non-zero for id against
 * clnt_special0 or clnt_special1, and to 0 otherwise.  clnt_special1 is
 * only compared when the clnt_special0 comparison returns zero.
 */
#define	CLNT_ISSPECIAL(id)					\
	(!(!stateid4_cmp(id, &clnt_special0) &&			\
	    !stateid4_cmp(id, &clnt_special1)))
1515 
1516 /* State's functions */
1517 extern void rfs4_ss_clid(nfs4_srv_t *nsrv4, rfs4_client_t *);
1518 extern void rfs4_ss_chkclid(nfs4_srv_t *nsrv4, rfs4_client_t *);
1519 
1520 /* Declarations for nfs4.x */
1521 nfsstat4 do_rfs4_op_secinfo(struct compound_state *, char *, SECINFO4res *);
1522 
1523 /*
1524  * The NFS Version 4 service procedures.
1525  */
1526 
1527 extern void	rfs4_do_server_start(int, int, int);
1528 extern void	rfs4_compound(COMPOUND4args *, COMPOUND4res *,
1529 			compound_state_t *, struct svc_req *, int *);
1530 extern void rfs4_init_compound_state(struct compound_state *);
1531 extern void rfs4_fini_compound_state(struct compound_state *);
1532 
1533 struct rpcdisp;
1534 extern int rfs4_dispatch(struct rpcdisp *, struct svc_req *, SVCXPRT *, char *);
1535 extern void	rfs4_compound_free(COMPOUND4res *);
1536 extern void	rfs4_compound_flagproc(COMPOUND4args *, int *);
1537 
1538 extern void	rfs4_srvrinit(void);
1539 extern void	rfs4_srvrfini(void);
1540 extern void	rfs4_srv_zone_init(nfs_globals_t *);
1541 extern void	rfs4_srv_zone_fini(nfs_globals_t *);
1542 extern void	rfs4_state_g_init(void);
1543 extern void	rfs4_state_zone_init(nfs4_srv_t *);
1544 extern void	rfs4_state_g_fini(void);
1545 extern void	rfs4_state_zone_fini(void);
1546 extern nfs4_srv_t *nfs4_get_srv(void);
1547 
1548 #endif
1549 #ifdef	__cplusplus
1550 }
1551 #endif
1552 
1553 #endif /* _NFS4_H */
1554