xref: /illumos-gate/usr/src/uts/common/nfs/export.h (revision b89a8333)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 #ifndef	_NFS_EXPORT_H
30 #define	_NFS_EXPORT_H
31 
32 #include <nfs/nfs_sec.h>
33 #include <nfs/auth.h>
34 #include <sys/vnode.h>
35 #include <nfs/nfs4.h>
36 #include <sys/kiconv.h>
37 
38 #ifdef	__cplusplus
39 extern "C" {
40 #endif
41 
/*
 * NFS pseudo flavor numbers are owned by IANA.  NFS_FLAVOR_NOMAP is a
 * Solaris-specific value, so it must not collide with any pseudo flavor
 * number IANA defines in the future.  A collision is considered very
 * unlikely because few new flavor numbers are expected.
 */
#define	NFS_FLAVOR_NOMAP	999999	/* no nfs flavor mapping */
50 
/*
 * The arguments handed to exportfs may contain duplicate flavors.  A
 * de-duplicated copy of the flavor array is built on the stack, and this
 * constant gives the size to allocate for it.
 */
#define	MAX_FLAVORS		6	/* none, sys, dh, krb5, krb5i krb5p */
57 
58 /*
59  * Note: exported_lock is currently used to ensure the integrity of
60  * the secinfo fields.
61  */
62 struct secinfo {
63 	seconfig_t	s_secinfo;	/* /etc/nfssec.conf entry */
64 	unsigned int	s_flags;	/* flags (see below) */
65 	int32_t		s_refcnt;	/* reference count for tracking */
66 					/* how many children (self included) */
67 					/* use this flavor. */
68 	int 		s_window;	/* window */
69 	uint_t		s_rootid;	/* UID to use for authorized roots */
70 	int		s_rootcnt;	/* count of root names */
71 	caddr_t		*s_rootnames;	/* array of root names */
72 					/* they are strings for AUTH_DES and */
73 					/* rpc_gss_principal_t for RPCSEC_GSS */
74 };
75 
#ifdef _SYSCALL32
/*
 * Fixed-width counterpart of struct secinfo, only compiled when
 * _SYSCALL32 is defined.  The fields mirror struct secinfo one for one;
 * the s_rootnames pointer is carried as a caddr32_t.
 */
struct secinfo32 {
	seconfig32_t	s_secinfo;	/* entry from /etc/nfssec.conf */
	uint32_t	s_flags;	/* per-mode flags (M_* values) */
	int32_t		s_refcnt;	/* reference count: tracks how many */
					/* children (self included) use */
					/* this flavor. */
	int32_t		s_window;	/* window */
	uint32_t	s_rootid;	/* UID to use for authorized roots */
	int32_t		s_rootcnt;	/* number of root names */
	caddr32_t	s_rootnames;	/* array of root names: strings */
					/* for AUTH_DES, */
					/* rpc_gss_principal_t for */
					/* RPCSEC_GSS */
};
#endif /* _SYSCALL32 */
91 
/*
 * Definitions related to security negotiation.
 */

#define	SEC_QUERY	0x01	/* query sec modes */
97 
98 struct sec_ol {
99 	int		sec_flags;	/* security nego flags */
100 	uint_t		sec_index;	/* index into sec flavor array */
101 };
102 
/*
 * Per-mode flags (secinfo.s_flags)
 *
 * Each flag is a distinct bit, so the values may be OR-ed together.
 */
#define	M_RO		0x01	/* exported ro to all */
#define	M_ROL		0x02	/* exported ro to all listed */
#define	M_RW		0x04	/* exported rw to all */
#define	M_RWL		0x08	/* exported rw to all listed */
#define	M_ROOT		0x10	/* root list is defined */
#define	M_4SEC_EXPORTED	0x20	/* this is an explicitly shared flavor */
#define	M_NONE		0x40	/* none list is defined */
113 
/*
 * Helpers for inspecting a secinfo's reference count and flags.  Each
 * takes a pointer to a struct secinfo; the argument is evaluated once by
 * the first three macros and twice by SEC_REF_SELF.
 */

/* true when the reference count is invalid (zero or negative) */
#define	SEC_REF_INVALID(p)	((p)->s_refcnt <= 0)

/* true when exactly one reference is left */
#define	SEC_REF_LAST(p)		((p)->s_refcnt == 1)

/* non-zero when the flavor is explicitly shared for the exported node */
#define	SEC_REF_EXPORTED(p)	((p)->s_flags & M_4SEC_EXPORTED)

/* true when the only remaining reference is the entry referring to itself */
#define	SEC_REF_SELF(p)		(SEC_REF_LAST(p) && SEC_REF_EXPORTED(p))
125 
/*
 * Export description handed to exportfs().  This is version 2 of the
 * structure; ex_version carries the version number.
 */
#define	EX_CURRENT_VERSION 2	/* current version of exportdata struct */

struct exportdata {
	int		ex_version;	/* version of this structure */
	char		*ex_path;	/* path being exported */
	size_t		ex_pathlen;	/* length of ex_path */
	int		ex_flags;	/* flags */
	unsigned int	ex_anon;	/* uid for unauthenticated requests */
	int		ex_seccnt;	/* number of security modes */
	struct secinfo	*ex_secinfo;	/* security mode info */
	char		*ex_index;	/* index file for public filesystem */
	char		*ex_log_buffer;	/* path to logging buffer file */
	size_t		ex_log_bufferlen;	/* buffer file path len */
	char		*ex_tag;	/* tag used to identify log config */
	size_t		ex_taglen;	/* length of ex_tag */
};
145 
#ifdef _SYSCALL32
/*
 * Fixed-width counterpart of struct exportdata, only compiled when
 * _SYSCALL32 is defined.  Pointers are carried as caddr32_t and the
 * length fields as int32_t.
 */
struct exportdata32 {
	int32_t		ex_version;	/* version of this structure */
	caddr32_t	ex_path;	/* path being exported */
	int32_t		ex_pathlen;	/* length of ex_path */
	int32_t		ex_flags;	/* flags */
	uint32_t	ex_anon;	/* uid for unauthenticated requests */
	int32_t		ex_seccnt;	/* number of security modes */
	caddr32_t	ex_secinfo;	/* security mode info */
	caddr32_t	ex_index;	/* index file for public filesystem */
	caddr32_t	ex_log_buffer;	/* path to logging buffer file */
	int32_t		ex_log_bufferlen;	/* buffer file path len */
	caddr32_t	ex_tag;		/* tag used to identify log config */
	int32_t		ex_taglen;	/* length of ex_tag */
};
#endif /* _SYSCALL32 */
162 
/*
 * Flags for an exported vfs.  Each value is a distinct bit.
 */

#define	EX_NOSUID	0x01	/* exported with unsettable set[ug]ids */
#define	EX_ACLOK	0x02	/* exported with maximal access if acl exists */
#define	EX_PUBLIC	0x04	/* exported with public filehandle */
#define	EX_NOSUB	0x08	/* no nfs_getfh or MCL below export point */
#define	EX_INDEX	0x10	/* exported with index file specified */
#define	EX_LOG		0x20	/* logging enabled */
#define	EX_LOG_ALLOPS	0x40	/* logging of all RPC operations enabled; */
				/* by default only operations which affect */
				/* transaction logging are enabled */
#define	EX_PSEUDO	0x80	/* pseudo filesystem export */
#ifdef VOLATILE_FH_TEST
/* Only available in builds that define VOLATILE_FH_TEST. */
#define	EX_VOLFH	0x100	/* XXX nfsv4 fh may expire anytime */
#define	EX_VOLRNM	0x200	/* XXX nfsv4 fh expire at rename */
#define	EX_VOLMIG	0x400	/* XXX nfsv4 fh expire at migration */
#define	EX_NOEXPOPEN	0x800	/* XXX nfsv4 fh no expire with open */
#endif /* VOLATILE_FH_TEST */

#define	EX_CHARMAP	0x1000	/* NFS may need a character set conversion */
185 
186 #ifdef	_KERNEL
187 
#define	RPC_IDEMPOTENT	0x1	/* idempotent or not */
/*
 * Be very careful about which NFS procedures get the RPC_ALLOWANON bit.
 * Right now, if this bit is on, we ignore the results of per NFS request
 * access control.
 */
#define	RPC_ALLOWANON	0x2	/* allow anonymous access */
#define	RPC_MAPRESP	0x4	/* use mapped response buffer */
#define	RPC_AVOIDWORK	0x8	/* do work avoidance for dups */
#define	RPC_PUBLICFH_OK	0x10	/* allow use of public filehandle */

/*
 * RPC_ALL is an OR of the bits above, with the exception of RPC_MAPRESP,
 * which is not part of it.  It is meant for "don't care" nfsv4 ops: the
 * flags of an nfsv4 request are the bit-AND of the per-op flags, so a
 * "don't care" op must not clear any of the bits included here.
 */
#define	RPC_ALL	(RPC_IDEMPOTENT|RPC_ALLOWANON|RPC_AVOIDWORK|RPC_PUBLICFH_OK)
205 
206 
#ifdef VOLATILE_FH_TEST
/*
 * Singly linked list element holding one nfs_fh4_fmt_t.  Only compiled
 * when VOLATILE_FH_TEST is defined; struct exportinfo keeps the list head
 * in exi_vol_rename.
 */
struct ex_vol_rename {
	nfs_fh4_fmt_t		vrn_fh_fmt;
	struct ex_vol_rename	*vrn_next;	/* next list element */
};
#endif /* VOLATILE_FH_TEST */
213 
214 /*
215  * An authorization cache entry
216  */
217 struct auth_cache {
218 	struct netbuf		auth_addr;
219 	int			auth_flavor;
220 	int			auth_access;
221 	time_t			auth_time;
222 	struct auth_cache	*auth_next;
223 };
224 
#define	AUTH_TABLESIZE	32	/* entries in exportinfo.exi_cache[] */
226 
227 /*
228  * Structure containing log file meta-data.
229  */
230 struct log_file {
231 	unsigned int	lf_flags;	/* flags (see below) */
232 	int		lf_writers;	/* outstanding writers */
233 	int		lf_refcnt;	/* references to this struct */
234 	caddr_t		lf_path;	/* buffer file location */
235 	vnode_t		*lf_vp;		/* vnode for the buffer file */
236 	kmutex_t	lf_lock;
237 	kcondvar_t	lf_cv_waiters;
238 };
239 
/*
 * Flags shared by log_file (lf_flags) and log_buffer (lb_flags).
 */
#define	L_WAITING	0x01		/* flush of in-core data to stable */
					/* storage is in progress */
#define	L_PRINTED	0x02		/* error message printed to console */
#define	L_ERROR		0x04		/* error condition detected */
247 
248 /*
249  * The logging buffer information.
250  * This structure may be shared by multiple exportinfo structures,
251  * if they share the same buffer file.
252  * This structure contains the basic information about the buffer, such
253  * as it's location in the filesystem.
254  *
255  * 'lb_lock' protects all the fields in this structure except for 'lb_path',
256  * and 'lb_next'.
257  * 'lb_path' is a write-once/read-many field which needs no locking, it is
258  * set before the structure is linked to any exportinfo structure.
259  * 'lb_next' is protected by the log_buffer_list_lock.
260  */
261 struct log_buffer {
262 	unsigned int	lb_flags;	/* L_ONLIST set? */
263 	int		lb_refcnt;	/* references to this struct */
264 	unsigned int	lb_rec_id;	/* used to generate unique id */
265 	caddr_t		lb_path;	/* buffer file pathname */
266 	struct log_file	*lb_logfile;	/* points to log_file structure */
267 	kmutex_t	lb_lock;
268 	struct log_buffer	*lb_next;
269 	kcondvar_t	lb_cv_waiters;
270 	caddr_t		lb_records;	/* linked list of records to write */
271 	int		lb_num_recs;	/* # of records to write */
272 	ssize_t		lb_size_queued; /* number of bytes queued for write */
273 };
274 
/*
 * LOG_BUFFER_HOLD increments lb_refcnt of the log_buffer 'lbp' while
 * holding its lb_lock.  'lbp' is evaluated more than once, so it must
 * not have side effects.
 *
 * LOG_BUFFER_RELE passes 'lbp' to log_buffer_rele(), which is defined
 * outside this header (presumably it drops the reference taken by
 * LOG_BUFFER_HOLD -- confirm against its definition).
 *
 * Both macros expand to a single do { } while (0) statement so that
 * "if (x) LOG_BUFFER_HOLD(lbp); else ..." parses as intended.  An
 * invocation must be terminated with a semicolon.
 */
#define	LOG_BUFFER_HOLD(lbp)	do { \
	mutex_enter(&(lbp)->lb_lock); \
	(lbp)->lb_refcnt++; \
	mutex_exit(&(lbp)->lb_lock); \
} while (0)

#define	LOG_BUFFER_RELE(lbp)	do { \
	log_buffer_rele(lbp); \
} while (0)
284 
285 /*
286  * Structure for character set conversion mapping based on client address.
287  */
288 struct charset_cache {
289 	struct charset_cache *next;
290 	kiconv_t	inbound;
291 	kiconv_t	outbound;
292 	struct sockaddr	client_addr;
293 };
294 
/*
 * Forward declarations: treenode refers to both structures by pointer
 * before their full definitions appear further down.
 */
struct exp_visible;
struct exportinfo;
298 
299 /*
300  * Treenodes are used to build tree representing every node which is part
301  * of nfs server pseudo namespace.
302  * This tree is interconnected with both exportinfo and exp_visible struct.
303  * When there is a need to walk the namespace (either starting in
304  * exportinfo or in exp_visible) we first make a step aside (to the left),
305  * walk up or down as needed, and then we step back (to the right).
306  *
307  *
308  *
309  *     NEW DATA STRUCT         ORIGINAL DATA STRUCT
310  *
311  * ns_root +---+               +----------+
312  *         | / |               |PSEUDO EXP|-->+---+   +---+   +---+
313  *         +---+---------  ----+----------+   | a |-->| k |-->| b |
314  *          /\                                +---+   +---+   +---+
315  *         /  \                                .       .       .
316  *     +---+...\.........  .....................       .       .
317  *    *| a |    \              +----------+            .       .
318  *     +---+-----\-------  ----|REAL EXP a|            .       .
319  *       /        \            +----------+            .       .
320  *      /        +===+...  .............................       .
321  *     /        *| k |         +----------+                    .
322  *    /          +===+---  ----|REAL EXP k|                    .
323  *   /                         +----------+                    .
324  *  +===+................  .....................................
325  * *| b |                      +----------+
326  *  +===+----------------  ----|REAL EXP b|-->+---+
327  *     \                       +----------+   | d |
328  *     +===+.............  ...................+---+
329  *     | d |                   +----------+
330  *     +===+-------------  ----|PSEUDO EXP|-->+---+   +---+
331  *     /                       +----------+   | e |-->| g |
332  * +---+.................  ...................+---+   +---+
333  * | e |                                              .
334  * +---+                                              .
335  *    \                                               .
336  *    +---+..............  ............................
337  *   *| g |                    +----------+
338  *    +---+--------------  ----|REAL EXP g|
339  *                             +----------+
340  *
341  *
342  *
343  * +===+               +---+                    +---+
344  * | b |..mountpoint   | e |..directory/file   *| a |..node is shared
345  * +===+  (VROOT)      +---+                    +---+
346  *
347  *
348  * Bi-directional interconnect:
349  *
350  * treenode_t::tree_exi ---------  exportinfo_t::exi_tree
351  * treenode_t::tree_vis ......... exp_visible_t::vis_tree
352  */
/* Access to treenode_t is under protection of exported_lock RW_LOCK */
typedef struct treenode {
	/* generic n-ary tree linkage */
	struct treenode		*tree_parent;
	struct treenode		*tree_child_first;
	struct treenode		*tree_sibling;	/* next sibling */
	/* private, nfs specific part */
	struct exportinfo	*tree_exi;
	struct exp_visible	*tree_vis;
} treenode_t;
363 
/*
 * Predicates on a treenode pointer 't' ('t' is evaluated more than once):
 *
 * TREE_ROOT	 - the node has an exportinfo whose vnode carries VROOT,
 *		   i.e. it corresponds to a filesystem root
 * TREE_EXPORTED - the node has an exportinfo that is not a pseudo export,
 *		   i.e. it is explicitly shared
 */

#define	TREE_ROOT(t) \
	((t)->tree_exi && ((t)->tree_exi->exi_vp->v_flag & VROOT))

#define	TREE_EXPORTED(t) \
	((t)->tree_exi && !PSEUDO((t)->tree_exi))
374 
/*
 * Root of nfs pseudo namespace.
 *
 * NOTE(review): this line is an object definition (not an "extern"
 * declaration) inside a header, so every file including export.h with
 * _KERNEL defined gets its own tentative definition of ns_root and the
 * link relies on common-symbol merging.  Consider declaring it "extern"
 * here and defining it in exactly one .c file; that is not done in this
 * header alone because the .c files are not visible from here.
 */
struct treenode *ns_root;
377 
/* presumably the size of exptable[]; the array bound is not visible here */
#define	EXPTABLESIZE	16
379 
380 /*
381  * A node associated with an export entry on the
382  * list of exported filesystems.
383  *
384  * exi_count+exi_lock protects an individual exportinfo from being freed
385  * when in use.
386  * You must have the writer lock on exported_lock to add/delete an exportinfo
387  * structure to/from the list.
388  *
389  * exi_volatile_dev maps to VSW_VOLATILEDEV.  It means that the
390  * underlying fs devno can change on each mount.  When set, the server
391  * should not use va_fsid for a GETATTR(FATTR4_FSID) reply.  It must
392  * use exi_fsid because it is guaranteed to be persistent.  This isn't
393  * in any way related to NFS4 volatile filehandles.
394  */
395 struct exportinfo {
396 	struct exportdata	exi_export;
397 	fsid_t			exi_fsid;
398 	struct fid		exi_fid;
399 	struct exportinfo	*exi_hash;
400 	struct treenode		*exi_tree;
401 	fhandle_t		exi_fh;
402 	krwlock_t		exi_cache_lock;
403 	kmutex_t		exi_lock;
404 	uint_t			exi_count;
405 	vnode_t			*exi_vp;
406 	vnode_t			*exi_dvp;
407 	struct auth_cache	*exi_cache[AUTH_TABLESIZE];
408 	struct log_buffer	*exi_logbuffer;
409 	struct exp_visible	*exi_visible;
410 	struct charset_cache	*exi_charset;
411 	unsigned		exi_volatile_dev:1;
412 #ifdef VOLATILE_FH_TEST
413 	uint32_t		exi_volatile_id;
414 	struct ex_vol_rename	*exi_vol_rename;
415 	kmutex_t		exi_vol_rename_lock;
416 #endif /* VOLATILE_FH_TEST */
417 };
418 
419 typedef struct exportinfo exportinfo_t;
420 typedef struct exportdata exportdata_t;
421 typedef struct secinfo secinfo_t;
422 
423 /*
424  * exp_visible is a visible list per filesystem. It is for filesystems
425  * that may need a limited view of its contents. A pseudo export and
426  * a real export at the mount point (VROOT) which has a subtree shared
427  * has a visible list.
428  *
429  * The exi_visible field is NULL for normal, non=pseudo filesystems
430  * which do not have any subtree exported. If the field is non-null,
431  * it points to a list of visible entries, identified by vis_fid and/or
432  * vis_ino. The presence of a "visible" list means that if this export
433  * can only have a limited view, it can only view the entries in the
434  * exp_visible list. The directories in the fid list comprise paths that
435  * lead to exported directories.
436  *
437  * The vis_count field records the number of paths in this filesystem
438  * that use this directory. The vis_exported field is non-zero if the
439  * entry is an exported directory (leaf node).
440  */
441 
442 struct exp_visible {
443 	vnode_t			*vis_vp;
444 	fid_t			vis_fid;
445 	u_longlong_t		vis_ino;
446 	int			vis_count;
447 	int			vis_exported;
448 	struct exp_visible	*vis_next;
449 	struct treenode		*vis_tree;
450 	struct secinfo		*vis_secinfo;
451 	int			vis_seccnt;
452 };
453 typedef struct exp_visible exp_visible_t;
454 
/* non-zero when the export's ex_flags carry EX_PSEUDO */
#define	PSEUDO(exi)	((exi)->exi_export.ex_flags & EX_PSEUDO)
456 
/*
 * Equality helpers.  All arguments are pointers and are evaluated more
 * than once.
 *
 * EQFSID	- both val[] words of the two fsids match
 * EQFID	- the fids have the same fid_len and the first fid_len bytes
 *		  of fid_data compare equal
 * exportmatch	- the exportinfo's exi_fsid and exi_fid match the given
 *		  fsid and fid
 */
#define	EQFSID(fsidp1, fsidp2)	\
	(((fsidp1)->val[0] == (fsidp2)->val[0]) && \
	((fsidp1)->val[1] == (fsidp2)->val[1]))

#define	EQFID(fidp1, fidp2)	\
	(((fidp1)->fid_len == (fidp2)->fid_len) && \
	(bcmp((char *)(fidp1)->fid_data, (char *)(fidp2)->fid_data, \
	(uint_t)(fidp1)->fid_len) == 0))

#define	exportmatch(exi, fsid, fid)	\
	(EQFSID(&(exi)->exi_fsid, (fsid)) && \
	EQFID(&(exi)->exi_fid, (fid)))
468 
/*
 * Read-only checks for an exported filesystem.
 *
 * rdonly()  is non-zero when nfsauth_access() reports NFSAUTH_RO for the
 *	     request, i.e. the export is read-only to the given host.
 * rdonly4() is true when the vnode itself is read-only, or when
 *	     nfsauth4_access() reports NFSAUTH_RO or NFSAUTH_LIMITED.
 *
 * Note: keep these as cheap as possible; they are evaluated on every NFS
 * modification request.
 */
#define	rdonly(exi, req)	(nfsauth_access(exi, req) & NFSAUTH_RO)
#define	rdonly4(exi, vp, req)	\
	(vn_is_readonly(vp) || \
	(nfsauth4_access(exi, vp, req) & (NFSAUTH_RO | NFSAUTH_LIMITED)))
479 
480 extern int	nfsauth4_access(struct exportinfo *, vnode_t *,
481 				struct svc_req *);
482 extern int	nfsauth4_secinfo_access(struct exportinfo *,
483 				struct svc_req *, int, int);
484 extern int	nfs_fhhash(fsid_t *, fid_t *);
485 extern int	nfs_fhbcmp(char *, char *, int);
486 extern int	nfs_exportinit(void);
487 extern void	nfs_exportfini(void);
488 extern int	chk_clnt_sec(struct exportinfo *, struct svc_req *req);
489 extern int	makefh(fhandle_t *, struct vnode *, struct exportinfo *);
490 extern int	makefh_ol(fhandle_t *, struct exportinfo *, uint_t);
491 extern int	makefh3(nfs_fh3 *, struct vnode *, struct exportinfo *);
492 extern int	makefh3_ol(nfs_fh3 *, struct exportinfo *, uint_t);
493 extern vnode_t *nfs_fhtovp(fhandle_t *, struct exportinfo *);
494 extern vnode_t *nfs3_fhtovp(nfs_fh3 *, struct exportinfo *);
495 extern vnode_t *lm_fhtovp(fhandle_t *fh);
496 extern vnode_t *lm_nfs3_fhtovp(nfs_fh3 *fh);
497 extern struct	exportinfo *checkexport(fsid_t *, struct fid *);
498 extern struct	exportinfo *checkexport4(fsid_t *, struct fid *, vnode_t *vp);
499 extern void	exi_rele(struct exportinfo *);
500 extern struct exportinfo *nfs_vptoexi(vnode_t *, vnode_t *, cred_t *, int *,
501     int *, bool_t);
502 extern int	nfs_check_vpexi(vnode_t *, vnode_t *, cred_t *,
503 			struct exportinfo **);
504 extern void	export_link(struct exportinfo *);
505 extern int	export_unlink(fsid_t *, fid_t *, vnode_t *,
506 			struct exportinfo **);
507 extern vnode_t *untraverse(vnode_t *);
508 
509 /*
510  * Functions that handle the NFSv4 server namespace
511  */
512 extern exportinfo_t *vis2exi(struct exp_visible *);
513 extern int	treeclimb_export(struct exportinfo *);
514 extern void	treeclimb_unexport(struct exportinfo *);
515 extern int	nfs_visible(struct exportinfo *, vnode_t *, int *);
516 extern int	nfs_visible_inode(struct exportinfo *, ino64_t, int *);
517 extern int	has_visible(struct exportinfo *, vnode_t *);
518 extern void	free_visible(struct exp_visible *);
519 extern int	nfs_exported(struct exportinfo *, vnode_t *);
520 extern int	pseudo_exportfs(vnode_t *, struct exp_visible *,
521     struct exportdata *, struct exportinfo **);
522 extern int	vop_fid_pseudo(vnode_t *, fid_t *fidp);
523 extern int	nfs4_vget_pseudo(struct exportinfo *, vnode_t **, fid_t *);
/*
 * Routines that deal with the security flavor information of the NFSv4
 * server namespace (prototypes only).
 */
extern void	srv_secinfo_exp2pseu(struct exportdata *,
    struct exportdata *);
extern void	srv_secinfo_list_free(struct secinfo *, int);
530 
531 /*
532  * "public" and default (root) location for public filehandle
533  */
534 extern struct exportinfo *exi_public, *exi_root;
535 extern fhandle_t nullfh2;	/* for comparing V2 filehandles */
536 extern krwlock_t exported_lock;
537 extern struct exportinfo *exptable[];
538 
/*
 * Macros that recognize public filehandles.  'fh' is a pointer.
 *
 * PUBLIC_FH2 - a v2 public filehandle is 32 zero bytes; fh_fsid.val[1]
 *		is tested first, then the whole handle is compared with
 *		nullfh2.
 * PUBLIC_FH3 - a v3 public filehandle is zero length.
 */
#define	PUBLIC_FH2(fh) \
	(((fh)->fh_fsid.val[1] == 0) && \
	(bcmp((fh), &nullfh2, sizeof (fhandle_t)) == 0))

#define	PUBLIC_FH3(fh) \
	((fh)->fh3_length == 0)
550 
551 extern int	makefh4(nfs_fh4 *, struct vnode *, struct exportinfo *);
552 extern vnode_t *nfs4_fhtovp(nfs_fh4 *, struct exportinfo *, nfsstat4 *);
553 
554 #endif /* _KERNEL */
555 
556 #ifdef	__cplusplus
557 }
558 #endif
559 
560 #endif	/* _NFS_EXPORT_H */
561