xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs4_srv.c (revision 214d537c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  *	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
28  *	All Rights Reserved
29  */
30 
31 /*
32  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
33  * Copyright 2019 Nexenta Systems, Inc.
34  * Copyright 2019 Nexenta by DDN, Inc.
35  * Copyright 2021 Racktop Systems, Inc.
36  */
37 
38 #include <sys/param.h>
39 #include <sys/types.h>
40 #include <sys/systm.h>
41 #include <sys/cred.h>
42 #include <sys/buf.h>
43 #include <sys/vfs.h>
44 #include <sys/vfs_opreg.h>
45 #include <sys/vnode.h>
46 #include <sys/uio.h>
47 #include <sys/errno.h>
48 #include <sys/sysmacros.h>
49 #include <sys/statvfs.h>
50 #include <sys/kmem.h>
51 #include <sys/dirent.h>
52 #include <sys/cmn_err.h>
53 #include <sys/debug.h>
54 #include <sys/systeminfo.h>
55 #include <sys/flock.h>
56 #include <sys/pathname.h>
57 #include <sys/nbmlock.h>
58 #include <sys/share.h>
59 #include <sys/atomic.h>
60 #include <sys/policy.h>
61 #include <sys/fem.h>
62 #include <sys/sdt.h>
63 #include <sys/ddi.h>
64 #include <sys/zone.h>
65 
66 #include <fs/fs_reparse.h>
67 
68 #include <rpc/types.h>
69 #include <rpc/auth.h>
70 #include <rpc/rpcsec_gss.h>
71 #include <rpc/svc.h>
72 
73 #include <nfs/nfs.h>
74 #include <nfs/nfssys.h>
75 #include <nfs/export.h>
76 #include <nfs/nfs_cmd.h>
77 #include <nfs/lm.h>
78 #include <nfs/nfs4.h>
79 #include <nfs/nfs4_drc.h>
80 
81 #include <sys/strsubr.h>
82 #include <sys/strsun.h>
83 
84 #include <inet/common.h>
85 #include <inet/ip.h>
86 #include <inet/ip6.h>
87 
88 #include <sys/tsol/label.h>
89 #include <sys/tsol/tndb.h>
90 
/*
 * Tunables: a retry count and a delay, each with a compile-time default
 * and a static variable initialised from it.
 * NOTE(review): the code consuming rfs4_maxlock_tries and rfs4_lock_delay
 * is not in this part of the file -- presumably a lock retry loop; confirm.
 */
91 #define	RFS4_MAXLOCK_TRIES 4	/* Try to get the lock this many times */
92 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
93 #define	RFS4_LOCK_DELAY 10	/* Milliseconds */
94 static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
/* Defined elsewhere; only referenced from this file. */
95 extern struct svc_ops rdma_svc_ops;
96 extern int nfs_loaned_buffers;
97 /* End of Tunables */
98 
/* Forward declaration; the definition is further down in this file. */
99 static int rdma_setup_read_data4(READ4args *, READ4res *);
100 
101 /*
102  * Used to bump the stateid4.seqid value and show changes in the stateid
103  */
/*
 * Pre-increments (sp)->bits.chgseq, so the macro's value is the new
 * sequence number.  sp is evaluated exactly once.
 */
104 #define	next_stateid(sp) (++(sp)->bits.chgseq)
105 
106 /*
107  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
108  *	This is used to return NFS4ERR_TOOSMALL when clients specify
109  *	maxcount that isn't large enough to hold the smallest possible
110  *	XDR encoded dirent.
111  *
112  *	    sizeof cookie (8 bytes) +
113  *	    sizeof name_len (4 bytes) +
114  *	    sizeof smallest (padded) name (4 bytes) +
115  *	    sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
116  *	    sizeof attrlist4_len (4 bytes) +
117  *	    sizeof next boolean (4 bytes)
118  *
119  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
120  * the smallest possible entry4 (assumes no attrs requested).
121  *	sizeof nfsstat4 (4 bytes) +
122  *	sizeof verifier4 (8 bytes) +
123  *	sizeof entry4list bool (4 bytes) +
124  *	sizeof entry4 (36 bytes) +
125  *	sizeof eof bool (4 bytes)
126  *
127  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
128  *	VOP_READDIR.  Its value is the size of the maximum possible dirent
129  *	for solaris.  The DIRENT64_RECLEN macro returns	the size of dirent
130  *	required for a given name length.  MAXNAMELEN is the maximum
131  *	filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
132  *	macros are to allow for . and .. entries -- just a minor tweak to try
133  *	and guarantee that buffer we give to VOP_READDIR will be large enough
134  *	to hold ., .., and the largest possible solaris dirent64.
135  */
/* 8 + 4 + 4 + 12 + 4 + 4 bytes, per the entry4 breakdown above */
136 #define	RFS4_MINLEN_ENTRY4 36
/* status + verifier + entry list bool + one minimal entry4 + eof bool */
137 #define	RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
/* room for ".", ".." and one dirent64 carrying a MAXNAMELEN name */
138 #define	RFS4_MINLEN_RDDIR_BUF \
139 	(DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
140 
141 /*
142  * It would be better to pad to 4 bytes since that's what XDR would do,
143  * but the dirents UFS gives us are already padded to 8, so just take
144  * what we're given.  Dircount is only a hint anyway.  Currently the
145  * solaris kernel is ASCII only, so there's no point in calling the
146  * UTF8 functions.
147  *
148  * dirent64: name padded to provide 8 byte struct alignment
149  *	d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
150  *
151  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
152  *
153  */
/*
 * Dircount charged for one dirent64: three XDR units (the cookie and
 * utf8namelen fields listed above) plus the name length that
 * DIRENT64_NAMELEN() derives from the entry's d_reclen.
 */
154 #define	DIRENT64_TO_DIRCOUNT(dp) \
155 	(3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
156 
157 
/* Allocated with lm_alloc_sysidt() in rfs4_srvrinit(), freed in rfs4_srvrfini(). */
158 static sysid_t		lockt_sysid;	/* dummy sysid for all LOCKT calls */
159 
/* Assigned from fs_new_caller_id() in rfs4_srvrinit(). */
160 u_longlong_t	nfs4_srv_caller_id;
/* Key filled in by vsd_create() in rfs4_srvrinit(). */
161 uint_t		nfs4_srv_vkey = 0;
162 
163 void	rfs4_init_compound_state(struct compound_state *);
164 
165 static void	nullfree(caddr_t);
166 static void	rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
167 		    struct compound_state *);
168 static void	rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
169 		    struct compound_state *);
170 static void	rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
171 		    struct compound_state *);
172 static void	rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
173 		    struct compound_state *);
174 static void	rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
175 		    struct compound_state *);
176 static void	rfs4_op_create_free(nfs_resop4 *resop);
177 static void	rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
178 		    struct svc_req *, struct compound_state *);
179 static void	rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
180 		    struct svc_req *, struct compound_state *);
181 static void	rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
182 		    struct compound_state *);
183 static void	rfs4_op_getattr_free(nfs_resop4 *);
184 static void	rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
185 		    struct compound_state *);
186 static void	rfs4_op_getfh_free(nfs_resop4 *);
187 static void	rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
188 		    struct compound_state *);
189 static void	rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
190 		    struct compound_state *);
191 static void	rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
192 		    struct compound_state *);
193 static void	lock_denied_free(nfs_resop4 *);
194 static void	rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
195 		    struct compound_state *);
196 static void	rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
197 		    struct compound_state *);
198 static void	rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
199 		    struct compound_state *);
200 static void	rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
201 		    struct compound_state *);
202 static void	rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
203 		    struct svc_req *req, struct compound_state *cs);
204 static void	rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
205 		    struct compound_state *);
206 static void	rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
207 		    struct compound_state *);
208 static void	rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
209 		    struct svc_req *, struct compound_state *);
210 static void	rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
211 		    struct svc_req *, struct compound_state *);
212 static void	rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
213 		    struct compound_state *);
214 static void	rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
215 		    struct compound_state *);
216 static void	rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
217 		    struct compound_state *);
218 static void	rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
219 		    struct compound_state *);
220 static void	rfs4_op_read_free(nfs_resop4 *);
221 static void	rfs4_op_readdir_free(nfs_resop4 *resop);
222 static void	rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
223 		    struct compound_state *);
224 static void	rfs4_op_readlink_free(nfs_resop4 *);
225 static void	rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
226 		    struct svc_req *, struct compound_state *);
227 static void	rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
228 		    struct compound_state *);
229 static void	rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
230 		    struct compound_state *);
231 static void	rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
232 		    struct compound_state *);
233 static void	rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
234 		    struct compound_state *);
235 static void	rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
236 		    struct compound_state *);
237 static void	rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
238 		    struct compound_state *);
239 static void	rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
240 		    struct compound_state *);
241 static void	rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
242 		    struct compound_state *);
243 static void	rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
244 		    struct svc_req *, struct compound_state *);
245 static void	rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
246 		    struct svc_req *req, struct compound_state *);
247 static void	rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
248 		    struct compound_state *);
249 static void	rfs4_op_secinfo_free(nfs_resop4 *);
250 
251 static nfsstat4 check_open_access(uint32_t, struct compound_state *,
252 		    struct svc_req *);
253 nfsstat4	rfs4_client_sysid(rfs4_client_t *, sysid_t *);
254 void		rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
255 
256 
257 /*
258  * translation table for attrs
259  */
/*
 * NOTE(review): the code that fills and reads these fields is not in this
 * part of the file; the per-field notes below are assumptions to confirm.
 */
260 struct nfs4_ntov_table {
261 	union nfs4_attr_u *na;		/* presumably the attr value array */
262 	uint8_t amap[NFS4_MAXNUM_ATTRS];	/* one slot per possible attr */
263 	int attrcnt;			/* presumably entries in use */
264 	bool_t vfsstat;			/* TODO confirm meaning */
265 };
266 
/* Set-up and tear-down of the attribute translation table. */
267 static void	nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
268 static void	nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
269 		    struct nfs4_svgetit_arg *sargp);
270 
271 static nfsstat4	do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
272 		    struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
273 		    struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
274 
/* Called from rfs4_do_server_start() on a warm start when cluster_booted. */
275 static void	hanfsv4_failover(nfs4_srv_t *);
276 
/*
 * Created by fem_create() in rfs4_srvrinit() from nfs4_rd_deleg_tmpl and
 * nfs4_wr_deleg_tmpl respectively; released in rfs4_srvrfini().
 */
277 fem_t		*deleg_rdops;
278 fem_t		*deleg_wrops;
279 
280 /*
281  * NFS4 op dispatch table
282  */
283 
/*
 * One entry of rfsv4disptab.  Both function pointers are declared without
 * a prototype, which lets routines with different argument lists (for
 * example nullfree(caddr_t) and the nfs_resop4 * free routines) share the
 * table.  rfsv4disptab initialises entries positionally, so the member
 * order must not change.
 */
284 struct rfsv4disp {
285 	void	(*dis_proc)();		/* proc to call */
286 	void	(*dis_resfree)();	/* frees space allocated by proc */
287 	int	dis_flags;		/* RPC_IDEMPOTENT, etc... */
288 };
289 
290 static struct rfsv4disp rfsv4disptab[] = {
291 	/*
292 	 * NFS VERSION 4
293 	 */
294 
295 	/* RFS_NULL = 0 */
296 	{rfs4_op_illegal, nullfree, 0},
297 
298 	/* UNUSED = 1 */
299 	{rfs4_op_illegal, nullfree, 0},
300 
301 	/* UNUSED = 2 */
302 	{rfs4_op_illegal, nullfree, 0},
303 
304 	/* OP_ACCESS = 3 */
305 	{rfs4_op_access, nullfree, RPC_IDEMPOTENT},
306 
307 	/* OP_CLOSE = 4 */
308 	{rfs4_op_close, nullfree, 0},
309 
310 	/* OP_COMMIT = 5 */
311 	{rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
312 
313 	/* OP_CREATE = 6 */
314 	{rfs4_op_create, nullfree, 0},
315 
316 	/* OP_DELEGPURGE = 7 */
317 	{rfs4_op_delegpurge, nullfree, 0},
318 
319 	/* OP_DELEGRETURN = 8 */
320 	{rfs4_op_delegreturn, nullfree, 0},
321 
322 	/* OP_GETATTR = 9 */
323 	{rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
324 
325 	/* OP_GETFH = 10 */
326 	{rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
327 
328 	/* OP_LINK = 11 */
329 	{rfs4_op_link, nullfree, 0},
330 
331 	/* OP_LOCK = 12 */
332 	{rfs4_op_lock, lock_denied_free, 0},
333 
334 	/* OP_LOCKT = 13 */
335 	{rfs4_op_lockt, lock_denied_free, 0},
336 
337 	/* OP_LOCKU = 14 */
338 	{rfs4_op_locku, nullfree, 0},
339 
340 	/* OP_LOOKUP = 15 */
341 	{rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
342 
343 	/* OP_LOOKUPP = 16 */
344 	{rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
345 
346 	/* OP_NVERIFY = 17 */
347 	{rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
348 
349 	/* OP_OPEN = 18 */
350 	{rfs4_op_open, rfs4_free_reply, 0},
351 
352 	/* OP_OPENATTR = 19 */
353 	{rfs4_op_openattr, nullfree, 0},
354 
355 	/* OP_OPEN_CONFIRM = 20 */
356 	{rfs4_op_open_confirm, nullfree, 0},
357 
358 	/* OP_OPEN_DOWNGRADE = 21 */
359 	{rfs4_op_open_downgrade, nullfree, 0},
360 
361 	/* OP_OPEN_PUTFH = 22 */
362 	{rfs4_op_putfh, nullfree, RPC_ALL},
363 
364 	/* OP_PUTPUBFH = 23 */
365 	{rfs4_op_putpubfh, nullfree, RPC_ALL},
366 
367 	/* OP_PUTROOTFH = 24 */
368 	{rfs4_op_putrootfh, nullfree, RPC_ALL},
369 
370 	/* OP_READ = 25 */
371 	{rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
372 
373 	/* OP_READDIR = 26 */
374 	{rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
375 
376 	/* OP_READLINK = 27 */
377 	{rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
378 
379 	/* OP_REMOVE = 28 */
380 	{rfs4_op_remove, nullfree, 0},
381 
382 	/* OP_RENAME = 29 */
383 	{rfs4_op_rename, nullfree, 0},
384 
385 	/* OP_RENEW = 30 */
386 	{rfs4_op_renew, nullfree, 0},
387 
388 	/* OP_RESTOREFH = 31 */
389 	{rfs4_op_restorefh, nullfree, RPC_ALL},
390 
391 	/* OP_SAVEFH = 32 */
392 	{rfs4_op_savefh, nullfree, RPC_ALL},
393 
394 	/* OP_SECINFO = 33 */
395 	{rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
396 
397 	/* OP_SETATTR = 34 */
398 	{rfs4_op_setattr, nullfree, 0},
399 
400 	/* OP_SETCLIENTID = 35 */
401 	{rfs4_op_setclientid, nullfree, 0},
402 
403 	/* OP_SETCLIENTID_CONFIRM = 36 */
404 	{rfs4_op_setclientid_confirm, nullfree, 0},
405 
406 	/* OP_VERIFY = 37 */
407 	{rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
408 
409 	/* OP_WRITE = 38 */
410 	{rfs4_op_write, nullfree, 0},
411 
412 	/* OP_RELEASE_LOCKOWNER = 39 */
413 	{rfs4_op_release_lockowner, nullfree, 0},
414 };
415 
416 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
417 
418 #define	OP_ILLEGAL_IDX (rfsv4disp_cnt)
419 
#ifdef DEBUG

/* Debug knobs, only present in DEBUG builds. */
int		rfs4_fillone_debug = 0;
int		rfs4_no_stub_access = 1;
int		rfs4_rddir_debug = 0;

/*
 * Printable handler names, indexed like rfsv4disptab, with one extra
 * trailing entry for the illegal operation.  Each name matches the
 * handler function it describes.
 */
static char    *rfs4_op_string[] = {
	"rfs4_op_null",
	"rfs4_op_1 unused",
	"rfs4_op_2 unused",
	"rfs4_op_access",
	"rfs4_op_close",
	"rfs4_op_commit",
	"rfs4_op_create",
	"rfs4_op_delegpurge",
	"rfs4_op_delegreturn",
	"rfs4_op_getattr",
	"rfs4_op_getfh",
	"rfs4_op_link",
	"rfs4_op_lock",
	"rfs4_op_lockt",
	"rfs4_op_locku",
	"rfs4_op_lookup",
	"rfs4_op_lookupp",
	"rfs4_op_nverify",
	"rfs4_op_open",
	"rfs4_op_openattr",
	"rfs4_op_open_confirm",
	"rfs4_op_open_downgrade",
	"rfs4_op_putfh",
	"rfs4_op_putpubfh",
	"rfs4_op_putrootfh",
	"rfs4_op_read",
	"rfs4_op_readdir",
	"rfs4_op_readlink",
	"rfs4_op_remove",
	"rfs4_op_rename",
	"rfs4_op_renew",
	"rfs4_op_restorefh",
	"rfs4_op_savefh",
	"rfs4_op_secinfo",
	"rfs4_op_setattr",
	"rfs4_op_setclientid",
	/* was "rfs4_op_setclient_confirm", which names no handler */
	"rfs4_op_setclientid_confirm",
	"rfs4_op_verify",
	"rfs4_op_write",
	"rfs4_op_release_lockowner",
	"rfs4_op_illegal"
};
#endif
470 
471 void	rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
472 
/* Declared locally; used by rfs4_dss_newpath() to copy path strings. */
473 extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);
474 
475 extern void	rfs4_free_fs_locations4(fs_locations4 *);
476 
/*
 * nextdp(dp): the dirent64 that starts d_reclen bytes after dp.  Any
 * definition of nextdp picked up from a header is discarded first.
 */
477 #ifdef	nextdp
478 #undef nextdp
479 #endif
480 #define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
481 
482 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
483 	VOPNAME_OPEN,		{ .femop_open = deleg_rd_open },
484 	VOPNAME_WRITE,		{ .femop_write = deleg_rd_write },
485 	VOPNAME_SETATTR,	{ .femop_setattr = deleg_rd_setattr },
486 	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_rd_rwlock },
487 	VOPNAME_SPACE,		{ .femop_space = deleg_rd_space },
488 	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_rd_setsecattr },
489 	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_rd_vnevent },
490 	NULL,			NULL
491 };
492 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
493 	VOPNAME_OPEN,		{ .femop_open = deleg_wr_open },
494 	VOPNAME_READ,		{ .femop_read = deleg_wr_read },
495 	VOPNAME_WRITE,		{ .femop_write = deleg_wr_write },
496 	VOPNAME_SETATTR,	{ .femop_setattr = deleg_wr_setattr },
497 	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_wr_rwlock },
498 	VOPNAME_SPACE,		{ .femop_space = deleg_wr_space },
499 	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_wr_setsecattr },
500 	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_wr_vnevent },
501 	NULL,			NULL
502 };
503 
504 nfs4_srv_t *
nfs4_get_srv(void)505 nfs4_get_srv(void)
506 {
507 	nfs_globals_t *ng = nfs_srv_getzg();
508 	nfs4_srv_t *srv = ng->nfs4_srv;
509 	ASSERT(srv != NULL);
510 	return (srv);
511 }
512 
513 void
rfs4_srv_zone_init(nfs_globals_t * ng)514 rfs4_srv_zone_init(nfs_globals_t *ng)
515 {
516 	nfs4_srv_t *nsrv4;
517 	timespec32_t verf;
518 
519 	nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
520 
521 	/*
522 	 * The following algorithm attempts to find a unique verifier
523 	 * to be used as the write verifier returned from the server
524 	 * to the client.  It is important that this verifier change
525 	 * whenever the server reboots.  Of secondary importance, it
526 	 * is important for the verifier to be unique between two
527 	 * different servers.
528 	 *
529 	 * Thus, an attempt is made to use the system hostid and the
530 	 * current time in seconds when the nfssrv kernel module is
531 	 * loaded.  It is assumed that an NFS server will not be able
532 	 * to boot and then to reboot in less than a second.  If the
533 	 * hostid has not been set, then the current high resolution
534 	 * time is used.  This will ensure different verifiers each
535 	 * time the server reboots and minimize the chances that two
536 	 * different servers will have the same verifier.
537 	 * XXX - this is broken on LP64 kernels.
538 	 */
539 	verf.tv_sec = (time_t)zone_get_hostid(NULL);
540 	if (verf.tv_sec != 0) {
541 		verf.tv_nsec = gethrestime_sec();
542 	} else {
543 		timespec_t tverf;
544 
545 		gethrestime(&tverf);
546 		verf.tv_sec = (time_t)tverf.tv_sec;
547 		verf.tv_nsec = tverf.tv_nsec;
548 	}
549 	nsrv4->write4verf = *(uint64_t *)&verf;
550 
551 	/* Used to manage create/destroy of server state */
552 	nsrv4->nfs4_server_state = NULL;
553 	nsrv4->nfs4_cur_servinst = NULL;
554 	nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
555 	mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
556 	mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
557 	mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
558 	rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
559 
560 	ng->nfs4_srv = nsrv4;
561 }
562 
563 void
rfs4_srv_zone_fini(nfs_globals_t * ng)564 rfs4_srv_zone_fini(nfs_globals_t *ng)
565 {
566 	nfs4_srv_t *nsrv4 = ng->nfs4_srv;
567 
568 	ng->nfs4_srv = NULL;
569 
570 	mutex_destroy(&nsrv4->deleg_lock);
571 	mutex_destroy(&nsrv4->state_lock);
572 	mutex_destroy(&nsrv4->servinst_lock);
573 	rw_destroy(&nsrv4->deleg_policy_lock);
574 
575 	kmem_free(nsrv4, sizeof (*nsrv4));
576 }
577 
578 void
rfs4_srvrinit(void)579 rfs4_srvrinit(void)
580 {
581 	extern void rfs4_attr_init();
582 
583 	rfs4_attr_init();
584 
585 	if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
586 		rfs4_disable_delegation();
587 	} else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
588 	    &deleg_wrops) != 0) {
589 		rfs4_disable_delegation();
590 		fem_free(deleg_rdops);
591 	}
592 
593 	nfs4_srv_caller_id = fs_new_caller_id();
594 	lockt_sysid = lm_alloc_sysidt();
595 	vsd_create(&nfs4_srv_vkey, NULL);
596 	rfs4_state_g_init();
597 }
598 
599 void
rfs4_srvrfini(void)600 rfs4_srvrfini(void)
601 {
602 	if (lockt_sysid != LM_NOSYSID) {
603 		lm_free_sysidt(lockt_sysid);
604 		lockt_sysid = LM_NOSYSID;
605 	}
606 
607 	rfs4_state_g_fini();
608 
609 	fem_free(deleg_rdops);
610 	fem_free(deleg_wrops);
611 }
612 
613 void
rfs4_do_server_start(int server_upordown,int srv_delegation,int cluster_booted)614 rfs4_do_server_start(int server_upordown,
615     int srv_delegation, int cluster_booted)
616 {
617 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
618 
619 	/* Is this a warm start? */
620 	if (server_upordown == NFS_SERVER_QUIESCED) {
621 		cmn_err(CE_NOTE, "nfs4_srv: "
622 		    "server was previously quiesced; "
623 		    "existing NFSv4 state will be re-used");
624 
625 		/*
626 		 * HA-NFSv4: this is also the signal
627 		 * that a Resource Group failover has
628 		 * occurred.
629 		 */
630 		if (cluster_booted)
631 			hanfsv4_failover(nsrv4);
632 	} else {
633 		/* Cold start */
634 		nsrv4->rfs4_start_time = 0;
635 		rfs4_state_zone_init(nsrv4);
636 		nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
637 		    nfs4_drc_hash);
638 
639 		/*
640 		 * The nfsd service was started with the -s option
641 		 * we need to pull in any state from the paths indicated.
642 		 */
643 		if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
644 			/* read in the stable storage state from these paths */
645 			rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
646 			    rfs4_dss_newpaths);
647 		}
648 	}
649 
650 	/* Check if delegation is to be enabled */
651 	if (srv_delegation != FALSE)
652 		rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
653 }
654 
655 void
rfs4_init_compound_state(struct compound_state * cs)656 rfs4_init_compound_state(struct compound_state *cs)
657 {
658 	bzero(cs, sizeof (*cs));
659 	cs->cont = TRUE;
660 	cs->access = CS_ACCESS_DENIED;
661 	cs->deleg = FALSE;
662 	cs->mandlock = FALSE;
663 	cs->fh.nfs_fh4_val = cs->fhbuf;
664 }
665 
666 void
rfs4_grace_start(rfs4_servinst_t * sip)667 rfs4_grace_start(rfs4_servinst_t *sip)
668 {
669 	rw_enter(&sip->rwlock, RW_WRITER);
670 	sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
671 	sip->grace_period = rfs4_grace_period;
672 	rw_exit(&sip->rwlock);
673 }
674 
675 /*
676  * returns true if the instance's grace period has never been started
677  */
678 int
rfs4_servinst_grace_new(rfs4_servinst_t * sip)679 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
680 {
681 	time_t start_time;
682 
683 	rw_enter(&sip->rwlock, RW_READER);
684 	start_time = sip->start_time;
685 	rw_exit(&sip->rwlock);
686 
687 	return (start_time == 0);
688 }
689 
690 /*
691  * Indicates if server instance is within the
692  * grace period.
693  */
694 int
rfs4_servinst_in_grace(rfs4_servinst_t * sip)695 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
696 {
697 	time_t grace_expiry;
698 
699 	rw_enter(&sip->rwlock, RW_READER);
700 	grace_expiry = sip->start_time + sip->grace_period;
701 	rw_exit(&sip->rwlock);
702 
703 	return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
704 }
705 
706 int
rfs4_clnt_in_grace(rfs4_client_t * cp)707 rfs4_clnt_in_grace(rfs4_client_t *cp)
708 {
709 	ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
710 
711 	return (rfs4_servinst_in_grace(cp->rc_server_instance));
712 }
713 
714 /*
715  * reset all currently active grace periods
716  */
717 void
rfs4_grace_reset_all(nfs4_srv_t * nsrv4)718 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
719 {
720 	rfs4_servinst_t *sip;
721 
722 	mutex_enter(&nsrv4->servinst_lock);
723 	for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
724 		if (rfs4_servinst_in_grace(sip))
725 			rfs4_grace_start(sip);
726 	mutex_exit(&nsrv4->servinst_lock);
727 }
728 
729 /*
730  * start any new instances' grace periods
731  */
732 void
rfs4_grace_start_new(nfs4_srv_t * nsrv4)733 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
734 {
735 	rfs4_servinst_t *sip;
736 
737 	mutex_enter(&nsrv4->servinst_lock);
738 	for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
739 		if (rfs4_servinst_grace_new(sip))
740 			rfs4_grace_start(sip);
741 	mutex_exit(&nsrv4->servinst_lock);
742 }
743 
744 static rfs4_dss_path_t *
rfs4_dss_newpath(nfs4_srv_t * nsrv4,rfs4_servinst_t * sip,char * path,unsigned index)745 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
746     char *path, unsigned index)
747 {
748 	size_t len;
749 	rfs4_dss_path_t *dss_path;
750 
751 	dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
752 
753 	/*
754 	 * Take a copy of the string, since the original may be overwritten.
755 	 * Sadly, no strdup() in the kernel.
756 	 */
757 	/* allow for NUL */
758 	len = strlen(path) + 1;
759 	dss_path->path = kmem_alloc(len, KM_SLEEP);
760 	(void) strlcpy(dss_path->path, path, len);
761 
762 	/* associate with servinst */
763 	dss_path->sip = sip;
764 	dss_path->index = index;
765 
766 	/*
767 	 * Add to list of served paths.
768 	 * No locking required, as we're only ever called at startup.
769 	 */
770 	if (nsrv4->dss_pathlist == NULL) {
771 		/* this is the first dss_path_t */
772 
773 		/* needed for insque/remque */
774 		dss_path->next = dss_path->prev = dss_path;
775 
776 		nsrv4->dss_pathlist = dss_path;
777 	} else {
778 		insque(dss_path, nsrv4->dss_pathlist);
779 	}
780 
781 	return (dss_path);
782 }
783 
784 /*
785  * Create a new server instance, and make it the currently active instance.
786  * Note that starting the grace period too early will reduce the clients'
787  * recovery window.
788  */
789 void
rfs4_servinst_create(nfs4_srv_t * nsrv4,int start_grace,int dss_npaths,char ** dss_paths)790 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
791     int dss_npaths, char **dss_paths)
792 {
793 	unsigned i;
794 	rfs4_servinst_t *sip;
795 	rfs4_oldstate_t *oldstate;
796 
797 	sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
798 	rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
799 
800 	sip->start_time = (time_t)0;
801 	sip->grace_period = (time_t)0;
802 	sip->next = NULL;
803 	sip->prev = NULL;
804 
805 	rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
806 	/*
807 	 * This initial dummy entry is required to setup for insque/remque.
808 	 * It must be skipped over whenever the list is traversed.
809 	 */
810 	oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
811 	/* insque/remque require initial list entry to be self-terminated */
812 	oldstate->next = oldstate;
813 	oldstate->prev = oldstate;
814 	sip->oldstate = oldstate;
815 
816 
817 	sip->dss_npaths = dss_npaths;
818 	sip->dss_paths = kmem_alloc(dss_npaths *
819 	    sizeof (rfs4_dss_path_t *), KM_SLEEP);
820 
821 	for (i = 0; i < dss_npaths; i++) {
822 		sip->dss_paths[i] =
823 		    rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
824 	}
825 
826 	mutex_enter(&nsrv4->servinst_lock);
827 	if (nsrv4->nfs4_cur_servinst != NULL) {
828 		/* add to linked list */
829 		sip->prev = nsrv4->nfs4_cur_servinst;
830 		nsrv4->nfs4_cur_servinst->next = sip;
831 	}
832 	if (start_grace)
833 		rfs4_grace_start(sip);
834 	/* make the new instance "current" */
835 	nsrv4->nfs4_cur_servinst = sip;
836 
837 	mutex_exit(&nsrv4->servinst_lock);
838 }
839 
840 /*
841  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
842  * all instances directly.
843  */
844 void
rfs4_servinst_destroy_all(nfs4_srv_t * nsrv4)845 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
846 {
847 	rfs4_servinst_t *sip, *prev, *current;
848 #ifdef DEBUG
849 	int n = 0;
850 #endif
851 
852 	mutex_enter(&nsrv4->servinst_lock);
853 	ASSERT(nsrv4->nfs4_cur_servinst != NULL);
854 	current = nsrv4->nfs4_cur_servinst;
855 	nsrv4->nfs4_cur_servinst = NULL;
856 	for (sip = current; sip != NULL; sip = prev) {
857 		prev = sip->prev;
858 		rw_destroy(&sip->rwlock);
859 		if (sip->oldstate)
860 			kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
861 		if (sip->dss_paths) {
862 			int i = sip->dss_npaths;
863 
864 			while (i > 0) {
865 				i--;
866 				if (sip->dss_paths[i] != NULL) {
867 					char *path = sip->dss_paths[i]->path;
868 
869 					if (path != NULL) {
870 						kmem_free(path,
871 						    strlen(path) + 1);
872 					}
873 					kmem_free(sip->dss_paths[i],
874 					    sizeof (rfs4_dss_path_t));
875 				}
876 			}
877 			kmem_free(sip->dss_paths,
878 			    sip->dss_npaths * sizeof (rfs4_dss_path_t *));
879 		}
880 		kmem_free(sip, sizeof (rfs4_servinst_t));
881 #ifdef DEBUG
882 		n++;
883 #endif
884 	}
885 	mutex_exit(&nsrv4->servinst_lock);
886 }
887 
888 /*
889  * Assign the current server instance to a client_t.
890  * Should be called with cp->rc_dbe held.
891  */
892 void
rfs4_servinst_assign(nfs4_srv_t * nsrv4,rfs4_client_t * cp,rfs4_servinst_t * sip)893 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
894     rfs4_servinst_t *sip)
895 {
896 	ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
897 
898 	/*
899 	 * The lock ensures that if the current instance is in the process
900 	 * of changing, we will see the new one.
901 	 */
902 	mutex_enter(&nsrv4->servinst_lock);
903 	cp->rc_server_instance = sip;
904 	mutex_exit(&nsrv4->servinst_lock);
905 }
906 
907 rfs4_servinst_t *
rfs4_servinst(rfs4_client_t * cp)908 rfs4_servinst(rfs4_client_t *cp)
909 {
910 	ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
911 
912 	return (cp->rc_server_instance);
913 }
914 
915 /* ARGSUSED */
916 static void
nullfree(caddr_t resop)917 nullfree(caddr_t resop)
918 {
919 }
920 
921 /*
922  * This is a fall-through for invalid or not implemented (yet) ops
923  */
924 /* ARGSUSED */
925 static void
rfs4_op_inval(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)926 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
927     struct compound_state *cs)
928 {
929 	*cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
930 }
931 
932 /*
933  * Check if the security flavor, nfsnum, is in the flavor_list.
934  */
935 bool_t
in_flavor_list(int nfsnum,int * flavor_list,int count)936 in_flavor_list(int nfsnum, int *flavor_list, int count)
937 {
938 	int i;
939 
940 	for (i = 0; i < count; i++) {
941 		if (nfsnum == flavor_list[i])
942 			return (TRUE);
943 	}
944 	return (FALSE);
945 }
946 
947 /*
948  * Used by rfs4_op_secinfo to get the security information from the
949  * export structure associated with the component.
950  */
951 /* ARGSUSED */
952 static nfsstat4
do_rfs4_op_secinfo(struct compound_state * cs,char * nm,SECINFO4res * resp)953 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
954 {
955 	int error, different_export = 0;
956 	vnode_t *dvp, *vp;
957 	struct exportinfo *exi;
958 	fid_t fid;
959 	uint_t count, i;
960 	secinfo4 *resok_val;
961 	struct secinfo *secp;
962 	seconfig_t *si;
963 	bool_t did_traverse = FALSE;
964 	int dotdot, walk;
965 	nfs_export_t *ne = nfs_get_export();
966 
967 	dvp = cs->vp;
968 	exi = cs->exi;
969 	ASSERT(exi != NULL);
970 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
971 
972 	/*
973 	 * If dotdotting, then need to check whether it's above the
974 	 * root of a filesystem, or above an export point.
975 	 */
976 	if (dotdot) {
977 		vnode_t *zone_rootvp = ne->exi_root->exi_vp;
978 
979 		ASSERT3U(exi->exi_zoneid, ==, ne->exi_root->exi_zoneid);
980 		/*
981 		 * If dotdotting at the root of a filesystem, then
982 		 * need to traverse back to the mounted-on filesystem
983 		 * and do the dotdot lookup there.
984 		 */
985 		if ((dvp->v_flag & VROOT) || VN_CMP(dvp, zone_rootvp)) {
986 
987 			/*
988 			 * If at the system root, then can
989 			 * go up no further.
990 			 */
991 			if (VN_CMP(dvp, zone_rootvp))
992 				return (puterrno4(ENOENT));
993 
994 			/*
995 			 * Traverse back to the mounted-on filesystem
996 			 */
997 			dvp = untraverse(dvp, zone_rootvp);
998 
999 			/*
1000 			 * Set the different_export flag so we remember
1001 			 * to pick up a new exportinfo entry for
1002 			 * this new filesystem.
1003 			 */
1004 			different_export = 1;
1005 		} else {
1006 
1007 			/*
1008 			 * If dotdotting above an export point then set
1009 			 * the different_export to get new export info.
1010 			 */
1011 			different_export = nfs_exported(exi, dvp);
1012 		}
1013 	}
1014 
1015 	/*
1016 	 * Get the vnode for the component "nm".
1017 	 */
1018 	error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
1019 	    NULL, NULL, NULL);
1020 	if (error)
1021 		return (puterrno4(error));
1022 
1023 	/*
1024 	 * If the vnode is in a pseudo filesystem, or if the security flavor
1025 	 * used in the request is valid but not an explicitly shared flavor,
1026 	 * or the access bit indicates that this is a limited access,
1027 	 * check whether this vnode is visible.
1028 	 */
1029 	if (!different_export &&
1030 	    (PSEUDO(exi) || !is_exported_sec(cs->nfsflavor, exi) ||
1031 	    cs->access & CS_ACCESS_LIMITED)) {
1032 		if (! nfs_visible(exi, vp, &different_export)) {
1033 			VN_RELE(vp);
1034 			return (puterrno4(ENOENT));
1035 		}
1036 	}
1037 
1038 	/*
1039 	 * If it's a mountpoint, then traverse it.
1040 	 */
1041 	if (vn_ismntpt(vp)) {
1042 		if ((error = traverse(&vp)) != 0) {
1043 			VN_RELE(vp);
1044 			return (puterrno4(error));
1045 		}
1046 		/* remember that we had to traverse mountpoint */
1047 		did_traverse = TRUE;
1048 		different_export = 1;
1049 	} else if (vp->v_vfsp != dvp->v_vfsp) {
1050 		/*
1051 		 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
1052 		 * then vp is probably an LOFS object.  We don't need the
1053 		 * realvp, we just need to know that we might have crossed
1054 		 * a server fs boundary and need to call checkexport4.
1055 		 * (LOFS lookup hides server fs mountpoints, and actually calls
1056 		 * traverse)
1057 		 */
1058 		different_export = 1;
1059 	}
1060 
1061 	/*
1062 	 * Get the export information for it.
1063 	 */
1064 	if (different_export) {
1065 
1066 		bzero(&fid, sizeof (fid));
1067 		fid.fid_len = MAXFIDSZ;
1068 		error = vop_fid_pseudo(vp, &fid);
1069 		if (error) {
1070 			VN_RELE(vp);
1071 			return (puterrno4(error));
1072 		}
1073 
1074 		/* We'll need to reassign "exi". */
1075 		if (dotdot)
1076 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1077 		else
1078 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1079 
1080 		if (exi == NULL) {
1081 			if (did_traverse == TRUE) {
1082 				/*
1083 				 * If this vnode is a mounted-on vnode,
1084 				 * but the mounted-on file system is not
1085 				 * exported, send back the secinfo for
1086 				 * the exported node that the mounted-on
1087 				 * vnode lives in.
1088 				 */
1089 				exi = cs->exi;
1090 			} else {
1091 				VN_RELE(vp);
1092 				return (puterrno4(EACCES));
1093 			}
1094 		}
1095 	}
1096 	ASSERT(exi != NULL);
1097 
1098 
1099 	/*
1100 	 * Create the secinfo result based on the security information
1101 	 * from the exportinfo structure (exi).
1102 	 *
1103 	 * Return all flavors for a pseudo node.
1104 	 * For a real export node, return the flavor that the client
1105 	 * has access with.
1106 	 */
1107 	ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1108 	if (PSEUDO(exi)) {
1109 		count = exi->exi_export.ex_seccnt; /* total sec count */
1110 		resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1111 		secp = exi->exi_export.ex_secinfo;
1112 
1113 		for (i = 0; i < count; i++) {
1114 			si = &secp[i].s_secinfo;
1115 			resok_val[i].flavor = si->sc_rpcnum;
1116 			if (resok_val[i].flavor == RPCSEC_GSS) {
1117 				rpcsec_gss_info *info;
1118 
1119 				info = &resok_val[i].flavor_info;
1120 				info->qop = si->sc_qop;
1121 				info->service = (rpc_gss_svc_t)si->sc_service;
1122 
1123 				/* get oid opaque data */
1124 				info->oid.sec_oid4_len =
1125 				    si->sc_gss_mech_type->length;
1126 				info->oid.sec_oid4_val = kmem_alloc(
1127 				    si->sc_gss_mech_type->length, KM_SLEEP);
1128 				bcopy(
1129 				    si->sc_gss_mech_type->elements,
1130 				    info->oid.sec_oid4_val,
1131 				    info->oid.sec_oid4_len);
1132 			}
1133 		}
1134 		resp->SECINFO4resok_len = count;
1135 		resp->SECINFO4resok_val = resok_val;
1136 	} else {
1137 		int ret_cnt = 0, k = 0;
1138 		int *flavor_list;
1139 
1140 		count = exi->exi_export.ex_seccnt; /* total sec count */
1141 		secp = exi->exi_export.ex_secinfo;
1142 
1143 		flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1144 		/* find out which flavors to return */
1145 		for (i = 0; i < count; i ++) {
1146 			int access, flavor, perm;
1147 
1148 			flavor = secp[i].s_secinfo.sc_nfsnum;
1149 			perm = secp[i].s_flags;
1150 
1151 			access = nfsauth4_secinfo_access(exi, cs->req,
1152 			    flavor, perm, cs->basecr);
1153 
1154 			if (! (access & NFSAUTH_DENIED) &&
1155 			    ! (access & NFSAUTH_WRONGSEC)) {
1156 				flavor_list[ret_cnt] = flavor;
1157 				ret_cnt++;
1158 			}
1159 		}
1160 
1161 		/* Create the returning SECINFO value */
1162 		resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1163 
1164 		for (i = 0; i < count; i++) {
1165 			/*
1166 			 * If the flavor is in the flavor list,
1167 			 * fill in resok_val.
1168 			 */
1169 			si = &secp[i].s_secinfo;
1170 			if (in_flavor_list(si->sc_nfsnum,
1171 			    flavor_list, ret_cnt)) {
1172 				resok_val[k].flavor = si->sc_rpcnum;
1173 				if (resok_val[k].flavor == RPCSEC_GSS) {
1174 					rpcsec_gss_info *info;
1175 
1176 					info = &resok_val[k].flavor_info;
1177 					info->qop = si->sc_qop;
1178 					info->service = (rpc_gss_svc_t)
1179 					    si->sc_service;
1180 
1181 					/* get oid opaque data */
1182 					info->oid.sec_oid4_len =
1183 					    si->sc_gss_mech_type->length;
1184 					info->oid.sec_oid4_val = kmem_alloc(
1185 					    si->sc_gss_mech_type->length,
1186 					    KM_SLEEP);
1187 					bcopy(si->sc_gss_mech_type->elements,
1188 					    info->oid.sec_oid4_val,
1189 					    info->oid.sec_oid4_len);
1190 				}
1191 				k++;
1192 			}
1193 			if (k >= ret_cnt)
1194 				break;
1195 		}
1196 		resp->SECINFO4resok_len = ret_cnt;
1197 		resp->SECINFO4resok_val = resok_val;
1198 		kmem_free(flavor_list, count * sizeof (int));
1199 	}
1200 
1201 	VN_RELE(vp);
1202 	return (NFS4_OK);
1203 }
1204 
1205 /*
1206  * SECINFO (Operation 33): Obtain required security information on
1207  * the component name in the format of (security-mechanism-oid, qop, service)
1208  * triplets.
1209  */
1210 /* ARGSUSED */
1211 static void
rfs4_op_secinfo(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1212 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1213     struct compound_state *cs)
1214 {
1215 	SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1216 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1217 	utf8string *utfnm = &args->name;
1218 	uint_t len;
1219 	char *nm;
1220 	struct sockaddr *ca;
1221 	char *name = NULL;
1222 	nfsstat4 status = NFS4_OK;
1223 
1224 	DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1225 	    SECINFO4args *, args);
1226 
1227 	/*
1228 	 * Current file handle (cfh) should have been set before getting
1229 	 * into this function. If not, return error.
1230 	 */
1231 	if (cs->vp == NULL) {
1232 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1233 		goto out;
1234 	}
1235 
1236 	if (cs->vp->v_type != VDIR) {
1237 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
1238 		goto out;
1239 	}
1240 
1241 	/*
1242 	 * Verify the component name. If failed, error out, but
1243 	 * do not error out if the component name is a "..".
1244 	 * SECINFO will return its parents secinfo data for SECINFO "..".
1245 	 */
1246 	status = utf8_dir_verify(utfnm);
1247 	if (status != NFS4_OK) {
1248 		if (utfnm->utf8string_len != 2 ||
1249 		    utfnm->utf8string_val[0] != '.' ||
1250 		    utfnm->utf8string_val[1] != '.') {
1251 			*cs->statusp = resp->status = status;
1252 			goto out;
1253 		}
1254 	}
1255 
1256 	nm = utf8_to_str(utfnm, &len, NULL);
1257 	if (nm == NULL) {
1258 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1259 		goto out;
1260 	}
1261 
1262 	if (len > MAXNAMELEN) {
1263 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1264 		kmem_free(nm, len);
1265 		goto out;
1266 	}
1267 
1268 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1269 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1270 	    MAXPATHLEN  + 1);
1271 
1272 	if (name == NULL) {
1273 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1274 		kmem_free(nm, len);
1275 		goto out;
1276 	}
1277 
1278 
1279 	*cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1280 
1281 	if (name != nm)
1282 		kmem_free(name, MAXPATHLEN + 1);
1283 	kmem_free(nm, len);
1284 
1285 out:
1286 	DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1287 	    SECINFO4res *, resp);
1288 }
1289 
1290 /*
1291  * Free SECINFO result.
1292  */
1293 /* ARGSUSED */
1294 static void
rfs4_op_secinfo_free(nfs_resop4 * resop)1295 rfs4_op_secinfo_free(nfs_resop4 *resop)
1296 {
1297 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1298 	int count, i;
1299 	secinfo4 *resok_val;
1300 
1301 	/* If this is not an Ok result, nothing to free. */
1302 	if (resp->status != NFS4_OK) {
1303 		return;
1304 	}
1305 
1306 	count = resp->SECINFO4resok_len;
1307 	resok_val = resp->SECINFO4resok_val;
1308 
1309 	for (i = 0; i < count; i++) {
1310 		if (resok_val[i].flavor == RPCSEC_GSS) {
1311 			rpcsec_gss_info *info;
1312 
1313 			info = &resok_val[i].flavor_info;
1314 			kmem_free(info->oid.sec_oid4_val,
1315 			    info->oid.sec_oid4_len);
1316 		}
1317 	}
1318 	kmem_free(resok_val, count * sizeof (secinfo4));
1319 	resp->SECINFO4resok_len = 0;
1320 	resp->SECINFO4resok_val = NULL;
1321 }
1322 
1323 /* ARGSUSED */
1324 static void
rfs4_op_access(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1325 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1326     struct compound_state *cs)
1327 {
1328 	ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1329 	ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1330 	int error;
1331 	vnode_t *vp;
1332 	struct vattr va;
1333 	int checkwriteperm;
1334 	cred_t *cr = cs->cr;
1335 	bslabel_t *clabel, *slabel;
1336 	ts_label_t *tslabel;
1337 	boolean_t admin_low_client;
1338 
1339 	DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1340 	    ACCESS4args *, args);
1341 
1342 #if 0	/* XXX allow access even if !cs->access. Eventually only pseudo fs */
1343 	if (cs->access == CS_ACCESS_DENIED) {
1344 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1345 		goto out;
1346 	}
1347 #endif
1348 	if (cs->vp == NULL) {
1349 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1350 		goto out;
1351 	}
1352 
1353 	ASSERT(cr != NULL);
1354 
1355 	vp = cs->vp;
1356 
1357 	/*
1358 	 * If the file system is exported read only, it is not appropriate
1359 	 * to check write permissions for regular files and directories.
1360 	 * Special files are interpreted by the client, so the underlying
1361 	 * permissions are sent back to the client for interpretation.
1362 	 */
1363 	if (rdonly4(req, cs) &&
1364 	    (vp->v_type == VREG || vp->v_type == VDIR))
1365 		checkwriteperm = 0;
1366 	else
1367 		checkwriteperm = 1;
1368 
1369 	/*
1370 	 * XXX
1371 	 * We need the mode so that we can correctly determine access
1372 	 * permissions relative to a mandatory lock file.  Access to
1373 	 * mandatory lock files is denied on the server, so it might
1374 	 * as well be reflected to the server during the open.
1375 	 */
1376 	va.va_mask = AT_MODE;
1377 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1378 	if (error) {
1379 		*cs->statusp = resp->status = puterrno4(error);
1380 		goto out;
1381 	}
1382 	resp->access = 0;
1383 	resp->supported = 0;
1384 
1385 	if (is_system_labeled()) {
1386 		ASSERT(req->rq_label != NULL);
1387 		clabel = req->rq_label;
1388 		DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1389 		    "got client label from request(1)",
1390 		    struct svc_req *, req);
1391 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1392 			if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1393 				*cs->statusp = resp->status = puterrno4(EACCES);
1394 				goto out;
1395 			}
1396 			slabel = label2bslabel(tslabel);
1397 			DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1398 			    char *, "got server label(1) for vp(2)",
1399 			    bslabel_t *, slabel, vnode_t *, vp);
1400 
1401 			admin_low_client = B_FALSE;
1402 		} else
1403 			admin_low_client = B_TRUE;
1404 	}
1405 
1406 	if (args->access & ACCESS4_READ) {
1407 		error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1408 		if (!error && !MANDLOCK(vp, va.va_mode) &&
1409 		    (!is_system_labeled() || admin_low_client ||
1410 		    bldominates(clabel, slabel)))
1411 			resp->access |= ACCESS4_READ;
1412 		resp->supported |= ACCESS4_READ;
1413 	}
1414 	if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1415 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1416 		if (!error && (!is_system_labeled() || admin_low_client ||
1417 		    bldominates(clabel, slabel)))
1418 			resp->access |= ACCESS4_LOOKUP;
1419 		resp->supported |= ACCESS4_LOOKUP;
1420 	}
1421 	if (checkwriteperm &&
1422 	    (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1423 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1424 		if (!error && !MANDLOCK(vp, va.va_mode) &&
1425 		    (!is_system_labeled() || admin_low_client ||
1426 		    blequal(clabel, slabel)))
1427 			resp->access |=
1428 			    (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1429 		resp->supported |=
1430 		    resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1431 	}
1432 
1433 	if (checkwriteperm &&
1434 	    (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1435 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1436 		if (!error && (!is_system_labeled() || admin_low_client ||
1437 		    blequal(clabel, slabel)))
1438 			resp->access |= ACCESS4_DELETE;
1439 		resp->supported |= ACCESS4_DELETE;
1440 	}
1441 	if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1442 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1443 		if (!error && !MANDLOCK(vp, va.va_mode) &&
1444 		    (!is_system_labeled() || admin_low_client ||
1445 		    bldominates(clabel, slabel)))
1446 			resp->access |= ACCESS4_EXECUTE;
1447 		resp->supported |= ACCESS4_EXECUTE;
1448 	}
1449 
1450 	if (is_system_labeled() && !admin_low_client)
1451 		label_rele(tslabel);
1452 
1453 	*cs->statusp = resp->status = NFS4_OK;
1454 out:
1455 	DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1456 	    ACCESS4res *, resp);
1457 }
1458 
1459 /* ARGSUSED */
1460 static void
rfs4_op_commit(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1461 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1462     struct compound_state *cs)
1463 {
1464 	COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1465 	COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1466 	int error;
1467 	vnode_t *vp = cs->vp;
1468 	cred_t *cr = cs->cr;
1469 	vattr_t va;
1470 	nfs4_srv_t *nsrv4;
1471 
1472 	DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1473 	    COMMIT4args *, args);
1474 
1475 	if (vp == NULL) {
1476 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1477 		goto out;
1478 	}
1479 	if (cs->access == CS_ACCESS_DENIED) {
1480 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1481 		goto out;
1482 	}
1483 
1484 	if (args->offset + args->count < args->offset) {
1485 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1486 		goto out;
1487 	}
1488 
1489 	va.va_mask = AT_UID;
1490 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1491 
1492 	/*
1493 	 * If we can't get the attributes, then we can't do the
1494 	 * right access checking.  So, we'll fail the request.
1495 	 */
1496 	if (error) {
1497 		*cs->statusp = resp->status = puterrno4(error);
1498 		goto out;
1499 	}
1500 	if (rdonly4(req, cs)) {
1501 		*cs->statusp = resp->status = NFS4ERR_ROFS;
1502 		goto out;
1503 	}
1504 
1505 	if (vp->v_type != VREG) {
1506 		if (vp->v_type == VDIR)
1507 			resp->status = NFS4ERR_ISDIR;
1508 		else
1509 			resp->status = NFS4ERR_INVAL;
1510 		*cs->statusp = resp->status;
1511 		goto out;
1512 	}
1513 
1514 	if (crgetuid(cr) != va.va_uid &&
1515 	    (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1516 		*cs->statusp = resp->status = puterrno4(error);
1517 		goto out;
1518 	}
1519 
1520 	error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1521 
1522 	if (error) {
1523 		*cs->statusp = resp->status = puterrno4(error);
1524 		goto out;
1525 	}
1526 
1527 	nsrv4 = nfs4_get_srv();
1528 	*cs->statusp = resp->status = NFS4_OK;
1529 	resp->writeverf = nsrv4->write4verf;
1530 out:
1531 	DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1532 	    COMMIT4res *, resp);
1533 }
1534 
1535 /*
1536  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1537  * was completed. It does the nfsv4 create for special files.
1538  */
1539 /* ARGSUSED */
1540 static vnode_t *
do_rfs4_op_mknod(CREATE4args * args,CREATE4res * resp,struct svc_req * req,struct compound_state * cs,vattr_t * vap,char * nm)1541 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1542     struct compound_state *cs, vattr_t *vap, char *nm)
1543 {
1544 	int error;
1545 	cred_t *cr = cs->cr;
1546 	vnode_t *dvp = cs->vp;
1547 	vnode_t *vp = NULL;
1548 	int mode;
1549 	enum vcexcl excl;
1550 
1551 	switch (args->type) {
1552 	case NF4CHR:
1553 	case NF4BLK:
1554 		if (secpolicy_sys_devices(cr) != 0) {
1555 			*cs->statusp = resp->status = NFS4ERR_PERM;
1556 			return (NULL);
1557 		}
1558 		if (args->type == NF4CHR)
1559 			vap->va_type = VCHR;
1560 		else
1561 			vap->va_type = VBLK;
1562 		vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1563 		    args->ftype4_u.devdata.specdata2);
1564 		vap->va_mask |= AT_RDEV;
1565 		break;
1566 	case NF4SOCK:
1567 		vap->va_type = VSOCK;
1568 		break;
1569 	case NF4FIFO:
1570 		vap->va_type = VFIFO;
1571 		break;
1572 	default:
1573 		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
1574 		return (NULL);
1575 	}
1576 
1577 	/*
1578 	 * Must specify the mode.
1579 	 */
1580 	if (!(vap->va_mask & AT_MODE)) {
1581 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1582 		return (NULL);
1583 	}
1584 
1585 	excl = EXCL;
1586 
1587 	mode = 0;
1588 
1589 	error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1590 	if (error) {
1591 		*cs->statusp = resp->status = puterrno4(error);
1592 		return (NULL);
1593 	}
1594 	return (vp);
1595 }
1596 
1597 /*
1598  * nfsv4 create is used to create non-regular files. For regular files,
1599  * use nfsv4 open.
1600  */
1601 /* ARGSUSED */
1602 static void
rfs4_op_create(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1603 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1604     struct compound_state *cs)
1605 {
1606 	CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1607 	CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1608 	int error;
1609 	struct vattr bva, iva, iva2, ava, *vap;
1610 	cred_t *cr = cs->cr;
1611 	vnode_t *dvp = cs->vp;
1612 	vnode_t *vp = NULL;
1613 	vnode_t *realvp;
1614 	char *nm, *lnm;
1615 	uint_t len, llen;
1616 	int syncval = 0;
1617 	struct nfs4_svgetit_arg sarg;
1618 	struct nfs4_ntov_table ntov;
1619 	struct statvfs64 sb;
1620 	nfsstat4 status;
1621 	struct sockaddr *ca;
1622 	char *name = NULL;
1623 	char *lname = NULL;
1624 
1625 	DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1626 	    CREATE4args *, args);
1627 
1628 	resp->attrset = 0;
1629 
1630 	if (dvp == NULL) {
1631 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1632 		goto out;
1633 	}
1634 
1635 	/*
1636 	 * If there is an unshared filesystem mounted on this vnode,
1637 	 * do not allow to create an object in this directory.
1638 	 */
1639 	if (vn_ismntpt(dvp)) {
1640 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1641 		goto out;
1642 	}
1643 
1644 	/* Verify that type is correct */
1645 	switch (args->type) {
1646 	case NF4LNK:
1647 	case NF4BLK:
1648 	case NF4CHR:
1649 	case NF4SOCK:
1650 	case NF4FIFO:
1651 	case NF4DIR:
1652 		break;
1653 	default:
1654 		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
1655 		goto out;
1656 	};
1657 
1658 	if (cs->access == CS_ACCESS_DENIED) {
1659 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1660 		goto out;
1661 	}
1662 	if (dvp->v_type != VDIR) {
1663 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
1664 		goto out;
1665 	}
1666 	status = utf8_dir_verify(&args->objname);
1667 	if (status != NFS4_OK) {
1668 		*cs->statusp = resp->status = status;
1669 		goto out;
1670 	}
1671 
1672 	if (rdonly4(req, cs)) {
1673 		*cs->statusp = resp->status = NFS4ERR_ROFS;
1674 		goto out;
1675 	}
1676 
1677 	/*
1678 	 * Name of newly created object
1679 	 */
1680 	nm = utf8_to_fn(&args->objname, &len, NULL);
1681 	if (nm == NULL) {
1682 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1683 		goto out;
1684 	}
1685 
1686 	if (len > MAXNAMELEN) {
1687 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1688 		kmem_free(nm, len);
1689 		goto out;
1690 	}
1691 
1692 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1693 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1694 	    MAXPATHLEN  + 1);
1695 
1696 	if (name == NULL) {
1697 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1698 		kmem_free(nm, len);
1699 		goto out;
1700 	}
1701 
1702 	resp->attrset = 0;
1703 
1704 	sarg.sbp = &sb;
1705 	sarg.is_referral = B_FALSE;
1706 	nfs4_ntov_table_init(&ntov);
1707 
1708 	status = do_rfs4_set_attrs(&resp->attrset,
1709 	    &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1710 
1711 	if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1712 		status = NFS4ERR_INVAL;
1713 
1714 	if (status != NFS4_OK) {
1715 		*cs->statusp = resp->status = status;
1716 		if (name != nm)
1717 			kmem_free(name, MAXPATHLEN + 1);
1718 		kmem_free(nm, len);
1719 		nfs4_ntov_table_free(&ntov, &sarg);
1720 		resp->attrset = 0;
1721 		goto out;
1722 	}
1723 
1724 	/* Get "before" change value */
1725 	bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1726 	error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1727 	if (error) {
1728 		*cs->statusp = resp->status = puterrno4(error);
1729 		if (name != nm)
1730 			kmem_free(name, MAXPATHLEN + 1);
1731 		kmem_free(nm, len);
1732 		nfs4_ntov_table_free(&ntov, &sarg);
1733 		resp->attrset = 0;
1734 		goto out;
1735 	}
1736 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1737 
1738 	vap = sarg.vap;
1739 
1740 	/*
1741 	 * Set the default initial values for attributes when the parent
1742 	 * directory does not have the VSUID/VSGID bit set and they have
1743 	 * not been specified in createattrs.
1744 	 */
1745 	if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1746 		vap->va_uid = crgetuid(cr);
1747 		vap->va_mask |= AT_UID;
1748 	}
1749 	if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1750 		vap->va_gid = crgetgid(cr);
1751 		vap->va_mask |= AT_GID;
1752 	}
1753 
1754 	vap->va_mask |= AT_TYPE;
1755 	switch (args->type) {
1756 	case NF4DIR:
1757 		vap->va_type = VDIR;
1758 		if ((vap->va_mask & AT_MODE) == 0) {
1759 			vap->va_mode = 0700;	/* default: owner rwx only */
1760 			vap->va_mask |= AT_MODE;
1761 		}
1762 		error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1763 		if (error)
1764 			break;
1765 
1766 		/*
1767 		 * Get the initial "after" sequence number, if it fails,
1768 		 * set to zero
1769 		 */
1770 		iva.va_mask = AT_SEQ;
1771 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1772 			iva.va_seq = 0;
1773 		break;
1774 	case NF4LNK:
1775 		vap->va_type = VLNK;
1776 		if ((vap->va_mask & AT_MODE) == 0) {
1777 			vap->va_mode = 0700;	/* default: owner rwx only */
1778 			vap->va_mask |= AT_MODE;
1779 		}
1780 
1781 		/*
1782 		 * symlink names must be treated as data
1783 		 */
1784 		lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1785 		    &llen, NULL);
1786 
1787 		if (lnm == NULL) {
1788 			*cs->statusp = resp->status = NFS4ERR_INVAL;
1789 			if (name != nm)
1790 				kmem_free(name, MAXPATHLEN + 1);
1791 			kmem_free(nm, len);
1792 			nfs4_ntov_table_free(&ntov, &sarg);
1793 			resp->attrset = 0;
1794 			goto out;
1795 		}
1796 
1797 		if (llen > MAXPATHLEN) {
1798 			*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1799 			if (name != nm)
1800 				kmem_free(name, MAXPATHLEN + 1);
1801 			kmem_free(nm, len);
1802 			kmem_free(lnm, llen);
1803 			nfs4_ntov_table_free(&ntov, &sarg);
1804 			resp->attrset = 0;
1805 			goto out;
1806 		}
1807 
1808 		lname = nfscmd_convname(ca, cs->exi, lnm,
1809 		    NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);
1810 
1811 		if (lname == NULL) {
1812 			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1813 			if (name != nm)
1814 				kmem_free(name, MAXPATHLEN + 1);
1815 			kmem_free(nm, len);
1816 			kmem_free(lnm, llen);
1817 			nfs4_ntov_table_free(&ntov, &sarg);
1818 			resp->attrset = 0;
1819 			goto out;
1820 		}
1821 
1822 		error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1823 		if (lname != lnm)
1824 			kmem_free(lname, MAXPATHLEN + 1);
1825 		kmem_free(lnm, llen);
1826 		if (error)
1827 			break;
1828 
1829 		/*
1830 		 * Get the initial "after" sequence number, if it fails,
1831 		 * set to zero
1832 		 */
1833 		iva.va_mask = AT_SEQ;
1834 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1835 			iva.va_seq = 0;
1836 
1837 		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1838 		    NULL, NULL, NULL);
1839 		if (error)
1840 			break;
1841 
1842 		/*
1843 		 * va_seq is not safe over VOP calls, check it again
1844 		 * if it has changed zero out iva to force atomic = FALSE.
1845 		 */
1846 		iva2.va_mask = AT_SEQ;
1847 		if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1848 		    iva2.va_seq != iva.va_seq)
1849 			iva.va_seq = 0;
1850 		break;
1851 	default:
1852 		/*
1853 		 * probably a special file.
1854 		 */
1855 		if ((vap->va_mask & AT_MODE) == 0) {
1856 			vap->va_mode = 0600;	/* default: owner rw only */
1857 			vap->va_mask |= AT_MODE;
1858 		}
1859 		syncval = FNODSYNC;
1860 		/*
1861 		 * We know this will only generate one VOP call
1862 		 */
1863 		vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1864 
1865 		if (vp == NULL) {
1866 			if (name != nm)
1867 				kmem_free(name, MAXPATHLEN + 1);
1868 			kmem_free(nm, len);
1869 			nfs4_ntov_table_free(&ntov, &sarg);
1870 			resp->attrset = 0;
1871 			goto out;
1872 		}
1873 
1874 		/*
1875 		 * Get the initial "after" sequence number, if it fails,
1876 		 * set to zero
1877 		 */
1878 		iva.va_mask = AT_SEQ;
1879 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1880 			iva.va_seq = 0;
1881 
1882 		break;
1883 	}
1884 	if (name != nm)
1885 		kmem_free(name, MAXPATHLEN + 1);
1886 	kmem_free(nm, len);
1887 
1888 	if (error) {
1889 		*cs->statusp = resp->status = puterrno4(error);
1890 	}
1891 
1892 	/*
1893 	 * Force modified data and metadata out to stable storage.
1894 	 */
1895 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
1896 
1897 	if (resp->status != NFS4_OK) {
1898 		if (vp != NULL)
1899 			VN_RELE(vp);
1900 		nfs4_ntov_table_free(&ntov, &sarg);
1901 		resp->attrset = 0;
1902 		goto out;
1903 	}
1904 
1905 	/*
1906 	 * Finish setup of cinfo response, "before" value already set.
1907 	 * Get "after" change value, if it fails, simply return the
1908 	 * before value.
1909 	 */
1910 	ava.va_mask = AT_CTIME|AT_SEQ;
1911 	if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1912 		ava.va_ctime = bva.va_ctime;
1913 		ava.va_seq = 0;
1914 	}
1915 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1916 
1917 	/*
1918 	 * True verification that object was created with correct
1919 	 * attrs is impossible.  The attrs could have been changed
1920 	 * immediately after object creation.  If attributes did
1921 	 * not verify, the only recourse for the server is to
1922 	 * destroy the object.  Maybe if some attrs (like gid)
1923 	 * are set incorrectly, the object should be destroyed;
1924 	 * however, seems bad as a default policy.  Do we really
1925 	 * want to destroy an object over one of the times not
1926 	 * verifying correctly?  For these reasons, the server
1927 	 * currently sets bits in attrset for createattrs
1928 	 * that were set; however, no verification is done.
1929 	 *
1930 	 * vmask_to_nmask accounts for vattr bits set on create
1931 	 *	[do_rfs4_set_attrs() only sets resp bits for
1932 	 *	 non-vattr/vfs bits.]
1933 	 * Mask off any bits set by default so as not to return
1934 	 * more attrset bits than were requested in createattrs
1935 	 */
1936 	nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1937 	resp->attrset &= args->createattrs.attrmask;
1938 	nfs4_ntov_table_free(&ntov, &sarg);
1939 
1940 	error = makefh4(&cs->fh, vp, cs->exi);
1941 	if (error) {
1942 		*cs->statusp = resp->status = puterrno4(error);
1943 	}
1944 
1945 	/*
1946 	 * The cinfo.atomic = TRUE only if we got no errors, we have
1947 	 * non-zero va_seq's, and it has incremented by exactly one
1948 	 * during the creation and it didn't change during the VOP_LOOKUP
1949 	 * or VOP_FSYNC.
1950 	 */
1951 	if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1952 	    iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1953 		resp->cinfo.atomic = TRUE;
1954 	else
1955 		resp->cinfo.atomic = FALSE;
1956 
1957 	/*
1958 	 * Force modified metadata out to stable storage.
1959 	 *
1960 	 * if a underlying vp exists, pass it to VOP_FSYNC
1961 	 */
1962 	if (VOP_REALVP(vp, &realvp, NULL) == 0)
1963 		(void) VOP_FSYNC(realvp, syncval, cr, NULL);
1964 	else
1965 		(void) VOP_FSYNC(vp, syncval, cr, NULL);
1966 
1967 	if (resp->status != NFS4_OK) {
1968 		VN_RELE(vp);
1969 		goto out;
1970 	}
1971 	if (cs->vp)
1972 		VN_RELE(cs->vp);
1973 
1974 	cs->vp = vp;
1975 	*cs->statusp = resp->status = NFS4_OK;
1976 out:
1977 	DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1978 	    CREATE4res *, resp);
1979 }
1980 
1981 /*ARGSUSED*/
1982 static void
rfs4_op_delegpurge(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1983 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1984     struct compound_state *cs)
1985 {
1986 	DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1987 	    DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1988 
1989 	rfs4_op_inval(argop, resop, req, cs);
1990 
1991 	DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1992 	    DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1993 }
1994 
1995 /*ARGSUSED*/
1996 static void
rfs4_op_delegreturn(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1997 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1998     struct compound_state *cs)
1999 {
2000 	DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
2001 	DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
2002 	rfs4_deleg_state_t *dsp;
2003 	nfsstat4 status;
2004 
2005 	DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
2006 	    DELEGRETURN4args *, args);
2007 
2008 	status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
2009 	resp->status = *cs->statusp = status;
2010 	if (status != NFS4_OK)
2011 		goto out;
2012 
2013 	/* Ensure specified filehandle matches */
2014 	if (cs->vp != dsp->rds_finfo->rf_vp) {
2015 		resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
2016 	} else
2017 		rfs4_return_deleg(dsp, FALSE);
2018 
2019 	rfs4_update_lease(dsp->rds_client);
2020 
2021 	rfs4_deleg_state_rele(dsp);
2022 out:
2023 	DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2024 	    DELEGRETURN4res *, resp);
2025 }
2026 
2027 /*
2028  * Check to see if a given "flavor" is an explicitly shared flavor.
2029  * The assumption of this routine is the "flavor" is already a valid
2030  * flavor in the secinfo list of "exi".
2031  *
2032  *	e.g.
2033  *		# share -o sec=flavor1 /export
2034  *		# share -o sec=flavor2 /export/home
2035  *
2036  *		flavor2 is not an explicitly shared flavor for /export,
2037  *		however it is in the secinfo list for /export thru the
2038  *		server namespace setup.
2039  */
2040 int
is_exported_sec(int flavor,struct exportinfo * exi)2041 is_exported_sec(int flavor, struct exportinfo *exi)
2042 {
2043 	int	i;
2044 	struct secinfo *sp;
2045 
2046 	sp = exi->exi_export.ex_secinfo;
2047 	for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2048 		if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2049 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2050 			return (SEC_REF_EXPORTED(&sp[i]));
2051 		}
2052 	}
2053 
2054 	/* Should not reach this point based on the assumption */
2055 	return (0);
2056 }
2057 
2058 /*
2059  * Check if the security flavor used in the request matches what is
2060  * required at the export point or at the root pseudo node (exi_root).
2061  *
2062  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2063  *
2064  */
2065 static int
secinfo_match_or_authnone(struct compound_state * cs)2066 secinfo_match_or_authnone(struct compound_state *cs)
2067 {
2068 	int	i;
2069 	struct secinfo *sp;
2070 
2071 	/*
2072 	 * Check cs->nfsflavor (from the request) against
2073 	 * the current export data in cs->exi.
2074 	 */
2075 	sp = cs->exi->exi_export.ex_secinfo;
2076 	for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2077 		if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2078 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2079 			return (1);
2080 	}
2081 
2082 	return (0);
2083 }
2084 
2085 /*
2086  * Check the access authority for the client and return the correct error.
2087  */
2088 nfsstat4
call_checkauth4(struct compound_state * cs,struct svc_req * req)2089 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2090 {
2091 	int	authres;
2092 
2093 	/*
2094 	 * First, check if the security flavor used in the request
2095 	 * are among the flavors set in the server namespace.
2096 	 */
2097 	if (!secinfo_match_or_authnone(cs)) {
2098 		*cs->statusp = NFS4ERR_WRONGSEC;
2099 		return (*cs->statusp);
2100 	}
2101 
2102 	authres = checkauth4(cs, req);
2103 
2104 	if (authres > 0) {
2105 		*cs->statusp = NFS4_OK;
2106 		if (! (cs->access & CS_ACCESS_LIMITED))
2107 			cs->access = CS_ACCESS_OK;
2108 	} else if (authres == 0) {
2109 		*cs->statusp = NFS4ERR_ACCESS;
2110 	} else if (authres == -2) {
2111 		*cs->statusp = NFS4ERR_WRONGSEC;
2112 	} else {
2113 		*cs->statusp = NFS4ERR_DELAY;
2114 	}
2115 	return (*cs->statusp);
2116 }
2117 
2118 /*
2119  * bitmap4_to_attrmask is called by getattr and readdir.
2120  * It sets up the vattr mask and determines whether vfsstat call is needed
2121  * based on the input bitmap.
2122  * Returns nfsv4 status.
2123  */
2124 static nfsstat4
bitmap4_to_attrmask(bitmap4 breq,struct nfs4_svgetit_arg * sargp)2125 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2126 {
2127 	int i;
2128 	uint_t	va_mask;
2129 	struct statvfs64 *sbp = sargp->sbp;
2130 
2131 	sargp->sbp = NULL;
2132 	sargp->flag = 0;
2133 	sargp->rdattr_error = NFS4_OK;
2134 	sargp->mntdfid_set = FALSE;
2135 	if (sargp->cs->vp)
2136 		sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2137 		    FH4_ATTRDIR | FH4_NAMEDATTR);
2138 	else
2139 		sargp->xattr = 0;
2140 
2141 	/*
2142 	 * Set rdattr_error_req to true if return error per
2143 	 * failed entry rather than fail the readdir.
2144 	 */
2145 	if (breq & FATTR4_RDATTR_ERROR_MASK)
2146 		sargp->rdattr_error_req = 1;
2147 	else
2148 		sargp->rdattr_error_req = 0;
2149 
2150 	/*
2151 	 * generate the va_mask
2152 	 * Handle the easy cases first
2153 	 */
2154 	switch (breq) {
2155 	case NFS4_NTOV_ATTR_MASK:
2156 		sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2157 		return (NFS4_OK);
2158 
2159 	case NFS4_FS_ATTR_MASK:
2160 		sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2161 		sargp->sbp = sbp;
2162 		return (NFS4_OK);
2163 
2164 	case NFS4_NTOV_ATTR_CACHE_MASK:
2165 		sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2166 		return (NFS4_OK);
2167 
2168 	case FATTR4_LEASE_TIME_MASK:
2169 		sargp->vap->va_mask = 0;
2170 		return (NFS4_OK);
2171 
2172 	default:
2173 		va_mask = 0;
2174 		for (i = 0; i < nfs4_ntov_map_size; i++) {
2175 			if ((breq & nfs4_ntov_map[i].fbit) &&
2176 			    nfs4_ntov_map[i].vbit)
2177 				va_mask |= nfs4_ntov_map[i].vbit;
2178 		}
2179 
2180 		/*
2181 		 * Check is vfsstat is needed
2182 		 */
2183 		if (breq & NFS4_FS_ATTR_MASK)
2184 			sargp->sbp = sbp;
2185 
2186 		sargp->vap->va_mask = va_mask;
2187 		return (NFS4_OK);
2188 	}
2189 	/* NOTREACHED */
2190 }
2191 
2192 /*
2193  * bitmap4_get_sysattrs is called by getattr and readdir.
2194  * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2195  * Returns nfsv4 status.
2196  */
2197 static nfsstat4
bitmap4_get_sysattrs(struct nfs4_svgetit_arg * sargp)2198 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2199 {
2200 	int error;
2201 	struct compound_state *cs = sargp->cs;
2202 	vnode_t *vp = cs->vp;
2203 
2204 	if (sargp->sbp != NULL) {
2205 		if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2206 			sargp->sbp = NULL;	/* to identify error */
2207 			return (puterrno4(error));
2208 		}
2209 	}
2210 
2211 	return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2212 }
2213 
2214 static void
nfs4_ntov_table_init(struct nfs4_ntov_table * ntovp)2215 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2216 {
2217 	ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2218 	    KM_SLEEP);
2219 	ntovp->attrcnt = 0;
2220 	ntovp->vfsstat = FALSE;
2221 }
2222 
2223 static void
nfs4_ntov_table_free(struct nfs4_ntov_table * ntovp,struct nfs4_svgetit_arg * sargp)2224 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2225     struct nfs4_svgetit_arg *sargp)
2226 {
2227 	int i;
2228 	union nfs4_attr_u *na;
2229 	uint8_t *amap;
2230 
2231 	/*
2232 	 * XXX Should do the same checks for whether the bit is set
2233 	 */
2234 	for (i = 0, na = ntovp->na, amap = ntovp->amap;
2235 	    i < ntovp->attrcnt; i++, na++, amap++) {
2236 		(void) (*nfs4_ntov_map[*amap].sv_getit)(
2237 		    NFS4ATTR_FREEIT, sargp, na);
2238 	}
2239 	if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2240 		/*
2241 		 * xdr_free for getattr will be done later
2242 		 */
2243 		for (i = 0, na = ntovp->na, amap = ntovp->amap;
2244 		    i < ntovp->attrcnt; i++, na++, amap++) {
2245 			xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2246 		}
2247 	}
2248 	kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2249 }
2250 
2251 /*
2252  * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2253  */
2254 static nfsstat4
do_rfs4_op_getattr(bitmap4 breq,fattr4 * fattrp,struct nfs4_svgetit_arg * sargp)2255 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2256     struct nfs4_svgetit_arg *sargp)
2257 {
2258 	int error = 0;
2259 	int i, k;
2260 	struct nfs4_ntov_table ntov;
2261 	XDR xdr;
2262 	ulong_t xdr_size;
2263 	char *xdr_attrs;
2264 	nfsstat4 status = NFS4_OK;
2265 	nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2266 	union nfs4_attr_u *na;
2267 	uint8_t *amap;
2268 
2269 	sargp->op = NFS4ATTR_GETIT;
2270 	sargp->flag = 0;
2271 
2272 	fattrp->attrmask = 0;
2273 	/* if no bits requested, then return empty fattr4 */
2274 	if (breq == 0) {
2275 		fattrp->attrlist4_len = 0;
2276 		fattrp->attrlist4 = NULL;
2277 		return (NFS4_OK);
2278 	}
2279 
2280 	/*
2281 	 * return NFS4ERR_INVAL when client requests write-only attrs
2282 	 */
2283 	if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2284 		return (NFS4ERR_INVAL);
2285 
2286 	nfs4_ntov_table_init(&ntov);
2287 	na = ntov.na;
2288 	amap = ntov.amap;
2289 
2290 	/*
2291 	 * Now loop to get or verify the attrs
2292 	 */
2293 	for (i = 0; i < nfs4_ntov_map_size; i++) {
2294 		if (breq & nfs4_ntov_map[i].fbit) {
2295 			if ((*nfs4_ntov_map[i].sv_getit)(
2296 			    NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2297 
2298 				error = (*nfs4_ntov_map[i].sv_getit)(
2299 				    NFS4ATTR_GETIT, sargp, na);
2300 
2301 				/*
2302 				 * Possible error values:
2303 				 * >0 if sv_getit failed to
2304 				 * get the attr; 0 if succeeded;
2305 				 * <0 if rdattr_error and the
2306 				 * attribute cannot be returned.
2307 				 */
2308 				if (error && !(sargp->rdattr_error_req))
2309 					goto done;
2310 				/*
2311 				 * If error then just for entry
2312 				 */
2313 				if (error == 0) {
2314 					fattrp->attrmask |=
2315 					    nfs4_ntov_map[i].fbit;
2316 					*amap++ =
2317 					    (uint8_t)nfs4_ntov_map[i].nval;
2318 					na++;
2319 					(ntov.attrcnt)++;
2320 				} else if ((error > 0) &&
2321 				    (sargp->rdattr_error == NFS4_OK)) {
2322 					sargp->rdattr_error = puterrno4(error);
2323 				}
2324 				error = 0;
2325 			}
2326 		}
2327 	}
2328 
2329 	/*
2330 	 * If rdattr_error was set after the return value for it was assigned,
2331 	 * update it.
2332 	 */
2333 	if (prev_rdattr_error != sargp->rdattr_error) {
2334 		na = ntov.na;
2335 		amap = ntov.amap;
2336 		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2337 			k = *amap;
2338 			if (k < FATTR4_RDATTR_ERROR) {
2339 				continue;
2340 			}
2341 			if ((k == FATTR4_RDATTR_ERROR) &&
2342 			    ((*nfs4_ntov_map[k].sv_getit)(
2343 			    NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2344 
2345 				(void) (*nfs4_ntov_map[k].sv_getit)(
2346 				    NFS4ATTR_GETIT, sargp, na);
2347 			}
2348 			break;
2349 		}
2350 	}
2351 
2352 	xdr_size = 0;
2353 	na = ntov.na;
2354 	amap = ntov.amap;
2355 	for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2356 		xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2357 	}
2358 
2359 	fattrp->attrlist4_len = xdr_size;
2360 	if (xdr_size) {
2361 		/* freed by rfs4_op_getattr_free() */
2362 		fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2363 
2364 		xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2365 
2366 		na = ntov.na;
2367 		amap = ntov.amap;
2368 		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2369 			if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2370 				DTRACE_PROBE1(nfss__e__getattr4_encfail,
2371 				    int, *amap);
2372 				status = NFS4ERR_SERVERFAULT;
2373 				break;
2374 			}
2375 		}
2376 		/* xdrmem_destroy(&xdrs); */	/* NO-OP */
2377 	} else {
2378 		fattrp->attrlist4 = NULL;
2379 	}
2380 done:
2381 
2382 	nfs4_ntov_table_free(&ntov, sargp);
2383 
2384 	if (error != 0)
2385 		status = puterrno4(error);
2386 
2387 	return (status);
2388 }
2389 
2390 /* ARGSUSED */
2391 static void
rfs4_op_getattr(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2392 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2393     struct compound_state *cs)
2394 {
2395 	GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2396 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2397 	struct nfs4_svgetit_arg sarg;
2398 	struct statvfs64 sb;
2399 	nfsstat4 status;
2400 
2401 	DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2402 	    GETATTR4args *, args);
2403 
2404 	if (cs->vp == NULL) {
2405 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2406 		goto out;
2407 	}
2408 
2409 	if (cs->access == CS_ACCESS_DENIED) {
2410 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2411 		goto out;
2412 	}
2413 
2414 	sarg.sbp = &sb;
2415 	sarg.cs = cs;
2416 	sarg.is_referral = B_FALSE;
2417 
2418 	status = bitmap4_to_attrmask(args->attr_request, &sarg);
2419 	if (status == NFS4_OK) {
2420 
2421 		status = bitmap4_get_sysattrs(&sarg);
2422 		if (status == NFS4_OK) {
2423 
2424 			/* Is this a referral? */
2425 			if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2426 				/* Older V4 Solaris client sees a link */
2427 				if (client_is_downrev(req))
2428 					sarg.vap->va_type = VLNK;
2429 				else
2430 					sarg.is_referral = B_TRUE;
2431 			}
2432 
2433 			status = do_rfs4_op_getattr(args->attr_request,
2434 			    &resp->obj_attributes, &sarg);
2435 		}
2436 	}
2437 	*cs->statusp = resp->status = status;
2438 out:
2439 	DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2440 	    GETATTR4res *, resp);
2441 }
2442 
2443 static void
rfs4_op_getattr_free(nfs_resop4 * resop)2444 rfs4_op_getattr_free(nfs_resop4 *resop)
2445 {
2446 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2447 
2448 	nfs4_fattr4_free(&resp->obj_attributes);
2449 }
2450 
2451 /* ARGSUSED */
2452 static void
rfs4_op_getfh(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2453 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2454     struct compound_state *cs)
2455 {
2456 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2457 
2458 	DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2459 
2460 	if (cs->vp == NULL) {
2461 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2462 		goto out;
2463 	}
2464 	if (cs->access == CS_ACCESS_DENIED) {
2465 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2466 		goto out;
2467 	}
2468 
2469 	/* check for reparse point at the share point */
2470 	if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2471 		/* it's all bad */
2472 		cs->exi->exi_moved = 1;
2473 		*cs->statusp = resp->status = NFS4ERR_MOVED;
2474 		DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2475 		    vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2476 		return;
2477 	}
2478 
2479 	/* check for reparse point at vp */
2480 	if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2481 		/* it's not all bad */
2482 		*cs->statusp = resp->status = NFS4ERR_MOVED;
2483 		DTRACE_PROBE2(nfs4serv__func__referral__moved,
2484 		    vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2485 		return;
2486 	}
2487 
2488 	resp->object.nfs_fh4_val =
2489 	    kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2490 	nfs_fh4_copy(&cs->fh, &resp->object);
2491 	*cs->statusp = resp->status = NFS4_OK;
2492 out:
2493 	DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2494 	    GETFH4res *, resp);
2495 }
2496 
2497 static void
rfs4_op_getfh_free(nfs_resop4 * resop)2498 rfs4_op_getfh_free(nfs_resop4 *resop)
2499 {
2500 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2501 
2502 	if (resp->status == NFS4_OK &&
2503 	    resp->object.nfs_fh4_val != NULL) {
2504 		kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2505 		resp->object.nfs_fh4_val = NULL;
2506 		resp->object.nfs_fh4_len = 0;
2507 	}
2508 }
2509 
2510 /*
2511  * illegal: args: void
2512  *	    res : status (NFS4ERR_OP_ILLEGAL)
2513  */
2514 /* ARGSUSED */
2515 static void
rfs4_op_illegal(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2516 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2517     struct svc_req *req, struct compound_state *cs)
2518 {
2519 	ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2520 
2521 	resop->resop = OP_ILLEGAL;
2522 	*cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2523 }
2524 
2525 /*
2526  * link: args: SAVED_FH: file, CURRENT_FH: target directory
2527  *	 res: status. If success - CURRENT_FH unchanged, return change_info
2528  */
2529 /* ARGSUSED */
2530 static void
rfs4_op_link(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2531 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2532     struct compound_state *cs)
2533 {
2534 	LINK4args *args = &argop->nfs_argop4_u.oplink;
2535 	LINK4res *resp = &resop->nfs_resop4_u.oplink;
2536 	int error;
2537 	vnode_t *vp;
2538 	vnode_t *dvp;
2539 	struct vattr bdva, idva, adva;
2540 	char *nm;
2541 	uint_t  len;
2542 	struct sockaddr *ca;
2543 	char *name = NULL;
2544 	nfsstat4 status;
2545 
2546 	DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2547 	    LINK4args *, args);
2548 
2549 	/* SAVED_FH: source object */
2550 	vp = cs->saved_vp;
2551 	if (vp == NULL) {
2552 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2553 		goto out;
2554 	}
2555 
2556 	/* CURRENT_FH: target directory */
2557 	dvp = cs->vp;
2558 	if (dvp == NULL) {
2559 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2560 		goto out;
2561 	}
2562 
2563 	/*
2564 	 * If there is a non-shared filesystem mounted on this vnode,
2565 	 * do not allow to link any file in this directory.
2566 	 */
2567 	if (vn_ismntpt(dvp)) {
2568 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2569 		goto out;
2570 	}
2571 
2572 	if (cs->access == CS_ACCESS_DENIED) {
2573 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2574 		goto out;
2575 	}
2576 
2577 	/* Check source object's type validity */
2578 	if (vp->v_type == VDIR) {
2579 		*cs->statusp = resp->status = NFS4ERR_ISDIR;
2580 		goto out;
2581 	}
2582 
2583 	/* Check target directory's type */
2584 	if (dvp->v_type != VDIR) {
2585 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
2586 		goto out;
2587 	}
2588 
2589 	if (cs->saved_exi != cs->exi) {
2590 		*cs->statusp = resp->status = NFS4ERR_XDEV;
2591 		goto out;
2592 	}
2593 
2594 	status = utf8_dir_verify(&args->newname);
2595 	if (status != NFS4_OK) {
2596 		*cs->statusp = resp->status = status;
2597 		goto out;
2598 	}
2599 
2600 	nm = utf8_to_fn(&args->newname, &len, NULL);
2601 	if (nm == NULL) {
2602 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2603 		goto out;
2604 	}
2605 
2606 	if (len > MAXNAMELEN) {
2607 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2608 		kmem_free(nm, len);
2609 		goto out;
2610 	}
2611 
2612 	if (rdonly4(req, cs)) {
2613 		*cs->statusp = resp->status = NFS4ERR_ROFS;
2614 		kmem_free(nm, len);
2615 		goto out;
2616 	}
2617 
2618 	/* Get "before" change value */
2619 	bdva.va_mask = AT_CTIME|AT_SEQ;
2620 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2621 	if (error) {
2622 		*cs->statusp = resp->status = puterrno4(error);
2623 		kmem_free(nm, len);
2624 		goto out;
2625 	}
2626 
2627 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2628 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2629 	    MAXPATHLEN  + 1);
2630 
2631 	if (name == NULL) {
2632 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2633 		kmem_free(nm, len);
2634 		goto out;
2635 	}
2636 
2637 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2638 
2639 	error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2640 
2641 	if (nm != name)
2642 		kmem_free(name, MAXPATHLEN + 1);
2643 	kmem_free(nm, len);
2644 
2645 	/*
2646 	 * Get the initial "after" sequence number, if it fails, set to zero
2647 	 */
2648 	idva.va_mask = AT_SEQ;
2649 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2650 		idva.va_seq = 0;
2651 
2652 	/*
2653 	 * Force modified data and metadata out to stable storage.
2654 	 */
2655 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2656 	(void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2657 
2658 	if (error) {
2659 		*cs->statusp = resp->status = puterrno4(error);
2660 		goto out;
2661 	}
2662 
2663 	/*
2664 	 * Get "after" change value, if it fails, simply return the
2665 	 * before value.
2666 	 */
2667 	adva.va_mask = AT_CTIME|AT_SEQ;
2668 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2669 		adva.va_ctime = bdva.va_ctime;
2670 		adva.va_seq = 0;
2671 	}
2672 
2673 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2674 
2675 	/*
2676 	 * The cinfo.atomic = TRUE only if we have
2677 	 * non-zero va_seq's, and it has incremented by exactly one
2678 	 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2679 	 */
2680 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2681 	    idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2682 		resp->cinfo.atomic = TRUE;
2683 	else
2684 		resp->cinfo.atomic = FALSE;
2685 
2686 	*cs->statusp = resp->status = NFS4_OK;
2687 out:
2688 	DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2689 	    LINK4res *, resp);
2690 }
2691 
2692 /*
2693  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2694  */
2695 
2696 /* ARGSUSED */
2697 static nfsstat4
do_rfs4_op_lookup(char * nm,struct svc_req * req,struct compound_state * cs)2698 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2699 {
2700 	int error;
2701 	int different_export = 0;
2702 	vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2703 	struct exportinfo *exi = NULL, *pre_exi = NULL;
2704 	nfsstat4 stat;
2705 	fid_t fid;
2706 	int attrdir, dotdot, walk;
2707 	bool_t is_newvp = FALSE;
2708 
2709 	if (cs->vp->v_flag & V_XATTRDIR) {
2710 		attrdir = 1;
2711 		ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2712 	} else {
2713 		attrdir = 0;
2714 		ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2715 	}
2716 
2717 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2718 
2719 	/*
2720 	 * If dotdotting, then need to check whether it's
2721 	 * above the root of a filesystem, or above an
2722 	 * export point.
2723 	 */
2724 	if (dotdot) {
2725 		vnode_t *zone_rootvp;
2726 
2727 		ASSERT(cs->exi != NULL);
2728 		zone_rootvp = cs->exi->exi_ne->exi_root->exi_vp;
2729 		/*
2730 		 * If dotdotting at the root of a filesystem, then
2731 		 * need to traverse back to the mounted-on filesystem
2732 		 * and do the dotdot lookup there.
2733 		 */
2734 		if ((cs->vp->v_flag & VROOT) || VN_CMP(cs->vp, zone_rootvp)) {
2735 
2736 			/*
2737 			 * If at the system root, then can
2738 			 * go up no further.
2739 			 */
2740 			if (VN_CMP(cs->vp, zone_rootvp))
2741 				return (puterrno4(ENOENT));
2742 
2743 			/*
2744 			 * Traverse back to the mounted-on filesystem
2745 			 */
2746 			cs->vp = untraverse(cs->vp, zone_rootvp);
2747 
2748 			/*
2749 			 * Set the different_export flag so we remember
2750 			 * to pick up a new exportinfo entry for
2751 			 * this new filesystem.
2752 			 */
2753 			different_export = 1;
2754 		} else {
2755 
2756 			/*
2757 			 * If dotdotting above an export point then set
2758 			 * the different_export to get new export info.
2759 			 */
2760 			different_export = nfs_exported(cs->exi, cs->vp);
2761 		}
2762 	}
2763 
2764 	error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2765 	    NULL, NULL, NULL);
2766 	if (error)
2767 		return (puterrno4(error));
2768 
2769 	/*
2770 	 * If the vnode is in a pseudo filesystem, check whether it is visible.
2771 	 *
2772 	 * XXX if the vnode is a symlink and it is not visible in
2773 	 * a pseudo filesystem, return ENOENT (not following symlink).
2774 	 * V4 client can not mount such symlink. This is a regression
2775 	 * from V2/V3.
2776 	 *
2777 	 * In the same exported filesystem, if the security flavor used
2778 	 * is not an explicitly shared flavor, limit the view to the visible
2779 	 * list entries only. This is not a WRONGSEC case because it's already
2780 	 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2781 	 */
2782 	if (!different_export &&
2783 	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2784 	    cs->access & CS_ACCESS_LIMITED)) {
2785 		if (! nfs_visible(cs->exi, vp, &different_export)) {
2786 			VN_RELE(vp);
2787 			return (puterrno4(ENOENT));
2788 		}
2789 	}
2790 
2791 	/*
2792 	 * If it's a mountpoint, then traverse it.
2793 	 */
2794 	if (vn_ismntpt(vp)) {
2795 		pre_exi = cs->exi;	/* save pre-traversed exportinfo */
2796 		pre_tvp = vp;		/* save pre-traversed vnode	*/
2797 
2798 		/*
2799 		 * hold pre_tvp to counteract rele by traverse.  We will
2800 		 * need pre_tvp below if checkexport4 fails
2801 		 */
2802 		VN_HOLD(pre_tvp);
2803 		if ((error = traverse(&vp)) != 0) {
2804 			VN_RELE(vp);
2805 			VN_RELE(pre_tvp);
2806 			return (puterrno4(error));
2807 		}
2808 		different_export = 1;
2809 	} else if (vp->v_vfsp != cs->vp->v_vfsp) {
2810 		/*
2811 		 * The vfsp comparison is to handle the case where
2812 		 * a LOFS mount is shared.  lo_lookup traverses mount points,
2813 		 * and NFS is unaware of local fs transistions because
2814 		 * v_vfsmountedhere isn't set.  For this special LOFS case,
2815 		 * the dir and the obj returned by lookup will have different
2816 		 * vfs ptrs.
2817 		 */
2818 		different_export = 1;
2819 	}
2820 
2821 	if (different_export) {
2822 
2823 		bzero(&fid, sizeof (fid));
2824 		fid.fid_len = MAXFIDSZ;
2825 		error = vop_fid_pseudo(vp, &fid);
2826 		if (error) {
2827 			VN_RELE(vp);
2828 			if (pre_tvp)
2829 				VN_RELE(pre_tvp);
2830 			return (puterrno4(error));
2831 		}
2832 
2833 		if (dotdot)
2834 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2835 		else
2836 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2837 
2838 		if (exi == NULL) {
2839 			if (pre_tvp) {
2840 				/*
2841 				 * If this vnode is a mounted-on vnode,
2842 				 * but the mounted-on file system is not
2843 				 * exported, send back the filehandle for
2844 				 * the mounted-on vnode, not the root of
2845 				 * the mounted-on file system.
2846 				 */
2847 				VN_RELE(vp);
2848 				vp = pre_tvp;
2849 				exi = pre_exi;
2850 			} else {
2851 				VN_RELE(vp);
2852 				return (puterrno4(EACCES));
2853 			}
2854 		} else if (pre_tvp) {
2855 			/* we're done with pre_tvp now. release extra hold */
2856 			VN_RELE(pre_tvp);
2857 		}
2858 
2859 		cs->exi = exi;
2860 
2861 		/*
2862 		 * Now we do a checkauth4. The reason is that
2863 		 * this client/user may not have access to the new
2864 		 * exported file system, and if they do,
2865 		 * the client/user may be mapped to a different uid.
2866 		 *
2867 		 * We start with a new cr, because the checkauth4 done
2868 		 * in the PUT*FH operation over wrote the cred's uid,
2869 		 * gid, etc, and we want the real thing before calling
2870 		 * checkauth4()
2871 		 */
2872 		crfree(cs->cr);
2873 		cs->cr = crdup(cs->basecr);
2874 
2875 		oldvp = cs->vp;
2876 		cs->vp = vp;
2877 		is_newvp = TRUE;
2878 
2879 		stat = call_checkauth4(cs, req);
2880 		if (stat != NFS4_OK) {
2881 			VN_RELE(cs->vp);
2882 			cs->vp = oldvp;
2883 			return (stat);
2884 		}
2885 	}
2886 
2887 	/*
2888 	 * After various NFS checks, do a label check on the path
2889 	 * component. The label on this path should either be the
2890 	 * global zone's label or a zone's label. We are only
2891 	 * interested in the zone's label because exported files
2892 	 * in global zone is accessible (though read-only) to
2893 	 * clients. The exportability/visibility check is already
2894 	 * done before reaching this code.
2895 	 */
2896 	if (is_system_labeled()) {
2897 		bslabel_t *clabel;
2898 
2899 		ASSERT(req->rq_label != NULL);
2900 		clabel = req->rq_label;
2901 		DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2902 		    "got client label from request(1)", struct svc_req *, req);
2903 
2904 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2905 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2906 			    cs->exi)) {
2907 				error = EACCES;
2908 				goto err_out;
2909 			}
2910 		} else {
2911 			/*
2912 			 * We grant access to admin_low label clients
2913 			 * only if the client is trusted, i.e. also
2914 			 * running Solaris Trusted Extension.
2915 			 */
2916 			struct sockaddr	*ca;
2917 			int		addr_type;
2918 			void		*ipaddr;
2919 			tsol_tpc_t	*tp;
2920 
2921 			ca = (struct sockaddr *)svc_getrpccaller(
2922 			    req->rq_xprt)->buf;
2923 			if (ca->sa_family == AF_INET) {
2924 				addr_type = IPV4_VERSION;
2925 				ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2926 			} else if (ca->sa_family == AF_INET6) {
2927 				addr_type = IPV6_VERSION;
2928 				ipaddr = &((struct sockaddr_in6 *)
2929 				    ca)->sin6_addr;
2930 			}
2931 			tp = find_tpc(ipaddr, addr_type, B_FALSE);
2932 			if (tp == NULL || tp->tpc_tp.tp_doi !=
2933 			    l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2934 			    SUN_CIPSO) {
2935 				if (tp != NULL)
2936 					TPC_RELE(tp);
2937 				error = EACCES;
2938 				goto err_out;
2939 			}
2940 			TPC_RELE(tp);
2941 		}
2942 	}
2943 
2944 	error = makefh4(&cs->fh, vp, cs->exi);
2945 
2946 err_out:
2947 	if (error) {
2948 		if (is_newvp) {
2949 			VN_RELE(cs->vp);
2950 			cs->vp = oldvp;
2951 		} else
2952 			VN_RELE(vp);
2953 		return (puterrno4(error));
2954 	}
2955 
2956 	if (!is_newvp) {
2957 		if (cs->vp)
2958 			VN_RELE(cs->vp);
2959 		cs->vp = vp;
2960 	} else if (oldvp)
2961 		VN_RELE(oldvp);
2962 
2963 	/*
2964 	 * if did lookup on attrdir and didn't lookup .., set named
2965 	 * attr fh flag
2966 	 */
2967 	if (attrdir && ! dotdot)
2968 		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2969 
2970 	/* Assume false for now, open proc will set this */
2971 	cs->mandlock = FALSE;
2972 
2973 	return (NFS4_OK);
2974 }
2975 
2976 /* ARGSUSED */
2977 static void
rfs4_op_lookup(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2978 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2979     struct compound_state *cs)
2980 {
2981 	LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2982 	LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2983 	char *nm;
2984 	uint_t len;
2985 	struct sockaddr *ca;
2986 	char *name = NULL;
2987 	nfsstat4 status;
2988 
2989 	DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2990 	    LOOKUP4args *, args);
2991 
2992 	if (cs->vp == NULL) {
2993 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2994 		goto out;
2995 	}
2996 
2997 	if (cs->vp->v_type == VLNK) {
2998 		*cs->statusp = resp->status = NFS4ERR_SYMLINK;
2999 		goto out;
3000 	}
3001 
3002 	if (cs->vp->v_type != VDIR) {
3003 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
3004 		goto out;
3005 	}
3006 
3007 	status = utf8_dir_verify(&args->objname);
3008 	if (status != NFS4_OK) {
3009 		*cs->statusp = resp->status = status;
3010 		goto out;
3011 	}
3012 
3013 	nm = utf8_to_str(&args->objname, &len, NULL);
3014 	if (nm == NULL) {
3015 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3016 		goto out;
3017 	}
3018 
3019 	if (len > MAXNAMELEN) {
3020 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3021 		kmem_free(nm, len);
3022 		goto out;
3023 	}
3024 
3025 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3026 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3027 	    MAXPATHLEN  + 1);
3028 
3029 	if (name == NULL) {
3030 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3031 		kmem_free(nm, len);
3032 		goto out;
3033 	}
3034 
3035 	*cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3036 
3037 	if (name != nm)
3038 		kmem_free(name, MAXPATHLEN + 1);
3039 	kmem_free(nm, len);
3040 
3041 out:
3042 	DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3043 	    LOOKUP4res *, resp);
3044 }
3045 
3046 /* ARGSUSED */
3047 static void
rfs4_op_lookupp(nfs_argop4 * args,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3048 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3049     struct compound_state *cs)
3050 {
3051 	LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3052 
3053 	DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3054 
3055 	if (cs->vp == NULL) {
3056 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3057 		goto out;
3058 	}
3059 
3060 	if (cs->vp->v_type != VDIR) {
3061 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
3062 		goto out;
3063 	}
3064 
3065 	*cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3066 
3067 	/*
3068 	 * From NFSV4 Specification, LOOKUPP should not check for
3069 	 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3070 	 */
3071 	if (resp->status == NFS4ERR_WRONGSEC) {
3072 		*cs->statusp = resp->status = NFS4_OK;
3073 	}
3074 
3075 out:
3076 	DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3077 	    LOOKUPP4res *, resp);
3078 }
3079 
3080 
3081 /*ARGSUSED2*/
3082 static void
rfs4_op_openattr(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3083 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3084     struct compound_state *cs)
3085 {
3086 	OPENATTR4args	*args = &argop->nfs_argop4_u.opopenattr;
3087 	OPENATTR4res	*resp = &resop->nfs_resop4_u.opopenattr;
3088 	vnode_t		*avp = NULL;
3089 	int		lookup_flags = LOOKUP_XATTR, error;
3090 	int		exp_ro = 0;
3091 
3092 	DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3093 	    OPENATTR4args *, args);
3094 
3095 	if (cs->vp == NULL) {
3096 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3097 		goto out;
3098 	}
3099 
3100 	if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3101 	    !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3102 		*cs->statusp = resp->status = puterrno4(ENOTSUP);
3103 		goto out;
3104 	}
3105 
3106 	/*
3107 	 * If file system supports passing ACE mask to VOP_ACCESS then
3108 	 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3109 	 */
3110 
3111 	if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3112 		error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3113 		    V_ACE_MASK, cs->cr, NULL);
3114 	else
3115 		error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3116 		    (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3117 		    (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3118 
3119 	if (error) {
3120 		*cs->statusp = resp->status = puterrno4(EACCES);
3121 		goto out;
3122 	}
3123 
3124 	/*
3125 	 * The CREATE_XATTR_DIR VOP flag cannot be specified if
3126 	 * the file system is exported read-only -- regardless of
3127 	 * createdir flag.  Otherwise the attrdir would be created
3128 	 * (assuming server fs isn't mounted readonly locally).  If
3129 	 * VOP_LOOKUP returns ENOENT in this case, the error will
3130 	 * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
3131 	 * because specfs has no VOP_LOOKUP op, so the macro would
3132 	 * return ENOSYS.  EINVAL is returned by all (current)
3133 	 * Solaris file system implementations when any of their
3134 	 * restrictions are violated (xattr(dir) can't have xattrdir).
3135 	 * Returning NOTSUPP is more appropriate in this case
3136 	 * because the object will never be able to have an attrdir.
3137 	 */
3138 	if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3139 		lookup_flags |= CREATE_XATTR_DIR;
3140 
3141 	error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3142 	    NULL, NULL, NULL);
3143 
3144 	if (error) {
3145 		if (error == ENOENT && args->createdir && exp_ro)
3146 			*cs->statusp = resp->status = puterrno4(EROFS);
3147 		else if (error == EINVAL || error == ENOSYS)
3148 			*cs->statusp = resp->status = puterrno4(ENOTSUP);
3149 		else
3150 			*cs->statusp = resp->status = puterrno4(error);
3151 		goto out;
3152 	}
3153 
3154 	ASSERT(avp->v_flag & V_XATTRDIR);
3155 
3156 	error = makefh4(&cs->fh, avp, cs->exi);
3157 
3158 	if (error) {
3159 		VN_RELE(avp);
3160 		*cs->statusp = resp->status = puterrno4(error);
3161 		goto out;
3162 	}
3163 
3164 	VN_RELE(cs->vp);
3165 	cs->vp = avp;
3166 
3167 	/*
3168 	 * There is no requirement for an attrdir fh flag
3169 	 * because the attrdir has a vnode flag to distinguish
3170 	 * it from regular (non-xattr) directories.  The
3171 	 * FH4_ATTRDIR flag is set for future sanity checks.
3172 	 */
3173 	set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3174 	*cs->statusp = resp->status = NFS4_OK;
3175 
3176 out:
3177 	DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3178 	    OPENATTR4res *, resp);
3179 }
3180 
3181 static int
do_io(int direction,vnode_t * vp,struct uio * uio,int ioflag,cred_t * cred,caller_context_t * ct)3182 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3183     caller_context_t *ct)
3184 {
3185 	int error;
3186 	int i;
3187 	clock_t delaytime;
3188 
3189 	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3190 
3191 	/*
3192 	 * Don't block on mandatory locks. If this routine returns
3193 	 * EAGAIN, the caller should return NFS4ERR_LOCKED.
3194 	 */
3195 	uio->uio_fmode = FNONBLOCK;
3196 
3197 	for (i = 0; i < rfs4_maxlock_tries; i++) {
3198 
3199 
3200 		if (direction == FREAD) {
3201