xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs3_srv.c (revision dfdcac05)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2018 Nexenta Systems, Inc.
24  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
25  * Copyright (c) 2013 by Delphix. All rights reserved.
26  */
27 
28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
30 
31 
32 #include <sys/param.h>
33 #include <sys/types.h>
34 #include <sys/systm.h>
35 #include <sys/cred.h>
36 #include <sys/buf.h>
37 #include <sys/vfs.h>
38 #include <sys/vnode.h>
39 #include <sys/uio.h>
40 #include <sys/errno.h>
41 #include <sys/sysmacros.h>
42 #include <sys/statvfs.h>
43 #include <sys/kmem.h>
44 #include <sys/dirent.h>
45 #include <sys/cmn_err.h>
46 #include <sys/debug.h>
47 #include <sys/systeminfo.h>
48 #include <sys/flock.h>
49 #include <sys/nbmlock.h>
50 #include <sys/policy.h>
51 #include <sys/sdt.h>
52 
53 #include <rpc/types.h>
54 #include <rpc/auth.h>
55 #include <rpc/svc.h>
56 #include <rpc/rpc_rdma.h>
57 
58 #include <nfs/nfs.h>
59 #include <nfs/export.h>
60 #include <nfs/nfs_cmd.h>
61 
62 #include <sys/strsubr.h>
63 #include <sys/tsol/label.h>
64 #include <sys/tsol/tndb.h>
65 
66 #include <sys/zone.h>
67 
68 #include <inet/ip.h>
69 #include <inet/ip6.h>
70 
71 /*
72  * Zone global variables of NFSv3 server
73  */
74 typedef struct nfs3_srv {
75 	writeverf3	write3verf;
76 } nfs3_srv_t;
77 
78 /*
79  * These are the interface routines for the server side of the
80  * Network File System.  See the NFS version 3 protocol specification
81  * for a description of this interface.
82  */
83 
84 static int	sattr3_to_vattr(sattr3 *, struct vattr *);
85 static int	vattr_to_fattr3(struct vattr *, fattr3 *);
86 static int	vattr_to_wcc_attr(struct vattr *, wcc_attr *);
87 static void	vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
88 static void	vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
89 static int	rdma_setup_read_data3(READ3args *, READ3resok *);
90 
91 extern int nfs_loaned_buffers;
92 
93 u_longlong_t nfs3_srv_caller_id;
94 
95 static nfs3_srv_t *
nfs3_get_srv(void)96 nfs3_get_srv(void)
97 {
98 	nfs_globals_t *ng = nfs_srv_getzg();
99 	nfs3_srv_t *srv = ng->nfs3_srv;
100 	ASSERT(srv != NULL);
101 	return (srv);
102 }
103 
104 /* ARGSUSED */
105 void
rfs3_getattr(GETATTR3args * args,GETATTR3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)106 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
107     struct svc_req *req, cred_t *cr, bool_t ro)
108 {
109 	int error;
110 	vnode_t *vp;
111 	struct vattr va;
112 
113 	vp = nfs3_fhtovp(&args->object, exi);
114 
115 	DTRACE_NFSV3_5(op__getattr__start, struct svc_req *, req,
116 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
117 	    GETATTR3args *, args);
118 
119 	if (vp == NULL) {
120 		error = ESTALE;
121 		goto out;
122 	}
123 
124 	va.va_mask = AT_ALL;
125 	error = rfs4_delegated_getattr(vp, &va, 0, cr);
126 
127 	if (!error) {
128 		/* Lie about the object type for a referral */
129 		if (vn_is_nfs_reparse(vp, cr))
130 			va.va_type = VLNK;
131 
132 		/* overflow error if time or size is out of range */
133 		error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
134 		if (error)
135 			goto out;
136 		resp->status = NFS3_OK;
137 
138 		DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
139 		    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
140 		    GETATTR3res *, resp);
141 
142 		VN_RELE(vp);
143 
144 		return;
145 	}
146 
147 out:
148 	if (curthread->t_flag & T_WOULDBLOCK) {
149 		curthread->t_flag &= ~T_WOULDBLOCK;
150 		resp->status = NFS3ERR_JUKEBOX;
151 	} else
152 		resp->status = puterrno3(error);
153 
154 	DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
155 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
156 	    GETATTR3res *, resp);
157 
158 	if (vp != NULL)
159 		VN_RELE(vp);
160 }
161 
162 void *
rfs3_getattr_getfh(GETATTR3args * args)163 rfs3_getattr_getfh(GETATTR3args *args)
164 {
165 
166 	return (&args->object);
167 }
168 
169 void
rfs3_setattr(SETATTR3args * args,SETATTR3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)170 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
171     struct svc_req *req, cred_t *cr, bool_t ro)
172 {
173 	int error;
174 	vnode_t *vp;
175 	struct vattr *bvap;
176 	struct vattr bva;
177 	struct vattr *avap;
178 	struct vattr ava;
179 	int flag;
180 	int in_crit = 0;
181 	struct flock64 bf;
182 	caller_context_t ct;
183 
184 	bvap = NULL;
185 	avap = NULL;
186 
187 	vp = nfs3_fhtovp(&args->object, exi);
188 
189 	DTRACE_NFSV3_5(op__setattr__start, struct svc_req *, req,
190 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
191 	    SETATTR3args *, args);
192 
193 	if (vp == NULL) {
194 		error = ESTALE;
195 		goto out;
196 	}
197 
198 	error = sattr3_to_vattr(&args->new_attributes, &ava);
199 	if (error)
200 		goto out;
201 
202 	if (is_system_labeled()) {
203 		bslabel_t *clabel = req->rq_label;
204 
205 		ASSERT(clabel != NULL);
206 		DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
207 		    "got client label from request(1)", struct svc_req *, req);
208 
209 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
210 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
211 			    exi)) {
212 				resp->status = NFS3ERR_ACCES;
213 				goto out1;
214 			}
215 		}
216 	}
217 
218 	/*
219 	 * We need to specially handle size changes because of
220 	 * possible conflicting NBMAND locks. Get into critical
221 	 * region before VOP_GETATTR, so the size attribute is
222 	 * valid when checking conflicts.
223 	 *
224 	 * Also, check to see if the v4 side of the server has
225 	 * delegated this file.  If so, then we return JUKEBOX to
226 	 * allow the client to retrasmit its request.
227 	 */
228 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
229 		if (nbl_need_check(vp)) {
230 			nbl_start_crit(vp, RW_READER);
231 			in_crit = 1;
232 		}
233 	}
234 
235 	bva.va_mask = AT_ALL;
236 	error = rfs4_delegated_getattr(vp, &bva, 0, cr);
237 
238 	/*
239 	 * If we can't get the attributes, then we can't do the
240 	 * right access checking.  So, we'll fail the request.
241 	 */
242 	if (error)
243 		goto out;
244 
245 	bvap = &bva;
246 
247 	if (rdonly(ro, vp)) {
248 		resp->status = NFS3ERR_ROFS;
249 		goto out1;
250 	}
251 
252 	if (args->guard.check &&
253 	    (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
254 	    args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
255 		resp->status = NFS3ERR_NOT_SYNC;
256 		goto out1;
257 	}
258 
259 	if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
260 		flag = ATTR_UTIME;
261 	else
262 		flag = 0;
263 
264 	/*
265 	 * If the filesystem is exported with nosuid, then mask off
266 	 * the setuid and setgid bits.
267 	 */
268 	if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
269 	    (exi->exi_export.ex_flags & EX_NOSUID))
270 		ava.va_mode &= ~(VSUID | VSGID);
271 
272 	ct.cc_sysid = 0;
273 	ct.cc_pid = 0;
274 	ct.cc_caller_id = nfs3_srv_caller_id;
275 	ct.cc_flags = CC_DONTBLOCK;
276 
277 	/*
278 	 * We need to specially handle size changes because it is
279 	 * possible for the client to create a file with modes
280 	 * which indicate read-only, but with the file opened for
281 	 * writing.  If the client then tries to set the size of
282 	 * the file, then the normal access checking done in
283 	 * VOP_SETATTR would prevent the client from doing so,
284 	 * although it should be legal for it to do so.  To get
285 	 * around this, we do the access checking for ourselves
286 	 * and then use VOP_SPACE which doesn't do the access
287 	 * checking which VOP_SETATTR does. VOP_SPACE can only
288 	 * operate on VREG files, let VOP_SETATTR handle the other
289 	 * extremely rare cases.
290 	 * Also the client should not be allowed to change the
291 	 * size of the file if there is a conflicting non-blocking
292 	 * mandatory lock in the region the change.
293 	 */
294 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
295 		if (in_crit) {
296 			u_offset_t offset;
297 			ssize_t length;
298 
299 			if (ava.va_size < bva.va_size) {
300 				offset = ava.va_size;
301 				length = bva.va_size - ava.va_size;
302 			} else {
303 				offset = bva.va_size;
304 				length = ava.va_size - bva.va_size;
305 			}
306 			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
307 			    NULL)) {
308 				error = EACCES;
309 				goto out;
310 			}
311 		}
312 
313 		if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
314 			ava.va_mask &= ~AT_SIZE;
315 			bf.l_type = F_WRLCK;
316 			bf.l_whence = 0;
317 			bf.l_start = (off64_t)ava.va_size;
318 			bf.l_len = 0;
319 			bf.l_sysid = 0;
320 			bf.l_pid = 0;
321 			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
322 			    (offset_t)ava.va_size, cr, &ct);
323 		}
324 	}
325 
326 	if (!error && ava.va_mask)
327 		error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
328 
329 	/* check if a monitor detected a delegation conflict */
330 	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
331 		resp->status = NFS3ERR_JUKEBOX;
332 		goto out1;
333 	}
334 
335 	ava.va_mask = AT_ALL;
336 	avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
337 
338 	/*
339 	 * Force modified metadata out to stable storage.
340 	 */
341 	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
342 
343 	if (error)
344 		goto out;
345 
346 	if (in_crit)
347 		nbl_end_crit(vp);
348 
349 	resp->status = NFS3_OK;
350 	vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
351 
352 	DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
353 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
354 	    SETATTR3res *, resp);
355 
356 	VN_RELE(vp);
357 
358 	return;
359 
360 out:
361 	if (curthread->t_flag & T_WOULDBLOCK) {
362 		curthread->t_flag &= ~T_WOULDBLOCK;
363 		resp->status = NFS3ERR_JUKEBOX;
364 	} else
365 		resp->status = puterrno3(error);
366 out1:
367 	DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
368 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
369 	    SETATTR3res *, resp);
370 
371 	if (vp != NULL) {
372 		if (in_crit)
373 			nbl_end_crit(vp);
374 		VN_RELE(vp);
375 	}
376 	vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
377 }
378 
379 void *
rfs3_setattr_getfh(SETATTR3args * args)380 rfs3_setattr_getfh(SETATTR3args *args)
381 {
382 
383 	return (&args->object);
384 }
385 
386 /* ARGSUSED */
387 void
rfs3_lookup(LOOKUP3args * args,LOOKUP3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)388 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
389     struct svc_req *req, cred_t *cr, bool_t ro)
390 {
391 	int error;
392 	vnode_t *vp;
393 	vnode_t *dvp;
394 	struct vattr *vap;
395 	struct vattr va;
396 	struct vattr *dvap;
397 	struct vattr dva;
398 	nfs_fh3 *fhp;
399 	struct sec_ol sec = {0, 0};
400 	bool_t publicfh_flag = FALSE, auth_weak = FALSE;
401 	struct sockaddr *ca;
402 	char *name = NULL;
403 
404 	dvap = NULL;
405 
406 	if (exi != NULL)
407 		exi_hold(exi);
408 
409 	/*
410 	 * Allow lookups from the root - the default
411 	 * location of the public filehandle.
412 	 */
413 	if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
414 		ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
415 		dvp = ZONE_ROOTVP();
416 		VN_HOLD(dvp);
417 
418 		DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
419 		    cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
420 		    LOOKUP3args *, args);
421 	} else {
422 		dvp = nfs3_fhtovp(&args->what.dir, exi);
423 
424 		DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
425 		    cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
426 		    LOOKUP3args *, args);
427 
428 		if (dvp == NULL) {
429 			error = ESTALE;
430 			goto out;
431 		}
432 	}
433 
434 	dva.va_mask = AT_ALL;
435 	dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
436 
437 	if (args->what.name == nfs3nametoolong) {
438 		resp->status = NFS3ERR_NAMETOOLONG;
439 		goto out1;
440 	}
441 
442 	if (args->what.name == NULL || *(args->what.name) == '\0') {
443 		resp->status = NFS3ERR_ACCES;
444 		goto out1;
445 	}
446 
447 	fhp = &args->what.dir;
448 	ASSERT3U(curzone->zone_id, ==, exi->exi_zoneid); /* exi is non-NULL */
449 	if (strcmp(args->what.name, "..") == 0 &&
450 	    EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
451 		if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
452 		    ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) {
453 			/*
454 			 * special case for ".." and 'nohide'exported root
455 			 */
456 			if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
457 				resp->status = NFS3ERR_ACCES;
458 				goto out1;
459 			}
460 		} else {
461 			resp->status = NFS3ERR_NOENT;
462 			goto out1;
463 		}
464 	}
465 
466 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
467 	name = nfscmd_convname(ca, exi, args->what.name,
468 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
469 
470 	if (name == NULL) {
471 		resp->status = NFS3ERR_ACCES;
472 		goto out1;
473 	}
474 
475 	/*
476 	 * If the public filehandle is used then allow
477 	 * a multi-component lookup
478 	 */
479 	if (PUBLIC_FH3(&args->what.dir)) {
480 		publicfh_flag = TRUE;
481 
482 		exi_rele(exi);
483 		exi = NULL;
484 
485 		error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
486 		    &exi, &sec);
487 
488 		/*
489 		 * Since WebNFS may bypass MOUNT, we need to ensure this
490 		 * request didn't come from an unlabeled admin_low client.
491 		 */
492 		if (is_system_labeled() && error == 0) {
493 			int		addr_type;
494 			void		*ipaddr;
495 			tsol_tpc_t	*tp;
496 
497 			if (ca->sa_family == AF_INET) {
498 				addr_type = IPV4_VERSION;
499 				ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
500 			} else if (ca->sa_family == AF_INET6) {
501 				addr_type = IPV6_VERSION;
502 				ipaddr = &((struct sockaddr_in6 *)
503 				    ca)->sin6_addr;
504 			}
505 			tp = find_tpc(ipaddr, addr_type, B_FALSE);
506 			if (tp == NULL || tp->tpc_tp.tp_doi !=
507 			    l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
508 			    SUN_CIPSO) {
509 				VN_RELE(vp);
510 				error = EACCES;
511 			}
512 			if (tp != NULL)
513 				TPC_RELE(tp);
514 		}
515 	} else {
516 		error = VOP_LOOKUP(dvp, name, &vp,
517 		    NULL, 0, NULL, cr, NULL, NULL, NULL);
518 	}
519 
520 	if (name != args->what.name)
521 		kmem_free(name, MAXPATHLEN + 1);
522 
523 	if (error == 0 && vn_ismntpt(vp)) {
524 		error = rfs_cross_mnt(&vp, &exi);
525 		if (error)
526 			VN_RELE(vp);
527 	}
528 
529 	if (is_system_labeled() && error == 0) {
530 		bslabel_t *clabel = req->rq_label;
531 
532 		ASSERT(clabel != NULL);
533 		DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
534 		    "got client label from request(1)", struct svc_req *, req);
535 
536 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
537 			if (!do_rfs_label_check(clabel, dvp,
538 			    DOMINANCE_CHECK, exi)) {
539 				VN_RELE(vp);
540 				error = EACCES;
541 			}
542 		}
543 	}
544 
545 	dva.va_mask = AT_ALL;
546 	dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
547 
548 	if (error)
549 		goto out;
550 
551 	if (sec.sec_flags & SEC_QUERY) {
552 		error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
553 	} else {
554 		error = makefh3(&resp->resok.object, vp, exi);
555 		if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
556 			auth_weak = TRUE;
557 	}
558 
559 	if (error) {
560 		VN_RELE(vp);
561 		goto out;
562 	}
563 
564 	va.va_mask = AT_ALL;
565 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
566 
567 	VN_RELE(vp);
568 
569 	resp->status = NFS3_OK;
570 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
571 	vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
572 
573 	/*
574 	 * If it's public fh, no 0x81, and client's flavor is
575 	 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
576 	 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
577 	 */
578 	if (auth_weak)
579 		resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
580 
581 	DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
582 	    cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
583 	    LOOKUP3res *, resp);
584 	VN_RELE(dvp);
585 	exi_rele(exi);
586 
587 	return;
588 
589 out:
590 	if (curthread->t_flag & T_WOULDBLOCK) {
591 		curthread->t_flag &= ~T_WOULDBLOCK;
592 		resp->status = NFS3ERR_JUKEBOX;
593 	} else
594 		resp->status = puterrno3(error);
595 out1:
596 	DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
597 	    cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
598 	    LOOKUP3res *, resp);
599 
600 	if (exi != NULL)
601 		exi_rele(exi);
602 
603 	if (dvp != NULL)
604 		VN_RELE(dvp);
605 	vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
606 
607 }
608 
609 void *
rfs3_lookup_getfh(LOOKUP3args * args)610 rfs3_lookup_getfh(LOOKUP3args *args)
611 {
612 
613 	return (&args->what.dir);
614 }
615 
616 /* ARGSUSED */
617 void
rfs3_access(ACCESS3args * args,ACCESS3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)618 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
619     struct svc_req *req, cred_t *cr, bool_t ro)
620 {
621 	int error;
622 	vnode_t *vp;
623 	struct vattr *vap;
624 	struct vattr va;
625 	int checkwriteperm;
626 	boolean_t dominant_label = B_FALSE;
627 	boolean_t equal_label = B_FALSE;
628 	boolean_t admin_low_client;
629 
630 	vap = NULL;
631 
632 	vp = nfs3_fhtovp(&args->object, exi);
633 
634 	DTRACE_NFSV3_5(op__access__start, struct svc_req *, req,
635 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
636 	    ACCESS3args *, args);
637 
638 	if (vp == NULL) {
639 		error = ESTALE;
640 		goto out;
641 	}
642 
643 	/*
644 	 * If the file system is exported read only, it is not appropriate
645 	 * to check write permissions for regular files and directories.
646 	 * Special files are interpreted by the client, so the underlying
647 	 * permissions are sent back to the client for interpretation.
648 	 */
649 	if (rdonly(ro, vp) && (vp->v_type == VREG || vp->v_type == VDIR))
650 		checkwriteperm = 0;
651 	else
652 		checkwriteperm = 1;
653 
654 	/*
655 	 * We need the mode so that we can correctly determine access
656 	 * permissions relative to a mandatory lock file.  Access to
657 	 * mandatory lock files is denied on the server, so it might
658 	 * as well be reflected to the server during the open.
659 	 */
660 	va.va_mask = AT_MODE;
661 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
662 	if (error)
663 		goto out;
664 
665 	vap = &va;
666 
667 	resp->resok.access = 0;
668 
669 	if (is_system_labeled()) {
670 		bslabel_t *clabel = req->rq_label;
671 
672 		ASSERT(clabel != NULL);
673 		DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
674 		    "got client label from request(1)", struct svc_req *, req);
675 
676 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
677 			if ((equal_label = do_rfs_label_check(clabel, vp,
678 			    EQUALITY_CHECK, exi)) == B_FALSE) {
679 				dominant_label = do_rfs_label_check(clabel,
680 				    vp, DOMINANCE_CHECK, exi);
681 			} else
682 				dominant_label = B_TRUE;
683 			admin_low_client = B_FALSE;
684 		} else
685 			admin_low_client = B_TRUE;
686 	}
687 
688 	if (args->access & ACCESS3_READ) {
689 		error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
690 		if (error) {
691 			if (curthread->t_flag & T_WOULDBLOCK)
692 				goto out;
693 		} else if (!MANDLOCK(vp, va.va_mode) &&
694 		    (!is_system_labeled() || admin_low_client ||
695 		    dominant_label))
696 			resp->resok.access |= ACCESS3_READ;
697 	}
698 	if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
699 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
700 		if (error) {
701 			if (curthread->t_flag & T_WOULDBLOCK)
702 				goto out;
703 		} else if (!is_system_labeled() || admin_low_client ||
704 		    dominant_label)
705 			resp->resok.access |= ACCESS3_LOOKUP;
706 	}
707 	if (checkwriteperm &&
708 	    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
709 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
710 		if (error) {
711 			if (curthread->t_flag & T_WOULDBLOCK)
712 				goto out;
713 		} else if (!MANDLOCK(vp, va.va_mode) &&
714 		    (!is_system_labeled() || admin_low_client || equal_label)) {
715 			resp->resok.access |=
716 			    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
717 		}
718 	}
719 	if (checkwriteperm &&
720 	    (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
721 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
722 		if (error) {
723 			if (curthread->t_flag & T_WOULDBLOCK)
724 				goto out;
725 		} else if (!is_system_labeled() || admin_low_client ||
726 		    equal_label)
727 			resp->resok.access |= ACCESS3_DELETE;
728 	}
729 	if (args->access & ACCESS3_EXECUTE) {
730 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
731 		if (error) {
732 			if (curthread->t_flag & T_WOULDBLOCK)
733 				goto out;
734 		} else if (!MANDLOCK(vp, va.va_mode) &&
735 		    (!is_system_labeled() || admin_low_client ||
736 		    dominant_label))
737 			resp->resok.access |= ACCESS3_EXECUTE;
738 	}
739 
740 	va.va_mask = AT_ALL;
741 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
742 
743 	resp->status = NFS3_OK;
744 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
745 
746 	DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
747 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
748 	    ACCESS3res *, resp);
749 
750 	VN_RELE(vp);
751 
752 	return;
753 
754 out:
755 	if (curthread->t_flag & T_WOULDBLOCK) {
756 		curthread->t_flag &= ~T_WOULDBLOCK;
757 		resp->status = NFS3ERR_JUKEBOX;
758 	} else
759 		resp->status = puterrno3(error);
760 	DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
761 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
762 	    ACCESS3res *, resp);
763 	if (vp != NULL)
764 		VN_RELE(vp);
765 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
766 }
767 
768 void *
rfs3_access_getfh(ACCESS3args * args)769 rfs3_access_getfh(ACCESS3args *args)
770 {
771 
772 	return (&args->object);
773 }
774 
775 /* ARGSUSED */
776 void
rfs3_readlink(READLINK3args * args,READLINK3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)777 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
778     struct svc_req *req, cred_t *cr, bool_t ro)
779 {
780 	int error;
781 	vnode_t *vp;
782 	struct vattr *vap;
783 	struct vattr va;
784 	struct iovec iov;
785 	struct uio uio;
786 	char *data;
787 	struct sockaddr *ca;
788 	char *name = NULL;
789 	int is_referral = 0;
790 
791 	vap = NULL;
792 
793 	vp = nfs3_fhtovp(&args->symlink, exi);
794 
795 	DTRACE_NFSV3_5(op__readlink__start, struct svc_req *, req,
796 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
797 	    READLINK3args *, args);
798 
799 	if (vp == NULL) {
800 		error = ESTALE;
801 		goto out;
802 	}
803 
804 	va.va_mask = AT_ALL;
805 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
806 	if (error)
807 		goto out;
808 
809 	vap = &va;
810 
811 	/* We lied about the object type for a referral */
812 	if (vn_is_nfs_reparse(vp, cr))
813 		is_referral = 1;
814 
815 	if (vp->v_type != VLNK && !is_referral) {
816 		resp->status = NFS3ERR_INVAL;
817 		goto out1;
818 	}
819 
820 	if (MANDLOCK(vp, va.va_mode)) {
821 		resp->status = NFS3ERR_ACCES;
822 		goto out1;
823 	}
824 
825 	if (is_system_labeled()) {
826 		bslabel_t *clabel = req->rq_label;
827 
828 		ASSERT(clabel != NULL);
829 		DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
830 		    "got client label from request(1)", struct svc_req *, req);
831 
832 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
833 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
834 			    exi)) {
835 				resp->status = NFS3ERR_ACCES;
836 				goto out1;
837 			}
838 		}
839 	}
840 
841 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
842 
843 	if (is_referral) {
844 		char *s;
845 		size_t strsz;
846 		kstat_named_t *stat = exi->exi_ne->ne_globals->svstat[NFS_V3];
847 
848 		/* Get an artificial symlink based on a referral */
849 		s = build_symlink(vp, cr, &strsz);
850 		stat[NFS_REFERLINKS].value.ui64++;
851 		DTRACE_PROBE2(nfs3serv__func__referral__reflink,
852 		    vnode_t *, vp, char *, s);
853 		if (s == NULL)
854 			error = EINVAL;
855 		else {
856 			error = 0;
857 			(void) strlcpy(data, s, MAXPATHLEN + 1);
858 			kmem_free(s, strsz);
859 		}
860 
861 	} else {
862 
863 		iov.iov_base = data;
864 		iov.iov_len = MAXPATHLEN;
865 		uio.uio_iov = &iov;
866 		uio.uio_iovcnt = 1;
867 		uio.uio_segflg = UIO_SYSSPACE;
868 		uio.uio_extflg = UIO_COPY_CACHED;
869 		uio.uio_loffset = 0;
870 		uio.uio_resid = MAXPATHLEN;
871 
872 		error = VOP_READLINK(vp, &uio, cr, NULL);
873 
874 		if (!error)
875 			*(data + MAXPATHLEN - uio.uio_resid) = '\0';
876 	}
877 
878 	va.va_mask = AT_ALL;
879 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
880 
881 	/* Lie about object type again just to be consistent */
882 	if (is_referral && vap != NULL)
883 		vap->va_type = VLNK;
884 
885 #if 0 /* notyet */
886 	/*
887 	 * Don't do this.  It causes local disk writes when just
888 	 * reading the file and the overhead is deemed larger
889 	 * than the benefit.
890 	 */
891 	/*
892 	 * Force modified metadata out to stable storage.
893 	 */
894 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
895 #endif
896 
897 	if (error) {
898 		kmem_free(data, MAXPATHLEN + 1);
899 		goto out;
900 	}
901 
902 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
903 	name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
904 	    MAXPATHLEN + 1);
905 
906 	if (name == NULL) {
907 		/*
908 		 * Even though the conversion failed, we return
909 		 * something. We just don't translate it.
910 		 */
911 		name = data;
912 	}
913 
914 	resp->status = NFS3_OK;
915 	vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
916 	resp->resok.data = name;
917 
918 	DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
919 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
920 	    READLINK3res *, resp);
921 	VN_RELE(vp);
922 
923 	if (name != data)
924 		kmem_free(data, MAXPATHLEN + 1);
925 
926 	return;
927 
928 out:
929 	if (curthread->t_flag & T_WOULDBLOCK) {
930 		curthread->t_flag &= ~T_WOULDBLOCK;
931 		resp->status = NFS3ERR_JUKEBOX;
932 	} else
933 		resp->status = puterrno3(error);
934 out1:
935 	DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
936 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
937 	    READLINK3res *, resp);
938 	if (vp != NULL)
939 		VN_RELE(vp);
940 	vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
941 }
942 
943 void *
rfs3_readlink_getfh(READLINK3args * args)944 rfs3_readlink_getfh(READLINK3args *args)
945 {
946 
947 	return (&args->symlink);
948 }
949 
950 void
rfs3_readlink_free(READLINK3res * resp)951 rfs3_readlink_free(READLINK3res *resp)
952 {
953 
954 	if (resp->status == NFS3_OK)
955 		kmem_free(resp->resok.data, MAXPATHLEN + 1);
956 }
957 
958 /*
959  * Server routine to handle read
960  * May handle RDMA data as well as mblks
961  */
962 /* ARGSUSED */
963 void
rfs3_read(READ3args * args,READ3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)964 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
965     struct svc_req *req, cred_t *cr, bool_t ro)
966 {
967 	int error;
968 	vnode_t *vp;
969 	struct vattr *vap;
970 	struct vattr va;
971 	struct iovec iov, *iovp = NULL;
972 	int iovcnt;
973 	struct uio uio;
974 	u_offset_t offset;
975 	mblk_t *mp = NULL;
976 	int in_crit = 0;
977 	int need_rwunlock = 0;
978 	caller_context_t ct;
979 	int rdma_used = 0;
980 	int loaned_buffers;
981 	struct uio *uiop;
982 
983 	vap = NULL;
984 
985 	vp = nfs3_fhtovp(&args->file, exi);
986 
987 	DTRACE_NFSV3_5(op__read__start, struct svc_req *, req,
988 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
989 	    READ3args *, args);
990 
991 
992 	if (vp == NULL) {
993 		error = ESTALE;
994 		goto out;
995 	}
996 
997 	if (args->wlist) {
998 		if (args->count > clist_len(args->wlist)) {
999 			error = EINVAL;
1000 			goto out;
1001 		}
1002 		rdma_used = 1;
1003 	}
1004 
1005 	/* use loaned buffers for TCP */
1006 	loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
1007 
1008 	if (is_system_labeled()) {
1009 		bslabel_t *clabel = req->rq_label;
1010 
1011 		ASSERT(clabel != NULL);
1012 		DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
1013 		    "got client label from request(1)", struct svc_req *, req);
1014 
1015 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1016 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
1017 			    exi)) {
1018 				resp->status = NFS3ERR_ACCES;
1019 				goto out1;
1020 			}
1021 		}
1022 	}
1023 
1024 	ct.cc_sysid = 0;
1025 	ct.cc_pid = 0;
1026 	ct.cc_caller_id = nfs3_srv_caller_id;
1027 	ct.cc_flags = CC_DONTBLOCK;
1028 
1029 	/*
1030 	 * Enter the critical region before calling VOP_RWLOCK
1031 	 * to avoid a deadlock with write requests.
1032 	 */
1033 	if (nbl_need_check(vp)) {
1034 		nbl_start_crit(vp, RW_READER);
1035 		in_crit = 1;
1036 		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
1037 		    NULL)) {
1038 			error = EACCES;
1039 			goto out;
1040 		}
1041 	}
1042 
1043 	error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
1044 
1045 	/* check if a monitor detected a delegation conflict */
1046 	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1047 		resp->status = NFS3ERR_JUKEBOX;
1048 		goto out1;
1049 	}
1050 
1051 	need_rwunlock = 1;
1052 
1053 	va.va_mask = AT_ALL;
1054 	error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1055 
1056 	/*
1057 	 * If we can't get the attributes, then we can't do the
1058 	 * right access checking.  So, we'll fail the request.
1059 	 */
1060 	if (error)
1061 		goto out;
1062 
1063 	vap = &va;
1064 
1065 	if (vp->v_type != VREG) {
1066 		resp->status = NFS3ERR_INVAL;
1067 		goto out1;
1068 	}
1069 
1070 	if (crgetuid(cr) != va.va_uid) {
1071 		error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1072 		if (error) {
1073 			if (curthread->t_flag & T_WOULDBLOCK)
1074 				goto out;
1075 			error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1076 			if (error)
1077 				goto out;
1078 		}
1079 	}
1080 
1081 	if (MANDLOCK(vp, va.va_mode)) {
1082 		resp->status = NFS3ERR_ACCES;
1083 		goto out1;
1084 	}
1085 
1086 	offset = args->offset;
1087 	if (offset >= va.va_size) {
1088 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1089 		if (in_crit)
1090 			nbl_end_crit(vp);
1091 		resp->status = NFS3_OK;
1092 		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1093 		resp->resok.count = 0;
1094 		resp->resok.eof = TRUE;
1095 		resp->resok.data.data_len = 0;
1096 		resp->resok.data.data_val = NULL;
1097 		resp->resok.data.mp = NULL;
1098 		/* RDMA */
1099 		resp->resok.wlist = args->wlist;
1100 		resp->resok.wlist_len = resp->resok.count;
1101 		if (resp->resok.wlist)
1102 			clist_zero_len(resp->resok.wlist);
1103 		goto done;
1104 	}
1105 
1106 	if (args->count == 0) {
1107 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1108 		if (in_crit)
1109 			nbl_end_crit(vp);
1110 		resp->status = NFS3_OK;
1111 		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1112 		resp->resok.count = 0;
1113 		resp->resok.eof = FALSE;
1114 		resp->resok.data.data_len = 0;
1115 		resp->resok.data.data_val = NULL;
1116 		resp->resok.data.mp = NULL;
1117 		/* RDMA */
1118 		resp->resok.wlist = args->wlist;
1119 		resp->resok.wlist_len = resp->resok.count;
1120 		if (resp->resok.wlist)
1121 			clist_zero_len(resp->resok.wlist);
1122 		goto done;
1123 	}
1124 
1125 	/*
1126 	 * do not allocate memory more the max. allowed
1127 	 * transfer size
1128 	 */
1129 	if (args->count > rfs3_tsize(req))
1130 		args->count = rfs3_tsize(req);
1131 
1132 	if (loaned_buffers) {
1133 		uiop = (uio_t *)rfs_setup_xuio(vp);
1134 		ASSERT(uiop != NULL);
1135 		uiop->uio_segflg = UIO_SYSSPACE;
1136 		uiop->uio_loffset = args->offset;
1137 		uiop->uio_resid = args->count;
1138 
1139 		/* Jump to do the read if successful */
1140 		if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
1141 			/*
1142 			 * Need to hold the vnode until after VOP_RETZCBUF()
1143 			 * is called.
1144 			 */
1145 			VN_HOLD(vp);
1146 			goto doio_read;
1147 		}
1148 
1149 		DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
1150 		    uiop->uio_loffset, int, uiop->uio_resid);
1151 
1152 		uiop->uio_extflg = 0;
1153 		/* failure to setup for zero copy */
1154 		rfs_free_xuio((void *)uiop);
1155 		loaned_buffers = 0;
1156 	}
1157 
1158 	/*
1159 	 * If returning data via RDMA Write, then grab the chunk list.
1160 	 * If we aren't returning READ data w/RDMA_WRITE, then grab
1161 	 * a mblk.
1162 	 */
1163 	if (rdma_used) {
1164 		(void) rdma_get_wchunk(req, &iov, args->wlist);
1165 		uio.uio_iov = &iov;
1166 		uio.uio_iovcnt = 1;
1167 	} else {
1168 		/*
1169 		 * mp will contain the data to be sent out in the read reply.
1170 		 * For UDP, this will be freed after the reply has been sent
1171 		 * out by the driver.  For TCP, it will be freed after the last
1172 		 * segment associated with the reply has been ACKed by the
1173 		 * client.
1174 		 */
1175 		mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
1176 		uio.uio_iov = iovp;
1177 		uio.uio_iovcnt = iovcnt;
1178 	}
1179 
1180 	uio.uio_segflg = UIO_SYSSPACE;
1181 	uio.uio_extflg = UIO_COPY_CACHED;
1182 	uio.uio_loffset = args->offset;
1183 	uio.uio_resid = args->count;
1184 	uiop = &uio;
1185 
1186 doio_read:
1187 	error = VOP_READ(vp, uiop, 0, cr, &ct);
1188 
1189 	if (error) {
1190 		if (mp)
1191 			freemsg(mp);
1192 		/* check if a monitor detected a delegation conflict */
1193 		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1194 			resp->status = NFS3ERR_JUKEBOX;
1195 			goto out1;
1196 		}
1197 		goto out;
1198 	}
1199 
1200 	/* make mblk using zc buffers */
1201 	if (loaned_buffers) {
1202 		mp = uio_to_mblk(uiop);
1203 		ASSERT(mp != NULL);
1204 	}
1205 
1206 	va.va_mask = AT_ALL;
1207 	error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1208 
1209 	if (error)
1210 		vap = NULL;
1211 	else
1212 		vap = &va;
1213 
1214 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1215 
1216 	if (in_crit)
1217 		nbl_end_crit(vp);
1218 
1219 	resp->status = NFS3_OK;
1220 	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1221 	resp->resok.count = args->count - uiop->uio_resid;
1222 	if (!error && offset + resp->resok.count == va.va_size)
1223 		resp->resok.eof = TRUE;
1224 	else
1225 		resp->resok.eof = FALSE;
1226 	resp->resok.data.data_len = resp->resok.count;
1227 
1228 	if (mp)
1229 		rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1230 
1231 	resp->resok.data.mp = mp;
1232 	resp->resok.size = (uint_t)args->count;
1233 
1234 	if (rdma_used) {
1235 		resp->resok.data.data_val = (caddr_t)iov.iov_base;
1236 		if (!rdma_setup_read_data3(args, &(resp->resok))) {
1237 			resp->status = NFS3ERR_INVAL;
1238 		}
1239 	} else {
1240 		resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1241 		(resp->resok).wlist = NULL;
1242 	}
1243 
1244 done:
1245 	DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
1246 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1247 	    READ3res *, resp);
1248 
1249 	VN_RELE(vp);
1250 
1251 	if (iovp != NULL)
1252 		kmem_free(iovp, iovcnt * sizeof (struct iovec));
1253 
1254 	return;
1255 
1256 out:
1257 	if (curthread->t_flag & T_WOULDBLOCK) {
1258 		curthread->t_flag &= ~T_WOULDBLOCK;
1259 		resp->status = NFS3ERR_JUKEBOX;
1260 	} else
1261 		resp->status = puterrno3(error);
1262 out1:
1263 	DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
1264 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
1265 	    READ3res *, resp);
1266 
1267 	if (vp != NULL) {
1268 		if (need_rwunlock)
1269 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1270 		if (in_crit)
1271 			nbl_end_crit(vp);
1272 		VN_RELE(vp);
1273 	}
1274 	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1275 
1276 	if (iovp != NULL)
1277 		kmem_free(iovp, iovcnt * sizeof (struct iovec));
1278 }
1279 
1280 void
rfs3_read_free(READ3res * resp)1281 rfs3_read_free(READ3res *resp)
1282 {
1283 	mblk_t *mp;
1284 
1285 	if (resp->status == NFS3_OK) {
1286 		mp = resp->resok.data.mp;
1287 		if (mp != NULL)
1288 			freemsg(mp);
1289 	}
1290 }
1291 
1292 void *
rfs3_read_getfh(READ3args * args)1293 rfs3_read_getfh(READ3args *args)
1294 {
1295 
1296 	return (&args->file);
1297 }
1298 
/*
 * Size of the on-stack iovec array used by rfs3_write().  A request whose
 * mblk chain has at most this many links is described with the stack
 * array; a longer chain gets a kmem_alloc()ed array instead.
 */
#define	MAX_IOVECS	12

#ifdef DEBUG
/*
 * DEBUG-only counters maintained by rfs3_write(): a "hit" is a request
 * whose mblk chain fit in the MAX_IOVECS stack array, a "miss" is one
 * that needed a dynamically allocated iovec array.
 */
static int rfs3_write_hits = 0;
static int rfs3_write_misses = 0;
#endif
1305 
/*
 * rfs3_write - service an NFSv3 WRITE request.
 *
 * Maps the file handle in args->file to a vnode, validates the request
 * and hands the client's data to VOP_WRITE().  On success resp->resok is
 * filled in with the number of bytes written, the commit level the client
 * asked for (args->stable), the server's write verifier (ns->write3verf)
 * and the wcc data.  On failure resp->status is set and resp->resfail
 * carries whatever before/after attributes were collected.
 *
 *	args	decoded WRITE3 arguments (handle, offset, count, data)
 *	resp	result structure to fill in
 *	exi	export the file handle is resolved against
 *	req	RPC request; supplies the client label and feeds the
 *		DTrace probes
 *	cr	credentials the VOPs are issued with
 *	ro	handed to rdonly(); a true result fails the request with
 *		NFS3ERR_ROFS
 *
 * Exit labels:
 *	err	convert `error' (or a pending T_WOULDBLOCK on the current
 *		thread) into resp->status, then fall into err1
 *	err1	resp->status is already set; record the failure wcc data
 *	out	common exit: fire the done probe, then drop the rwlock,
 *		the nbmand critical region and the vnode hold as needed
 */
void
rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	nfs3_srv_t *ns;
	int error;
	vnode_t *vp;
	struct vattr *bvap = NULL;	/* "before" attrs, NULL if unknown */
	struct vattr bva;
	struct vattr *avap = NULL;	/* "after" attrs, NULL if unknown */
	struct vattr ava;
	u_offset_t rlimit;
	struct uio uio;
	struct iovec iov[MAX_IOVECS];	/* stack array for the common case */
	mblk_t *m;
	struct iovec *iovp;
	int iovcnt;
	int ioflag;
	cred_t *savecred;
	int in_crit = 0;		/* set once nbl_start_crit() is called */
	int rwlock_ret = -1;		/* -1: no rwlock to drop at out: */
	caller_context_t ct;

	vp = nfs3_fhtovp(&args->file, exi);

	DTRACE_NFSV3_5(op__write__start, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
	    WRITE3args *, args);

	/* No vnode could be obtained for this file handle. */
	if (vp == NULL) {
		error = ESTALE;
		goto err;
	}

	ASSERT3U(curzone->zone_id, ==, exi->exi_zoneid); /* exi is non-NULL. */
	ns = nfs3_get_srv();

	/*
	 * On a labeled system a client whose label differs from admin_low
	 * must pass an equality label check against the file.
	 */
	if (is_system_labeled()) {
		bslabel_t *clabel = req->rq_label;

		ASSERT(clabel != NULL);
		DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
		    "got client label from request(1)", struct svc_req *, req);

		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
			    exi)) {
				resp->status = NFS3ERR_ACCES;
				goto err1;
			}
		}
	}

	/*
	 * Caller context handed to the VOPs below.  CC_DONTBLOCK is
	 * requested; the EAGAIN/CC_WOULDBLOCK checks further down turn the
	 * resulting conflict indication into NFS3ERR_JUKEBOX.
	 */
	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs3_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
		    NULL)) {
			error = EACCES;
			goto err;
		}
	}

	rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		resp->status = NFS3ERR_JUKEBOX;
		/* Reset so that out: does not call VOP_RWUNLOCK(). */
		rwlock_ret = -1;
		goto err1;
	}


	bva.va_mask = AT_ALL;
	error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

	/*
	 * If we can't get the attributes, then we can't do the
	 * right access checking.  So, we'll fail the request.
	 */
	if (error)
		goto err;

	/*
	 * Until the write has happened the after attributes are the same
	 * as the before attributes; early failures report them as such.
	 */
	bvap = &bva;
	avap = bvap;

	/* The count field must agree with the length of the data sent. */
	if (args->count != args->data.data_len) {
		resp->status = NFS3ERR_INVAL;
		goto err1;
	}

	if (rdonly(ro, vp)) {
		resp->status = NFS3ERR_ROFS;
		goto err1;
	}

	/* Only regular files may be written. */
	if (vp->v_type != VREG) {
		resp->status = NFS3ERR_INVAL;
		goto err1;
	}

	/* The owner of the file skips the VOP_ACCESS() check. */
	if (crgetuid(cr) != bva.va_uid &&
	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
		goto err;

	if (MANDLOCK(vp, bva.va_mode)) {
		resp->status = NFS3ERR_ACCES;
		goto err1;
	}

	/* Zero-length write: nothing to transfer, report success. */
	if (args->count == 0) {
		resp->status = NFS3_OK;
		vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
		resp->resok.count = 0;
		resp->resok.committed = args->stable;
		resp->resok.verf = ns->write3verf;
		goto out;
	}

	/*
	 * Build the iovec array describing the data to write.  The data
	 * arrives in one of three forms: an mblk chain (one iovec per
	 * mblk), an rlist chunk, or a single contiguous buffer.
	 */
	if (args->mblk != NULL) {
		iovcnt = 0;
		for (m = args->mblk; m != NULL; m = m->b_cont)
			iovcnt++;
		if (iovcnt <= MAX_IOVECS) {
#ifdef DEBUG
			rfs3_write_hits++;
#endif
			iovp = iov;
		} else {
#ifdef DEBUG
			rfs3_write_misses++;
#endif
			/* Too many mblks for the stack array. */
			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
		}
		mblk_to_iov(args->mblk, iovcnt, iovp);

	} else if (args->rlist != NULL) {
		iovcnt = 1;
		iovp = iov;
		iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
		iovp->iov_len = args->count;
	} else {
		iovcnt = 1;
		iovp = iov;
		iovp->iov_base = args->data.data_val;
		iovp->iov_len = args->count;
	}

	uio.uio_iov = iovp;
	uio.uio_iovcnt = iovcnt;

	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_DEFAULT;
	uio.uio_loffset = args->offset;
	uio.uio_resid = args->count;
	/*
	 * The file size limit comes from curproc->p_fsz_ctl; shrink the
	 * transfer if fewer than uio_resid bytes remain between the
	 * starting offset and that limit.
	 */
	uio.uio_llimit = curproc->p_fsz_ctl;
	rlimit = uio.uio_llimit - args->offset;
	if (rlimit < (u_offset_t)uio.uio_resid)
		uio.uio_resid = (int)rlimit;

	/* Translate the requested stability level into a VOP_WRITE ioflag. */
	if (args->stable == UNSTABLE)
		ioflag = 0;
	else if (args->stable == FILE_SYNC)
		ioflag = FSYNC;
	else if (args->stable == DATA_SYNC)
		ioflag = FDSYNC;
	else {
		/* Unknown stability level; release a dynamic iovec array. */
		if (iovp != iov)
			kmem_free(iovp, sizeof (*iovp) * iovcnt);
		resp->status = NFS3ERR_INVAL;
		goto err1;
	}

	/*
	 * We're changing creds because VM may fault and we need
	 * the cred of the current thread to be used if quota
	 * checking is enabled.
	 */
	savecred = curthread->t_cred;
	curthread->t_cred = cr;
	error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
	curthread->t_cred = savecred;

	/* The iovec array is no longer needed once the write returns. */
	if (iovp != iov)
		kmem_free(iovp, sizeof (*iovp) * iovcnt);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		resp->status = NFS3ERR_JUKEBOX;
		goto err1;
	}

	/*
	 * Refresh the after attributes before looking at the write's
	 * result so that a failure reply also carries them.
	 */
	ava.va_mask = AT_ALL;
	avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;

	if (error)
		goto err;

	/*
	 * If we were unable to get the V_WRITELOCK_TRUE, then we
	 * may not have accurate after attrs, so check if
	 * we have both attributes, they have a non-zero va_seq, and
	 * va_seq has changed by exactly one,
	 * if not, turn off the before attr.
	 */
	if (rwlock_ret != V_WRITELOCK_TRUE) {
		if (bvap == NULL || avap == NULL ||
		    bvap->va_seq == 0 || avap->va_seq == 0 ||
		    avap->va_seq != (bvap->va_seq + 1)) {
			bvap = NULL;
		}
	}

	resp->status = NFS3_OK;
	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
	/* Bytes written = bytes requested minus what the uio has left. */
	resp->resok.count = args->count - uio.uio_resid;
	resp->resok.committed = args->stable;
	resp->resok.verf = ns->write3verf;
	goto out;

err:
	if (curthread->t_flag & T_WOULDBLOCK) {
		curthread->t_flag &= ~T_WOULDBLOCK;
		resp->status = NFS3ERR_JUKEBOX;
	} else
		resp->status = puterrno3(error);
err1:
	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
out:
	DTRACE_NFSV3_5(op__write__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
	    WRITE3res *, resp);

	if (vp != NULL) {
		/* rwlock_ret is -1 unless VOP_RWLOCK() left a lock to drop. */
		if (rwlock_ret != -1)
			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
	}
}
1556 
1557 void *
rfs3_write_getfh(WRITE3args * args)1558 rfs3_write_getfh(WRITE3args *args)
1559 {
1560 
1561 	return (&args->file);
1562 }
1563 
/*
 * rfs3_create - service an NFSv3 CREATE request.
 *
 * Resolves the directory handle in args->where.dir, converts the
 * requested name with nfscmd_convname() and calls VOP_CREATE() to make a
 * regular file.  The three create modes are handled as follows:
 *
 *	EXCLUSIVE	The file is created EXCL with mode 0 and the
 *			client's verifier stored as its mtime.  If the
 *			create reports EEXIST, the existing file's mtime
 *			is compared with the verifier: a match is treated
 *			as success, a mismatch fails with EEXIST.
 *	GUARDED		The file is created EXCL with the client-supplied
 *			attributes; EEXIST is returned to the client.
 *	otherwise	The file is created NONEXCL.  If a size was
 *			supplied and the name already exists, delegation
 *			and nbmand lock conflicts are checked first.
 *
 * On success resp->resok receives the new file handle (when makefh3()
 * succeeds), the object attributes and the directory wcc data.  On
 * failure resp->status is set and resp->resfail.dir_wcc is filled in.
 *
 *	args	decoded CREATE3 arguments
 *	resp	result structure to fill in
 *	exi	export the directory handle is resolved against
 *	req	RPC request; supplies the client label, the caller's
 *		address and feeds the DTrace probes
 *	cr	credentials the VOPs are issued with
 *	ro	handed to rdonly(); a true result fails the request with
 *		NFS3ERR_ROFS
 *
 * Exit labels:
 *	out	convert `error' (or a pending T_WOULDBLOCK on the current
 *		thread) into resp->status, then fall into out1
 *	out1	resp->status is already set; fire the done probe and
 *		release the converted name, tvp (and its critical
 *		region) and dvp
 */
void
rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int in_crit = 0;	/* set once nbl_start_crit(tvp) is called */
	vnode_t *vp;		/* the created (or existing) file */
	vnode_t *tvp = NULL;	/* pre-existing file found by the size check */
	vnode_t *dvp;		/* parent directory */
	struct vattr *vap;
	struct vattr va;
	struct vattr *dbvap;	/* directory "before" attrs, NULL if unknown */
	struct vattr dbva;
	struct vattr *davap;	/* directory "after" attrs, NULL if unknown */
	struct vattr dava;
	enum vcexcl excl;
	nfstime3 *mtime;
	len_t reqsize;		/* requested size; va is reused further down */
	bool_t trunc;
	struct sockaddr *ca;
	char *name = NULL;

	dbvap = NULL;
	davap = NULL;

	dvp = nfs3_fhtovp(&args->where.dir, exi);

	DTRACE_NFSV3_5(op__create__start, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
	    CREATE3args *, args);

	/* No vnode could be obtained for the directory handle. */
	if (dvp == NULL) {
		error = ESTALE;
		goto out;
	}

	/*
	 * Capture the directory's before attributes.  Until the create is
	 * attempted the after attributes are reported as the same.
	 */
	dbva.va_mask = AT_ALL;
	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
	davap = dbvap;

	if (args->where.name == nfs3nametoolong) {
		resp->status = NFS3ERR_NAMETOOLONG;
		goto out1;
	}

	/* A missing or empty name is rejected. */
	if (args->where.name == NULL || *(args->where.name) == '\0') {
		resp->status = NFS3ERR_ACCES;
		goto out1;
	}

	if (rdonly(ro, dvp)) {
		resp->status = NFS3ERR_ROFS;
		goto out1;
	}

	/*
	 * On a labeled system a client whose label differs from admin_low
	 * must pass an equality label check against the directory.
	 */
	if (is_system_labeled()) {
		bslabel_t *clabel = req->rq_label;

		ASSERT(clabel != NULL);
		DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
		    "got client label from request(1)", struct svc_req *, req);

		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
			    exi)) {
				resp->status = NFS3ERR_ACCES;
				goto out1;
			}
		}
	}

	/*
	 * Convert the client's name.  The result may be args->where.name
	 * itself; it is only freed when it is a different buffer.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, args->where.name,
	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);

	if (name == NULL) {
		/* This is really a Solaris EILSEQ */
		resp->status = NFS3ERR_INVAL;
		goto out1;
	}

	if (args->how.mode == EXCLUSIVE) {
		va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
		va.va_type = VREG;
		va.va_mode = (mode_t)0;
		/*
		 * Ensure no time overflows and that types match
		 */
		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
		va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
		va.va_mtime.tv_nsec = mtime->nseconds;
		excl = EXCL;
	} else {
		error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
		    &va);
		if (error)
			goto out;
		va.va_mask |= AT_TYPE;
		va.va_type = VREG;
		if (args->how.mode == GUARDED)
			excl = EXCL;
		else {
			excl = NONEXCL;

			/*
			 * During creation of file in non-exclusive mode
			 * if size of file is being set then make sure
			 * that if the file already exists that no conflicting
			 * non-blocking mandatory locks exists in the region
			 * being modified. If there are conflicting locks fail
			 * the operation with EACCES.
			 */
			if (va.va_mask & AT_SIZE) {
				struct vattr tva;

				/*
				 * Does file already exist?
				 */
				error = VOP_LOOKUP(dvp, name, &tvp,
				    NULL, 0, NULL, cr, NULL, NULL, NULL);

				/*
				 * Check to see if the file has been delegated
				 * to a v4 client.  If so, then begin recall of
				 * the delegation and return JUKEBOX to allow
				 * the client to retransmit its request.
				 */

				trunc = va.va_size == 0;
				if (!error &&
				    rfs4_check_delegated(FWRITE, tvp, trunc)) {
					resp->status = NFS3ERR_JUKEBOX;
					goto out1;
				}

				/*
				 * Check for NBMAND lock conflicts
				 */
				if (!error && nbl_need_check(tvp)) {
					u_offset_t offset;
					ssize_t len;

					/*
					 * The critical region is left by the
					 * exit code at the end of the function
					 * (success or failure), not here.
					 */
					nbl_start_crit(tvp, RW_READER);
					in_crit = 1;

					tva.va_mask = AT_SIZE;
					error = VOP_GETATTR(tvp, &tva, 0, cr,
					    NULL);
					/*
					 * Can't check for conflicts, so return
					 * error.
					 */
					if (error)
						goto out;

					/*
					 * The region checked starts at the
					 * smaller of the current and requested
					 * sizes and spans the difference
					 * between them.
					 */
					offset = tva.va_size < va.va_size ?
					    tva.va_size : va.va_size;
					len = tva.va_size < va.va_size ?
					    va.va_size - tva.va_size :
					    tva.va_size - va.va_size;
					if (nbl_conflict(tvp, NBL_WRITE,
					    offset, len, 0, NULL)) {
						error = EACCES;
						goto out;
					}
				} else if (tvp) {
					/* No nbmand check needed; drop it. */
					VN_RELE(tvp);
					tvp = NULL;
				}
			}
		}
		/* Remember the requested size; va is overwritten below. */
		if (va.va_mask & AT_SIZE)
			reqsize = va.va_size;
	}

	/*
	 * Must specify the mode.
	 */
	if (!(va.va_mask & AT_MODE)) {
		resp->status = NFS3ERR_INVAL;
		goto out1;
	}

	/*
	 * If the filesystem is exported with nosuid, then mask off
	 * the setuid and setgid bits.
	 */
	if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
		va.va_mode &= ~(VSUID | VSGID);

tryagain:
	/*
	 * The file open mode used is VWRITE.  If the client needs
	 * some other semantic, then it should do the access checking
	 * itself.  It would have been nice to have the file open mode
	 * passed as part of the arguments.
	 */
	error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
	    &vp, cr, 0, NULL, NULL);

	/* Refresh the directory's after attributes, success or not. */
	dava.va_mask = AT_ALL;
	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;

	if (error) {
		/*
		 * If we got something other than file already exists
		 * then just return this error.  Otherwise, we got
		 * EEXIST.  If we were doing a GUARDED create, then
		 * just return this error.  Otherwise, we need to
		 * make sure that this wasn't a duplicate of an
		 * exclusive create request.
		 *
		 * The assumption is made that a non-exclusive create
		 * request will never return EEXIST.
		 */
		if (error != EEXIST || args->how.mode == GUARDED)
			goto out;
		/*
		 * Lookup the file so that we can get a vnode for it.
		 */
		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
		    NULL, cr, NULL, NULL, NULL);
		if (error) {
			/*
			 * We couldn't find the file that we thought that
			 * we just created.  So, we'll just try creating
			 * it again.
			 */
			if (error == ENOENT)
				goto tryagain;
			goto out;
		}

		/*
		 * If the file is delegated to a v4 client, go ahead
		 * and initiate recall, this create is a hint that a
		 * conflicting v3 open has occurred.
		 */

		if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
			VN_RELE(vp);
			resp->status = NFS3ERR_JUKEBOX;
			goto out1;
		}

		va.va_mask = AT_ALL;
		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;

		/*
		 * For an EXCLUSIVE create the existing file is accepted
		 * only if its mtime matches the verifier in the request;
		 * unreadable attributes count as a mismatch.
		 */
		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
		/* % with INT32_MAX to prevent overflows */
		if (args->how.mode == EXCLUSIVE && (vap == NULL ||
		    vap->va_mtime.tv_sec !=
		    (mtime->seconds % INT32_MAX) ||
		    vap->va_mtime.tv_nsec != mtime->nseconds)) {
			VN_RELE(vp);
			error = EEXIST;
			goto out;
		}
	} else {

		/*
		 * The request counts as a truncation when a non-EXCLUSIVE
		 * create asked for a size of zero.
		 */
		if ((args->how.mode == UNCHECKED ||
		    args->how.mode == GUARDED) &&
		    args->how.createhow3_u.obj_attributes.size.set_it &&
		    va.va_size == 0)
			trunc = TRUE;
		else
			trunc = FALSE;

		if (rfs4_check_delegated(FWRITE, vp, trunc)) {
			VN_RELE(vp);
			resp->status = NFS3ERR_JUKEBOX;
			goto out1;
		}

		va.va_mask = AT_ALL;
		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;

		/*
		 * We need to check to make sure that the file got
		 * created to the indicated size.  If not, we do a
		 * setattr to try to change the size, but we don't
		 * try too hard.  This shouldn't be a problem as most
		 * clients will only specify a size of zero which
		 * local file systems handle.  However, even if
		 * the client does specify a non-zero size, it can
		 * still recover by checking the size of the file
		 * after it has created it and then issue a setattr
		 * request of its own to set the size of the file.
		 */
		if (vap != NULL &&
		    (args->how.mode == UNCHECKED ||
		    args->how.mode == GUARDED) &&
		    args->how.createhow3_u.obj_attributes.size.set_it &&
		    vap->va_size != reqsize) {
			va.va_mask = AT_SIZE;
			va.va_size = reqsize;
			(void) VOP_SETATTR(vp, &va, 0, cr, NULL);
			va.va_mask = AT_ALL;
			vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
		}
	}

	/* Done with the converted name. */
	if (name != args->where.name)
		kmem_free(name, MAXPATHLEN + 1);

	/* A makefh3() failure is not fatal; the handle is just omitted. */
	error = makefh3(&resp->resok.obj.handle, vp, exi);
	if (error)
		resp->resok.obj.handle_follows = FALSE;
	else
		resp->resok.obj.handle_follows = TRUE;

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
	(void) VOP_FSYNC(dvp, 0, cr, NULL);

	VN_RELE(vp);
	if (tvp != NULL) {
		if (in_crit)
			nbl_end_crit(tvp);
		VN_RELE(tvp);
	}

	resp->status = NFS3_OK;
	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);

	DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
	    CREATE3res *, resp);

	VN_RELE(dvp);
	return;

out:
	if (curthread->t_flag & T_WOULDBLOCK) {
		curthread->t_flag &= ~T_WOULDBLOCK;
		resp->status = NFS3ERR_JUKEBOX;
	} else
		resp->status = puterrno3(error);
out1:
	DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
	    CREATE3res *, resp);

	if (name != NULL && name != args->where.name)
		kmem_free(name, MAXPATHLEN + 1);

	if (tvp != NULL) {
		if (in_crit)
			nbl_end_crit(tvp);
		VN_RELE(tvp);
	}
	if (dvp != NULL)
		VN_RELE(dvp);
	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
}
1922 
1923 void *
rfs3_create_getfh(CREATE3args * args)1924 rfs3_create_getfh(CREATE3args *args)
1925 {
1926 
1927 	return (&args->where.dir);
1928 }
1929 
1930 void
rfs3_mkdir(MKDIR3args * args,MKDIR3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)1931 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1932     struct svc_req *req, cred_t *cr, bool_t ro)
1933 {
1934 	int error;
1935 	vnode_t *vp = NULL;
1936 	vnode_t *dvp;
1937 	struct vattr *vap;
1938 	struct vattr va;
1939 	struct vattr *dbvap;
1940 	struct vattr dbva;
1941 	struct vattr *davap;
1942 	struct vattr dava;
1943 	struct sockaddr *ca;
1944 	char *name = NULL;
1945 
1946 	dbvap = NULL;
1947 	davap = NULL;
1948 
1949 	dvp = nfs3_fhtovp(&args->where.dir, exi);
1950 
1951 	DTRACE_NFSV3_5(op__mkdir__start, struct svc_req *, req,
1952 	    cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
1953 	    MKDIR3args *, args);
1954 
1955 	if (dvp == NULL) {
1956 		error = ESTALE;
1957 		goto out;
1958 	}
1959 
1960 	dbva.va_mask = AT_ALL;
1961 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1962 	davap = dbvap;
1963 
1964 	if (args->where.name == nfs3nametoolong) {
1965 		resp->status = NFS3ERR_NAMETOOLONG;
1966 		goto out1;
1967 	}
1968 
1969 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1970 		resp->status = NFS3ERR_ACCES;
1971 		goto out1;
1972 	}
1973 
1974 	if (rdonly(ro, dvp)) {
1975 		resp->status = NFS3ERR_ROFS;
1976 		goto out1;
1977 	}
1978 
1979 	if (is_system_labeled()) {
1980 		bslabel_t *clabel = req->rq_label;
1981 
1982 		ASSERT(clabel != NULL);
1983 		DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1984 		    "got client label from request(1)", struct svc_req *, req);
1985 
1986 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1987 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1988 			    exi)) {
1989 				resp->status = NFS3ERR_ACCES;
1990 				goto out1;
1991 			}
1992 		}
1993 	}
1994 
1995 	error = sattr3_to_vattr(&args->attributes, &va);
1996 	if (error)
1997 		goto out;
1998 
1999 	if (!(va.va_mask & AT_MODE)) {
2000 		resp->status = NFS3ERR_INVAL;
2001 		goto out1;
2002 	}
2003 
2004 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2005 	name = nfscmd_convname(ca, exi, args->where.name,
2006 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2007 
2008 	if (name == NULL) {
2009 		resp->status = NFS3ERR_INVAL;
2010 		goto out1;
2011 	}
2012 
2013 	va.va_mask |= AT_TYPE;
2014 	va.va_type = VDIR;
2015 
2016 	error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
2017 
2018 	if (name != args->where.name)
2019 		kmem_free(name, MAXPATHLEN + 1);
2020 
2021 	dava.va_mask = AT_ALL;
2022 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2023 
2024 	/*
2025 	 * Force modified data and metadata out to stable storage.
2026 	 */
2027 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
2028 
2029 	if (error)
2030 		goto out;
2031 
2032 	error = makefh3(&resp->resok.obj.handle, vp, exi);
2033 	if (error)
2034 		resp->resok.obj.handle_follows = FALSE;
2035 	else
2036 		resp->resok.obj.handle_follows = TRUE;
2037 
2038 	va.va_mask = AT_ALL;
2039 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2040 
2041 	/*
2042 	 * Force modified data and metadata out to stable storage.
2043 	 */
2044 	(void) VOP_FSYNC(vp, 0, cr, NULL);
2045 
2046 	VN_RELE(vp);
2047 
2048 	resp->status = NFS3_OK;
2049 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2050 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2051 
2052 	DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2053 	    cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2054 	    MKDIR3res *, resp);
2055 	VN_RELE(dvp);
2056 
2057 	return;
2058 
2059 out:
2060 	if (curthread->t_flag & T_WOULDBLOCK) {
2061 		curthread->t_flag &= ~T_WOULDBLOCK;
2062 		resp->status = NFS3ERR_JUKEBOX;
2063 	} else
2064 		resp->status = puterrno3(error);
2065 out1:
2066 	DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
2067 	    cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2068 	    MKDIR3res *, resp);
2069 	if (dvp != NULL)
2070 		VN_RELE(dvp);
2071 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2072 }
2073 
2074 void *
rfs3_mkdir_getfh(MKDIR3args * args)2075 rfs3_mkdir_getfh(MKDIR3args *args)
2076 {
2077 
2078 	return (&args->where.dir);
2079 }
2080 
2081 void
rfs3_symlink(SYMLINK3args * args,SYMLINK3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)2082 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2083     struct svc_req *req, cred_t *cr, bool_t ro)
2084 {
2085 	int error;
2086 	vnode_t *vp;
2087 	vnode_t *dvp;
2088 	struct vattr *vap;
2089 	struct vattr va;
2090 	struct vattr *dbvap;
2091 	struct vattr dbva;
2092 	struct vattr *davap;
2093 	struct vattr dava;
2094 	struct sockaddr *ca;
2095 	char *name = NULL;
2096 	char *symdata = NULL;
2097 
2098 	dbvap = NULL;
2099 	davap = NULL;
2100 
2101 	dvp = nfs3_fhtovp(&args->where.dir, exi);
2102 
2103 	DTRACE_NFSV3_5(op__symlink__start, struct svc_req *, req,
2104 	    cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2105 	    SYMLINK3args *, args);
2106 
2107 	if (dvp == NULL) {
2108 		error = ESTALE;
2109 		goto err;
2110 	}
2111 
2112 	dbva.va_mask = AT_ALL;
2113 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2114 	davap = dbvap;
2115 
2116 	if (args->where.name == nfs3nametoolong) {
2117 		resp->status = NFS3ERR_NAMETOOLONG;
2118 		goto err1;
2119 	}
2120 
2121 	if (args->where.name == NULL || *(args->where.name) == '\0') {
2122 		resp->status = NFS3ERR_ACCES;
2123 		goto err1;
2124 	}
2125 
2126 	if (rdonly(ro, dvp)) {
2127 		resp->status = NFS3ERR_ROFS;
2128 		goto err1;
2129 	}
2130 
2131 	if (is_system_labeled()) {
2132 		bslabel_t *clabel = req->rq_label;
2133 
2134 		ASSERT(clabel != NULL);
2135 		DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2136 		    "got client label from request(1)", struct svc_req *, req);
2137 
2138 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2139 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2140 			    exi)) {
2141 				resp->status = NFS3ERR_ACCES;
2142 				goto err1;
2143 			}
2144 		}
2145 	}
2146 
2147 	error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2148 	if (error)
2149 		goto err;
2150 
2151 	if (!(va.va_mask & AT_MODE)) {
2152 		resp->status = NFS3ERR_INVAL;
2153 		goto err1;
2154 	}
2155 
2156 	if (args->symlink.symlink_data == nfs3nametoolong) {
2157 		resp->status = NFS3ERR_NAMETOOLONG;
2158 		goto err1;
2159 	}
2160 
2161 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2162 	name = nfscmd_convname(ca, exi, args->where.name,
2163 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2164 
2165 	if (name == NULL) {
2166 		/* This is really a Solaris EILSEQ */
2167 		resp->status = NFS3ERR_INVAL;
2168 		goto err1;
2169 	}
2170 
2171 	symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2172 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2173 	if (symdata == NULL) {
2174 		/* This is really a Solaris EILSEQ */
2175 		resp->status = NFS3ERR_INVAL;
2176 		goto err1;
2177 	}
2178 
2179 
2180 	va.va_mask |= AT_TYPE;
2181 	va.va_type = VLNK;
2182 
2183 	error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2184 
2185 	dava.va_mask = AT_ALL;
2186 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2187 
2188 	if (error)
2189 		goto err;
2190 
2191 	error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2192 	    NULL, NULL, NULL);
2193 
2194 	/*
2195 	 * Force modified data and metadata out to stable storage.
2196 	 */
2197 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
2198 
2199 
2200 	resp->status = NFS3_OK;
2201 	if (error) {
2202 		resp->resok.obj.handle_follows = FALSE;
2203 		vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2204 		vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2205 		goto out;
2206 	}
2207 
2208 	error = makefh3(&resp->resok.obj.handle, vp, exi);
2209 	if (error)
2210 		resp->resok.obj.handle_follows = FALSE;
2211 	else
2212 		resp->resok.obj.handle_follows = TRUE;
2213 
2214 	va.va_mask = AT_ALL;
2215 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2216 
2217 	/*
2218 	 * Force modified data and metadata out to stable storage.
2219 	 */
2220 	(void) VOP_FSYNC(vp, 0, cr, NULL);
2221 
2222 	VN_RELE(vp);
2223 
2224 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2225 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2226 	goto out;
2227 
2228 err:
2229 	if (curthread->t_flag & T_WOULDBLOCK) {
2230 		curthread->t_flag &= ~T_WOULDBLOCK;
2231 		resp->status = NFS3ERR_JUKEBOX;
2232 	} else
2233 		resp->status = puterrno3(error);
2234 err1:
2235 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2236 out:
2237 	if (name != NULL && name != args->where.name)
2238 		kmem_free(name, MAXPATHLEN + 1);
2239 	if (symdata != NULL && symdata != args->symlink.symlink_data)
2240 		kmem_free(symdata, MAXPATHLEN + 1);
2241 
2242 	DTRACE_NFSV3_5(op__symlink__done, struct svc_req *, req,
2243 	    cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2244 	    SYMLINK3res *, resp);
2245 
2246 	if (dvp != NULL)
2247 		VN_RELE(dvp);
2248 }
2249 
2250 void *
rfs3_symlink_getfh(SYMLINK3args * args)2251 rfs3_symlink_getfh(SYMLINK3args *args)
2252 {
2253 
2254 	return (&args->where.dir);
2255 }
2256 
2257 void
rfs3_mknod(MKNOD3args * args,MKNOD3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)2258 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2259     struct svc_req *req, cred_t *cr, bool_t ro)
2260 {
2261 	int error;
2262 	vnode_t *vp;
2263 	vnode_t *realvp;
2264 	vnode_t *dvp;
2265 	struct vattr *vap;
2266 	struct vattr va;
2267 	struct vattr *dbvap;
2268 	struct vattr dbva;
2269 	struct vattr *davap;
2270 	struct vattr dava;
2271 	int mode;
2272 	enum vcexcl excl;
2273 	struct sockaddr *ca;
2274 	char *name = NULL;
2275 
2276 	dbvap = NULL;
2277 	davap = NULL;
2278 
2279 	dvp = nfs3_fhtovp(&args->where.dir, exi);
2280 
2281 	DTRACE_NFSV3_5(op__mknod__start, struct svc_req *, req,
2282 	    cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2283 	    MKNOD3args *, args);
2284 
2285 	if (dvp == NULL) {
2286 		error = ESTALE;
2287 		goto out;
2288 	}
2289 
2290 	dbva.va_mask = AT_ALL;
2291 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2292 	davap = dbvap;
2293 
2294 	if (args->where.name == nfs3nametoolong) {
2295 		resp->status = NFS3ERR_NAMETOOLONG;
2296 		goto out1;
2297 	}
2298 
2299 	if (args->where.name == NULL || *(args->where.name) == '\0') {
2300 		resp->status = NFS3ERR_ACCES;
2301 		goto out1;
2302 	}
2303 
2304 	if (rdonly(ro, dvp)) {
2305 		resp->status = NFS3ERR_ROFS;
2306 		goto out1;
2307 	}
2308 
2309 	if (is_system_labeled()) {
2310 		bslabel_t *clabel = req->rq_label;
2311 
2312 		ASSERT(clabel != NULL);
2313 		DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2314 		    "got client label from request(1)", struct svc_req *, req);
2315 
2316 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2317 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2318 			    exi)) {
2319 				resp->status = NFS3ERR_ACCES;
2320 				goto out1;
2321 			}
2322 		}
2323 	}
2324 
2325 	switch (args->what.type) {
2326 	case NF3CHR:
2327 	case NF3BLK:
2328 		error = sattr3_to_vattr(
2329 		    &args->what.mknoddata3_u.device.dev_attributes, &va);
2330 		if (error)
2331 			goto out;
2332 		if (secpolicy_sys_devices(cr) != 0) {
2333 			resp->status = NFS3ERR_PERM;
2334 			goto out1;
2335 		}
2336 		if (args->what.type == NF3CHR)
2337 			va.va_type = VCHR;
2338 		else
2339 			va.va_type = VBLK;
2340 		va.va_rdev = makedevice(
2341 		    args->what.mknoddata3_u.device.spec.specdata1,
2342 		    args->what.mknoddata3_u.device.spec.specdata2);
2343 		va.va_mask |= AT_TYPE | AT_RDEV;
2344 		break;
2345 	case NF3SOCK:
2346 		error = sattr3_to_vattr(
2347 		    &args->what.mknoddata3_u.pipe_attributes, &va);
2348 		if (error)
2349 			goto out;
2350 		va.va_type = VSOCK;
2351 		va.va_mask |= AT_TYPE;
2352 		break;
2353 	case NF3FIFO:
2354 		error = sattr3_to_vattr(
2355 		    &args->what.mknoddata3_u.pipe_attributes, &va);
2356 		if (error)
2357 			goto out;
2358 		va.va_type = VFIFO;
2359 		va.va_mask |= AT_TYPE;
2360 		break;
2361 	default:
2362 		resp->status = NFS3ERR_BADTYPE;
2363 		goto out1;
2364 	}
2365 
2366 	/*
2367 	 * Must specify the mode.
2368 	 */
2369 	if (!(va.va_mask & AT_MODE)) {
2370 		resp->status = NFS3ERR_INVAL;
2371 		goto out1;
2372 	}
2373 
2374 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2375 	name = nfscmd_convname(ca, exi, args->where.name,
2376 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2377 
2378 	if (name == NULL) {
2379 		resp->status = NFS3ERR_INVAL;
2380 		goto out1;
2381 	}
2382 
2383 	excl = EXCL;
2384 
2385 	mode = 0;
2386 
2387 	error = VOP_CREATE(dvp, name, &va, excl, mode,
2388 	    &vp, cr, 0, NULL, NULL);
2389 
2390 	if (name != args->where.name)
2391 		kmem_free(name, MAXPATHLEN + 1);
2392 
2393 	dava.va_mask = AT_ALL;
2394 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2395 
2396 	/*
2397 	 * Force modified data and metadata out to stable storage.
2398 	 */
2399 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
2400 
2401 	if (error)
2402 		goto out;
2403 
2404 	resp->status = NFS3_OK;
2405 
2406 	error = makefh3(&resp->resok.obj.handle, vp, exi);
2407 	if (error)
2408 		resp->resok.obj.handle_follows = FALSE;
2409 	else
2410 		resp->resok.obj.handle_follows = TRUE;
2411 
2412 	va.va_mask = AT_ALL;
2413 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2414 
2415 	/*
2416 	 * Force modified metadata out to stable storage.
2417 	 *
2418 	 * if a underlying vp exists, pass it to VOP_FSYNC
2419 	 */
2420 	if (VOP_REALVP(vp, &realvp, NULL) == 0)
2421 		(void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2422 	else
2423 		(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2424 
2425 	VN_RELE(vp);
2426 
2427 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2428 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2429 	DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2430 	    cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2431 	    MKNOD3res *, resp);
2432 	VN_RELE(dvp);
2433 	return;
2434 
2435 out:
2436 	if (curthread->t_flag & T_WOULDBLOCK) {
2437 		curthread->t_flag &= ~T_WOULDBLOCK;
2438 		resp->status = NFS3ERR_JUKEBOX;
2439 	} else
2440 		resp->status = puterrno3(error);
2441 out1:
2442 	DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
2443 	    cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
2444 	    MKNOD3res *, resp);
2445 	if (dvp != NULL)
2446 		VN_RELE(dvp);
2447 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2448 }
2449 
2450 void *
rfs3_mknod_getfh(MKNOD3args * args)2451 rfs3_mknod_getfh(MKNOD3args *args)
2452 {
2453 
2454 	return (&args->where.dir);
2455 }
2456 
2457 void
rfs3_remove(REMOVE3args * args,REMOVE3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)2458 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2459     struct svc_req *req, cred_t *cr, bool_t ro)
2460 {
2461 	int error = 0;
2462 	vnode_t *vp;
2463 	struct vattr *bvap;
2464 	struct vattr bva;
2465 	struct vattr *avap;
2466 	struct vattr ava;
2467 	vnode_t *targvp = NULL;
2468 	struct sockaddr *ca;
2469 	char *name = NULL;
2470 
2471 	bvap = NULL;
2472 	avap = NULL;
2473 
2474 	vp = nfs3_fhtovp(&args->object.dir, exi);
2475 
2476 	DTRACE_NFSV3_5(op__remove__start, struct svc_req *, req,
2477 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2478 	    REMOVE3args *, args);
2479 
2480 	if (vp == NULL) {
2481 		error = ESTALE;
2482 		goto err;
2483 	}
2484 
2485 	bva.va_mask = AT_ALL;
2486 	bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2487 	avap = bvap;
2488 
2489 	if (vp->v_type != VDIR) {
2490 		resp->status = NFS3ERR_NOTDIR;
2491 		goto err1;
2492 	}
2493 
2494 	if (args->object.name == nfs3nametoolong) {
2495 		resp->status = NFS3ERR_NAMETOOLONG;
2496 		goto err1;
2497 	}
2498 
2499 	if (args->object.name == NULL || *(args->object.name) == '\0') {
2500 		resp->status = NFS3ERR_ACCES;
2501 		goto err1;
2502 	}
2503 
2504 	if (rdonly(ro, vp)) {
2505 		resp->status = NFS3ERR_ROFS;
2506 		goto err1;
2507 	}
2508 
2509 	if (is_system_labeled()) {
2510 		bslabel_t *clabel = req->rq_label;
2511 
2512 		ASSERT(clabel != NULL);
2513 		DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2514 		    "got client label from request(1)", struct svc_req *, req);
2515 
2516 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2517 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2518 			    exi)) {
2519 				resp->status = NFS3ERR_ACCES;
2520 				goto err1;
2521 			}
2522 		}
2523 	}
2524 
2525 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2526 	name = nfscmd_convname(ca, exi, args->object.name,
2527 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2528 
2529 	if (name == NULL) {
2530 		resp->status = NFS3ERR_INVAL;
2531 		goto err1;
2532 	}
2533 
2534 	/*
2535 	 * Check for a conflict with a non-blocking mandatory share
2536 	 * reservation and V4 delegations
2537 	 */
2538 	error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2539 	    NULL, cr, NULL, NULL, NULL);
2540 	if (error != 0)
2541 		goto err;
2542 
2543 	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2544 		resp->status = NFS3ERR_JUKEBOX;
2545 		goto err1;
2546 	}
2547 
2548 	if (!nbl_need_check(targvp)) {
2549 		error = VOP_REMOVE(vp, name, cr, NULL, 0);
2550 	} else {
2551 		nbl_start_crit(targvp, RW_READER);
2552 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2553 			error = EACCES;
2554 		} else {
2555 			error = VOP_REMOVE(vp, name, cr, NULL, 0);
2556 		}
2557 		nbl_end_crit(targvp);
2558 	}
2559 	VN_RELE(targvp);
2560 	targvp = NULL;
2561 
2562 	ava.va_mask = AT_ALL;
2563 	avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2564 
2565 	/*
2566 	 * Force modified data and metadata out to stable storage.
2567 	 */
2568 	(void) VOP_FSYNC(vp, 0, cr, NULL);
2569 
2570 	if (error)
2571 		goto err;
2572 
2573 	resp->status = NFS3_OK;
2574 	vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2575 	goto out;
2576 
2577 err:
2578 	if (curthread->t_flag & T_WOULDBLOCK) {
2579 		curthread->t_flag &= ~T_WOULDBLOCK;
2580 		resp->status = NFS3ERR_JUKEBOX;
2581 	} else
2582 		resp->status = puterrno3(error);
2583 err1:
2584 	vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2585 out:
2586 	DTRACE_NFSV3_5(op__remove__done, struct svc_req *, req,
2587 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2588 	    REMOVE3res *, resp);
2589 
2590 	if (name != NULL && name != args->object.name)
2591 		kmem_free(name, MAXPATHLEN + 1);
2592 
2593 	if (vp != NULL)
2594 		VN_RELE(vp);
2595 }
2596 
2597 void *
rfs3_remove_getfh(REMOVE3args * args)2598 rfs3_remove_getfh(REMOVE3args *args)
2599 {
2600 
2601 	return (&args->object.dir);
2602 }
2603 
2604 void
rfs3_rmdir(RMDIR3args * args,RMDIR3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)2605 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2606     struct svc_req *req, cred_t *cr, bool_t ro)
2607 {
2608 	int error;
2609 	vnode_t *vp;
2610 	struct vattr *bvap;
2611 	struct vattr bva;
2612 	struct vattr *avap;
2613 	struct vattr ava;
2614 	struct sockaddr *ca;
2615 	char *name = NULL;
2616 
2617 	bvap = NULL;
2618 	avap = NULL;
2619 
2620 	vp = nfs3_fhtovp(&args->object.dir, exi);
2621 
2622 	DTRACE_NFSV3_5(op__rmdir__start, struct svc_req *, req,
2623 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2624 	    RMDIR3args *, args);
2625 
2626 	if (vp == NULL) {
2627 		error = ESTALE;
2628 		goto err;
2629 	}
2630 
2631 	bva.va_mask = AT_ALL;
2632 	bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2633 	avap = bvap;
2634 
2635 	if (vp->v_type != VDIR) {
2636 		resp->status = NFS3ERR_NOTDIR;
2637 		goto err1;
2638 	}
2639 
2640 	if (args->object.name == nfs3nametoolong) {
2641 		resp->status = NFS3ERR_NAMETOOLONG;
2642 		goto err1;
2643 	}
2644 
2645 	if (args->object.name == NULL || *(args->object.name) == '\0') {
2646 		resp->status = NFS3ERR_ACCES;
2647 		goto err1;
2648 	}
2649 
2650 	if (rdonly(ro, vp)) {
2651 		resp->status = NFS3ERR_ROFS;
2652 		goto err1;
2653 	}
2654 
2655 	if (is_system_labeled()) {
2656 		bslabel_t *clabel = req->rq_label;
2657 
2658 		ASSERT(clabel != NULL);
2659 		DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2660 		    "got client label from request(1)", struct svc_req *, req);
2661 
2662 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2663 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2664 			    exi)) {
2665 				resp->status = NFS3ERR_ACCES;
2666 				goto err1;
2667 			}
2668 		}
2669 	}
2670 
2671 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2672 	name = nfscmd_convname(ca, exi, args->object.name,
2673 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2674 
2675 	if (name == NULL) {
2676 		resp->status = NFS3ERR_INVAL;
2677 		goto err1;
2678 	}
2679 
2680 	ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
2681 	error = VOP_RMDIR(vp, name, ZONE_ROOTVP(), cr, NULL, 0);
2682 
2683 	if (name != args->object.name)
2684 		kmem_free(name, MAXPATHLEN + 1);
2685 
2686 	ava.va_mask = AT_ALL;
2687 	avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2688 
2689 	/*
2690 	 * Force modified data and metadata out to stable storage.
2691 	 */
2692 	(void) VOP_FSYNC(vp, 0, cr, NULL);
2693 
2694 	if (error) {
2695 		/*
2696 		 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2697 		 * if the directory is not empty.  A System V NFS server
2698 		 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2699 		 * over the wire.
2700 		 */
2701 		if (error == EEXIST)
2702 			error = ENOTEMPTY;
2703 		goto err;
2704 	}
2705 
2706 	resp->status = NFS3_OK;
2707 	vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2708 	goto out;
2709 
2710 err:
2711 	if (curthread->t_flag & T_WOULDBLOCK) {
2712 		curthread->t_flag &= ~T_WOULDBLOCK;
2713 		resp->status = NFS3ERR_JUKEBOX;
2714 	} else
2715 		resp->status = puterrno3(error);
2716 err1:
2717 	vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2718 out:
2719 	DTRACE_NFSV3_5(op__rmdir__done, struct svc_req *, req,
2720 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2721 	    RMDIR3res *, resp);
2722 	if (vp != NULL)
2723 		VN_RELE(vp);
2724 
2725 }
2726 
2727 void *
rfs3_rmdir_getfh(RMDIR3args * args)2728 rfs3_rmdir_getfh(RMDIR3args *args)
2729 {
2730 
2731 	return (&args->object.dir);
2732 }
2733 
2734 void
rfs3_rename(RENAME3args * args,RENAME3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)2735 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2736     struct svc_req *req, cred_t *cr, bool_t ro)
2737 {
2738 	int error = 0;
2739 	vnode_t *fvp;
2740 	vnode_t *tvp;
2741 	vnode_t *targvp;
2742 	struct vattr *fbvap;
2743 	struct vattr fbva;
2744 	struct vattr *favap;
2745 	struct vattr fava;
2746 	struct vattr *tbvap;
2747 	struct vattr tbva;
2748 	struct vattr *tavap;
2749 	struct vattr tava;
2750 	nfs_fh3 *fh3;
2751 	struct exportinfo *to_exi;
2752 	vnode_t *srcvp = NULL;
2753 	bslabel_t *clabel;
2754 	struct sockaddr *ca;
2755 	char *name = NULL;
2756 	char *toname = NULL;
2757 
2758 	fbvap = NULL;
2759 	favap = NULL;
2760 	tbvap = NULL;
2761 	tavap = NULL;
2762 	tvp = NULL;
2763 
2764 	fvp = nfs3_fhtovp(&args->from.dir, exi);
2765 
2766 	DTRACE_NFSV3_5(op__rename__start, struct svc_req *, req,
2767 	    cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2768 	    RENAME3args *, args);
2769 
2770 	if (fvp == NULL) {
2771 		error = ESTALE;
2772 		goto err;
2773 	}
2774 
2775 	if (is_system_labeled()) {
2776 		clabel = req->rq_label;
2777 		ASSERT(clabel != NULL);
2778 		DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2779 		    "got client label from request(1)", struct svc_req *, req);
2780 
2781 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2782 			if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2783 			    exi)) {
2784 				resp->status = NFS3ERR_ACCES;
2785 				goto err1;
2786 			}
2787 		}
2788 	}
2789 
2790 	fbva.va_mask = AT_ALL;
2791 	fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2792 	favap = fbvap;
2793 
2794 	fh3 = &args->to.dir;
2795 	to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2796 	if (to_exi == NULL) {
2797 		resp->status = NFS3ERR_ACCES;
2798 		goto err1;
2799 	}
2800 	exi_rele(to_exi);
2801 
2802 	if (to_exi != exi) {
2803 		resp->status = NFS3ERR_XDEV;
2804 		goto err1;
2805 	}
2806 
2807 	tvp = nfs3_fhtovp(&args->to.dir, exi);
2808 	if (tvp == NULL) {
2809 		error = ESTALE;
2810 		goto err;
2811 	}
2812 
2813 	tbva.va_mask = AT_ALL;
2814 	tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2815 	tavap = tbvap;
2816 
2817 	if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2818 		resp->status = NFS3ERR_NOTDIR;
2819 		goto err1;
2820 	}
2821 
2822 	if (args->from.name == nfs3nametoolong ||
2823 	    args->to.name == nfs3nametoolong) {
2824 		resp->status = NFS3ERR_NAMETOOLONG;
2825 		goto err1;
2826 	}
2827 	if (args->from.name == NULL || *(args->from.name) == '\0' ||
2828 	    args->to.name == NULL || *(args->to.name) == '\0') {
2829 		resp->status = NFS3ERR_ACCES;
2830 		goto err1;
2831 	}
2832 
2833 	if (rdonly(ro, tvp)) {
2834 		resp->status = NFS3ERR_ROFS;
2835 		goto err1;
2836 	}
2837 
2838 	if (is_system_labeled()) {
2839 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2840 			if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2841 			    exi)) {
2842 				resp->status = NFS3ERR_ACCES;
2843 				goto err1;
2844 			}
2845 		}
2846 	}
2847 
2848 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2849 	name = nfscmd_convname(ca, exi, args->from.name,
2850 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2851 
2852 	if (name == NULL) {
2853 		resp->status = NFS3ERR_INVAL;
2854 		goto err1;
2855 	}
2856 
2857 	toname = nfscmd_convname(ca, exi, args->to.name,
2858 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2859 
2860 	if (toname == NULL) {
2861 		resp->status = NFS3ERR_INVAL;
2862 		goto err1;
2863 	}
2864 
2865 	/*
2866 	 * Check for a conflict with a non-blocking mandatory share
2867 	 * reservation or V4 delegations.
2868 	 */
2869 	error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2870 	    NULL, cr, NULL, NULL, NULL);
2871 	if (error != 0)
2872 		goto err;
2873 
2874 	/*
2875 	 * If we rename a delegated file we should recall the
2876 	 * delegation, since future opens should fail or would
2877 	 * refer to a new file.
2878 	 */
2879 	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2880 		resp->status = NFS3ERR_JUKEBOX;
2881 		goto err1;
2882 	}
2883 
2884 	/*
2885 	 * Check for renaming over a delegated file.  Check nfs4_deleg_policy
2886 	 * first to avoid VOP_LOOKUP if possible.
2887 	 */
2888 	if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
2889 	    VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2890 	    NULL, NULL, NULL) == 0) {
2891 
2892 		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2893 			VN_RELE(targvp);
2894 			resp->status = NFS3ERR_JUKEBOX;
2895 			goto err1;
2896 		}
2897 		VN_RELE(targvp);
2898 	}
2899 
2900 	if (!nbl_need_check(srcvp)) {
2901 		error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2902 	} else {
2903 		nbl_start_crit(srcvp, RW_READER);
2904 		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2905 			error = EACCES;
2906 		else
2907 			error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2908 		nbl_end_crit(srcvp);
2909 	}
2910 	if (error == 0)
2911 		vn_renamepath(tvp, srcvp, args->to.name,
2912 		    strlen(args->to.name));
2913 	VN_RELE(srcvp);
2914 	srcvp = NULL;
2915 
2916 	fava.va_mask = AT_ALL;
2917 	favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2918 	tava.va_mask = AT_ALL;
2919 	tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2920 
2921 	/*
2922 	 * Force modified data and metadata out to stable storage.
2923 	 */
2924 	(void) VOP_FSYNC(fvp, 0, cr, NULL);
2925 	(void) VOP_FSYNC(tvp, 0, cr, NULL);
2926 
2927 	if (error)
2928 		goto err;
2929 
2930 	resp->status = NFS3_OK;
2931 	vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2932 	vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2933 	goto out;
2934 
2935 err:
2936 	if (curthread->t_flag & T_WOULDBLOCK) {
2937 		curthread->t_flag &= ~T_WOULDBLOCK;
2938 		resp->status = NFS3ERR_JUKEBOX;
2939 	} else {
2940 		resp->status = puterrno3(error);
2941 	}
2942 err1:
2943 	vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2944 	vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2945 
2946 out:
2947 	if (name != NULL && name != args->from.name)
2948 		kmem_free(name, MAXPATHLEN + 1);
2949 	if (toname != NULL && toname != args->to.name)
2950 		kmem_free(toname, MAXPATHLEN + 1);
2951 
2952 	DTRACE_NFSV3_5(op__rename__done, struct svc_req *, req,
2953 	    cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
2954 	    RENAME3res *, resp);
2955 	if (fvp != NULL)
2956 		VN_RELE(fvp);
2957 	if (tvp != NULL)
2958 		VN_RELE(tvp);
2959 }
2960 
2961 void *
rfs3_rename_getfh(RENAME3args * args)2962 rfs3_rename_getfh(RENAME3args *args)
2963 {
2964 
2965 	return (&args->from.dir);
2966 }
2967 
2968 void
rfs3_link(LINK3args * args,LINK3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)2969 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2970     struct svc_req *req, cred_t *cr, bool_t ro)
2971 {
2972 	int error;
2973 	vnode_t *vp;
2974 	vnode_t *dvp;
2975 	struct vattr *vap;
2976 	struct vattr va;
2977 	struct vattr *bvap;
2978 	struct vattr bva;
2979 	struct vattr *avap;
2980 	struct vattr ava;
2981 	nfs_fh3	*fh3;
2982 	struct exportinfo *to_exi;
2983 	bslabel_t *clabel;
2984 	struct sockaddr *ca;
2985 	char *name = NULL;
2986 
2987 	vap = NULL;
2988 	bvap = NULL;
2989 	avap = NULL;
2990 	dvp = NULL;
2991 
2992 	vp = nfs3_fhtovp(&args->file, exi);
2993 
2994 	DTRACE_NFSV3_5(op__link__start, struct svc_req *, req,
2995 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
2996 	    LINK3args *, args);
2997 
2998 	if (vp == NULL) {
2999 		error = ESTALE;
3000 		goto out;
3001 	}
3002 
3003 	va.va_mask = AT_ALL;
3004 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3005 
3006 	fh3 = &args->link.dir;
3007 	to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
3008 	if (to_exi == NULL) {
3009 		resp->status = NFS3ERR_ACCES;
3010 		goto out1;
3011 	}
3012 	exi_rele(to_exi);
3013 
3014 	if (to_exi != exi) {
3015 		resp->status = NFS3ERR_XDEV;
3016 		goto out1;
3017 	}
3018 
3019 	if (is_system_labeled()) {
3020 		clabel = req->rq_label;
3021 
3022 		ASSERT(clabel != NULL);
3023 		DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
3024 		    "got client label from request(1)", struct svc_req *, req);
3025 
3026 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3027 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3028 			    exi)) {
3029 				resp->status = NFS3ERR_ACCES;
3030 				goto out1;
3031 			}
3032 		}
3033 	}
3034 
3035 	dvp = nfs3_fhtovp(&args->link.dir, exi);
3036 	if (dvp == NULL) {
3037 		error = ESTALE;
3038 		goto out;
3039 	}
3040 
3041 	bva.va_mask = AT_ALL;
3042 	bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
3043 
3044 	if (dvp->v_type != VDIR) {
3045 		resp->status = NFS3ERR_NOTDIR;
3046 		goto out1;
3047 	}
3048 
3049 	if (args->link.name == nfs3nametoolong) {
3050 		resp->status = NFS3ERR_NAMETOOLONG;
3051 		goto out1;
3052 	}
3053 
3054 	if (args->link.name == NULL || *(args->link.name) == '\0') {
3055 		resp->status = NFS3ERR_ACCES;
3056 		goto out1;
3057 	}
3058 
3059 	if (rdonly(ro, dvp)) {
3060 		resp->status = NFS3ERR_ROFS;
3061 		goto out1;
3062 	}
3063 
3064 	if (is_system_labeled()) {
3065 		DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
3066 		    "got client label from request(1)", struct svc_req *, req);
3067 
3068 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3069 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
3070 			    exi)) {
3071 				resp->status = NFS3ERR_ACCES;
3072 				goto out1;
3073 			}
3074 		}
3075 	}
3076 
3077 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3078 	name = nfscmd_convname(ca, exi, args->link.name,
3079 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3080 
3081 	if (name == NULL) {
3082 		resp->status = NFS3ERR_SERVERFAULT;
3083 		goto out1;
3084 	}
3085 
3086 	error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3087 
3088 	va.va_mask = AT_ALL;
3089 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3090 	ava.va_mask = AT_ALL;
3091 	avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3092 
3093 	/*
3094 	 * Force modified data and metadata out to stable storage.
3095 	 */
3096 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3097 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
3098 
3099 	if (error)
3100 		goto out;
3101 
3102 	VN_RELE(dvp);
3103 
3104 	resp->status = NFS3_OK;
3105 	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3106 	vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3107 
3108 	DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3109 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3110 	    LINK3res *, resp);
3111 
3112 	VN_RELE(vp);
3113 
3114 	return;
3115 
3116 out:
3117 	if (curthread->t_flag & T_WOULDBLOCK) {
3118 		curthread->t_flag &= ~T_WOULDBLOCK;
3119 		resp->status = NFS3ERR_JUKEBOX;
3120 	} else
3121 		resp->status = puterrno3(error);
3122 out1:
3123 	if (name != NULL && name != args->link.name)
3124 		kmem_free(name, MAXPATHLEN + 1);
3125 
3126 	DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
3127 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3128 	    LINK3res *, resp);
3129 
3130 	if (vp != NULL)
3131 		VN_RELE(vp);
3132 	if (dvp != NULL)
3133 		VN_RELE(dvp);
3134 	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3135 	vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3136 }
3137 
3138 void *
rfs3_link_getfh(LINK3args * args)3139 rfs3_link_getfh(LINK3args *args)
3140 {
3141 
3142 	return (&args->file);
3143 }
3144 
3145 /*
3146  * This macro defines the size of a response which contains attribute
3147  * information and one directory entry (whose length is specified by
3148  * the macro parameter).  If the incoming request is larger than this,
 * then we are guaranteed to be able to return at least one directory
 * entry if one exists.  Therefore, we do not need to check for
 * NFS3ERR_TOOSMALL if the requested size is larger than this.  If it
3152  * is not, then we need to check to make sure that this error does not
3153  * need to be returned.
3154  *
3155  * NFS3_READDIR_MIN_COUNT is comprised of following :
3156  *
3157  * status - 1 * BYTES_PER_XDR_UNIT
3158  * attr. flag - 1 * BYTES_PER_XDR_UNIT
3159  * cookie verifier - 2 * BYTES_PER_XDR_UNIT
3160  * attributes  - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
3161  * boolean - 1 * BYTES_PER_XDR_UNIT
3162  * file id - 2 * BYTES_PER_XDR_UNIT
3163  * directory name length - 1 * BYTES_PER_XDR_UNIT
3164  * cookie - 2 * BYTES_PER_XDR_UNIT
3165  * end of list - 1 * BYTES_PER_XDR_UNIT
3166  * end of file - 1 * BYTES_PER_XDR_UNIT
3167  * Name length of directory to the nearest byte
3168  */
3169 
/*
 * Bytes needed for a READDIR reply that carries directory attributes and
 * a single entry whose name is "length" bytes long: the name rounded up
 * to a whole XDR unit, plus the fixed-size fields counted in XDR units.
 */
#define	NFS3_READDIR_MIN_COUNT(length)	\
	(roundup((length), BYTES_PER_XDR_UNIT) + \
	    (1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1) * \
	    BYTES_PER_XDR_UNIT)
3173 
3174 /* ARGSUSED */
3175 void
rfs3_readdir(READDIR3args * args,READDIR3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)3176 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3177     struct svc_req *req, cred_t *cr, bool_t ro)
3178 {
3179 	int error;
3180 	vnode_t *vp;
3181 	struct vattr *vap;
3182 	struct vattr va;
3183 	struct iovec iov;
3184 	struct uio uio;
3185 	char *data;
3186 	int iseof;
3187 	int bufsize;
3188 	int namlen;
3189 	uint_t count;
3190 	struct sockaddr *ca;
3191 
3192 	vap = NULL;
3193 
3194 	vp = nfs3_fhtovp(&args->dir, exi);
3195 
3196 	DTRACE_NFSV3_5(op__readdir__start, struct svc_req *, req,
3197 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3198 	    READDIR3args *, args);
3199 
3200 	if (vp == NULL) {
3201 		error = ESTALE;
3202 		goto out;
3203 	}
3204 
3205 	if (is_system_labeled()) {
3206 		bslabel_t *clabel = req->rq_label;
3207 
3208 		ASSERT(clabel != NULL);
3209 		DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3210 		    "got client label from request(1)", struct svc_req *, req);
3211 
3212 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3213 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3214 			    exi)) {
3215 				resp->status = NFS3ERR_ACCES;
3216 				goto out1;
3217 			}
3218 		}
3219 	}
3220 
3221 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3222 
3223 	va.va_mask = AT_ALL;
3224 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3225 
3226 	if (vp->v_type != VDIR) {
3227 		resp->status = NFS3ERR_NOTDIR;
3228 		goto out1;
3229 	}
3230 
3231 	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3232 	if (error)
3233 		goto out;
3234 
3235 	/*
3236 	 * Now don't allow arbitrary count to alloc;
3237 	 * allow the maximum not to exceed rfs3_tsize()
3238 	 */
3239 	if (args->count > rfs3_tsize(req))
3240 		args->count = rfs3_tsize(req);
3241 
3242 	/*
3243 	 * Make sure that there is room to read at least one entry
3244 	 * if any are available.
3245 	 */
3246 	if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3247 		count = DIRENT64_RECLEN(MAXNAMELEN);
3248 	else
3249 		count = args->count;
3250 
3251 	data = kmem_alloc(count, KM_SLEEP);
3252 
3253 	iov.iov_base = data;
3254 	iov.iov_len = count;
3255 	uio.uio_iov = &iov;
3256 	uio.uio_iovcnt = 1;
3257 	uio.uio_segflg = UIO_SYSSPACE;
3258 	uio.uio_extflg = UIO_COPY_CACHED;
3259 	uio.uio_loffset = (offset_t)args->cookie;
3260 	uio.uio_resid = count;
3261 
3262 	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3263 
3264 	va.va_mask = AT_ALL;
3265 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3266 
3267 	if (error) {
3268 		kmem_free(data, count);
3269 		goto out;
3270 	}
3271 
3272 	/*
3273 	 * If the count was not large enough to be able to guarantee
3274 	 * to be able to return at least one entry, then need to
3275 	 * check to see if NFS3ERR_TOOSMALL should be returned.
3276 	 */
3277 	if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3278 		/*
3279 		 * bufsize is used to keep track of the size of the response.
3280 		 * It is primed with:
3281 		 *	1 for the status +
3282 		 *	1 for the dir_attributes.attributes boolean +
3283 		 *	2 for the cookie verifier
3284 		 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3285 		 * to bytes.  If there are directory attributes to be
3286 		 * returned, then:
3287 		 *	NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3288 		 * time BYTES_PER_XDR_UNIT is added to account for them.
3289 		 */
3290 		bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3291 		if (vap != NULL)
3292 			bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3293 		/*
3294 		 * An entry is composed of:
3295 		 *	1 for the true/false list indicator +
3296 		 *	2 for the fileid +
3297 		 *	1 for the length of the name +
3298 		 *	2 for the cookie +
3299 		 * all times BYTES_PER_XDR_UNIT to convert from
3300 		 * XDR units to bytes, plus the length of the name
3301 		 * rounded up to the nearest BYTES_PER_XDR_UNIT.
3302 		 */
3303 		if (count != uio.uio_resid) {
3304 			namlen = strlen(((struct dirent64 *)data)->d_name);
3305 			bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3306 			    roundup(namlen, BYTES_PER_XDR_UNIT);
3307 		}
3308 		/*
3309 		 * We need to check to see if the number of bytes left
3310 		 * to go into the buffer will actually fit into the
3311 		 * buffer.  This is calculated as the size of this
3312 		 * entry plus:
3313 		 *	1 for the true/false list indicator +
3314 		 *	1 for the eof indicator
3315 		 * times BYTES_PER_XDR_UNIT to convert from from
3316 		 * XDR units to bytes.
3317 		 */
3318 		bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3319 		if (bufsize > args->count) {
3320 			kmem_free(data, count);
3321 			resp->status = NFS3ERR_TOOSMALL;
3322 			goto out1;
3323 		}
3324 	}
3325 
3326 	/*
	 * Have a valid readdir buffer for the native character
3328 	 * set. Need to check if a conversion is necessary and
3329 	 * potentially rewrite the whole buffer. Note that if the
3330 	 * conversion expands names enough, the structure may not
	 * fit. In this case, we need to drop entries until it fits
3332 	 * and patch the counts in order that the next readdir will
3333 	 * get the correct entries.
3334 	 */
3335 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3336 	data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3337 
3338 
3339 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3340 
3341 #if 0 /* notyet */
3342 	/*
3343 	 * Don't do this.  It causes local disk writes when just
3344 	 * reading the file and the overhead is deemed larger
3345 	 * than the benefit.
3346 	 */
3347 	/*
3348 	 * Force modified metadata out to stable storage.
3349 	 */
3350 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3351 #endif
3352 
3353 	resp->status = NFS3_OK;
3354 	vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3355 	resp->resok.cookieverf = 0;
3356 	resp->resok.reply.entries = (entry3 *)data;
3357 	resp->resok.reply.eof = iseof;
3358 	resp->resok.size = count - uio.uio_resid;
3359 	resp->resok.count = args->count;
3360 	resp->resok.freecount = count;
3361 
3362 	DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3363 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3364 	    READDIR3res *, resp);
3365 
3366 	VN_RELE(vp);
3367 
3368 	return;
3369 
3370 out:
3371 	if (curthread->t_flag & T_WOULDBLOCK) {
3372 		curthread->t_flag &= ~T_WOULDBLOCK;
3373 		resp->status = NFS3ERR_JUKEBOX;
3374 	} else
3375 		resp->status = puterrno3(error);
3376 out1:
3377 	vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3378 
3379 	DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
3380 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3381 	    READDIR3res *, resp);
3382 
3383 	if (vp != NULL) {
3384 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3385 		VN_RELE(vp);
3386 	}
3387 }
3388 
3389 void *
rfs3_readdir_getfh(READDIR3args * args)3390 rfs3_readdir_getfh(READDIR3args *args)
3391 {
3392 
3393 	return (&args->dir);
3394 }
3395 
3396 void
rfs3_readdir_free(READDIR3res * resp)3397 rfs3_readdir_free(READDIR3res *resp)
3398 {
3399 
3400 	if (resp->status == NFS3_OK)
3401 		kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3402 }
3403 
/*
 * Advance from one dirent64 record to the record that follows it,
 * using the current record's d_reclen as the byte stride.  Any earlier
 * definition of the name is discarded first.  Note that the argument
 * is evaluated twice.
 */
#undef nextdp
#define	nextdp(dp)	\
	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
3408 
/*
 * Size, in bytes, of a READDIRPLUS response entry for a name of the
 * given length, including its attributes and file handle.  If the
 * space remaining in the request is at least this large for the
 * longest possible name, one more directory entry is guaranteed to fit
 * if one exists.
 *
 * The fixed part, counted in XDR units, is:
 *
 *	1			list-continues boolean
 *	2			file id
 *	1			name length
 *	2			cookie
 *	1			attributes-follow flag
 *	NFS3_SIZEOF_FATTR3	attributes
 *	1			handle-follows flag
 *	1			file handle length
 *
 * and is multiplied by BYTES_PER_XDR_UNIT.  To that are added the
 * maximum file handle size (NFS3_MAXFHSIZE) and the name length
 * rounded up to a multiple of BYTES_PER_XDR_UNIT.
 */
#define	NFS3_READDIRPLUS_ENTRY(namelen)	\
	(NFS3_MAXFHSIZE + roundup(namelen, BYTES_PER_XDR_UNIT) + \
	(1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
		BYTES_PER_XDR_UNIT)
3432 
3433 static int rfs3_readdir_unit = MAXBSIZE;
3434 
3435 /* ARGSUSED */
3436 void
rfs3_readdirplus(READDIRPLUS3args * args,READDIRPLUS3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)3437 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3438     struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
3439 {
3440 	int error;
3441 	vnode_t *vp;
3442 	struct vattr *vap;
3443 	struct vattr va;
3444 	struct iovec iov;
3445 	struct uio uio;
3446 	char *data;
3447 	int iseof;
3448 	struct dirent64 *dp;
3449 	vnode_t *nvp;
3450 	struct vattr *nvap;
3451 	struct vattr nva;
3452 	entryplus3_info *infop = NULL;
3453 	int size = 0;
3454 	int nents = 0;
3455 	int bufsize = 0;
3456 	int entrysize = 0;
3457 	int tofit = 0;
3458 	int rd_unit = rfs3_readdir_unit;
3459 	int prev_len;
3460 	int space_left;
3461 	int i;
3462 	uint_t *namlen = NULL;
3463 	char *ndata = NULL;
3464 	struct sockaddr *ca;
3465 	size_t ret;
3466 
3467 	vap = NULL;
3468 
3469 	vp = nfs3_fhtovp(&args->dir, exi);
3470 
3471 	DTRACE_NFSV3_5(op__readdirplus__start, struct svc_req *, req,
3472 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3473 	    READDIRPLUS3args *, args);
3474 
3475 	if (vp == NULL) {
3476 		error = ESTALE;
3477 		goto out;
3478 	}
3479 
3480 	if (is_system_labeled()) {
3481 		bslabel_t *clabel = req->rq_label;
3482 
3483 		ASSERT(clabel != NULL);
3484 		DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3485 		    char *, "got client label from request(1)",
3486 		    struct svc_req *, req);
3487 
3488 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3489 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3490 			    exi)) {
3491 				resp->status = NFS3ERR_ACCES;
3492 				goto out1;
3493 			}
3494 		}
3495 	}
3496 
3497 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3498 
3499 	va.va_mask = AT_ALL;
3500 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3501 
3502 	if (vp->v_type != VDIR) {
3503 		error = ENOTDIR;
3504 		goto out;
3505 	}
3506 
3507 	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3508 	if (error)
3509 		goto out;
3510 
3511 	/*
3512 	 * Don't allow arbitrary counts for allocation
3513 	 */
3514 	if (args->maxcount > rfs3_tsize(req))
3515 		args->maxcount = rfs3_tsize(req);
3516 
3517 	/*
3518 	 * Make sure that there is room to read at least one entry
3519 	 * if any are available
3520 	 */
3521 	args->dircount = MIN(args->dircount, args->maxcount);
3522 
3523 	if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3524 		args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3525 
3526 	/*
3527 	 * This allocation relies on a minimum directory entry
3528 	 * being roughly 24 bytes.  Therefore, the namlen array
3529 	 * will have enough space based on the maximum number of
3530 	 * entries to read.
3531 	 */
3532 	namlen = kmem_alloc(args->dircount, KM_SLEEP);
3533 
3534 	space_left = args->dircount;
3535 	data = kmem_alloc(args->dircount, KM_SLEEP);
3536 	dp = (struct dirent64 *)data;
3537 	uio.uio_iov = &iov;
3538 	uio.uio_iovcnt = 1;
3539 	uio.uio_segflg = UIO_SYSSPACE;
3540 	uio.uio_extflg = UIO_COPY_CACHED;
3541 	uio.uio_loffset = (offset_t)args->cookie;
3542 
3543 	/*
3544 	 * bufsize is used to keep track of the size of the response as we
3545 	 * get post op attributes and filehandles for each entry.  This is
3546 	 * an optimization as the server may have read more entries than will
3547 	 * fit in the buffer specified by maxcount.  We stop calculating
3548 	 * post op attributes and filehandles once we have exceeded maxcount.
3549 	 * This will minimize the effect of truncation.
3550 	 *
3551 	 * It is primed with:
3552 	 *	1 for the status +
3553 	 *	1 for the dir_attributes.attributes boolean +
3554 	 *	2 for the cookie verifier
3555 	 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3556 	 * to bytes.  If there are directory attributes to be
3557 	 * returned, then:
3558 	 *	NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3559 	 * time BYTES_PER_XDR_UNIT is added to account for them.
3560 	 */
3561 	bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3562 	if (vap != NULL)
3563 		bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3564 
3565 getmoredents:
3566 	/*
3567 	 * Here we make a check so that our read unit is not larger than
3568 	 * the space left in the buffer.
3569 	 */
3570 	rd_unit = MIN(rd_unit, space_left);
3571 	iov.iov_base = (char *)dp;
3572 	iov.iov_len = rd_unit;
3573 	uio.uio_resid = rd_unit;
3574 	prev_len = rd_unit;
3575 
3576 	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3577 
3578 	if (error) {
3579 		kmem_free(data, args->dircount);
3580 		goto out;
3581 	}
3582 
3583 	if (uio.uio_resid == prev_len && !iseof) {
3584 		if (nents == 0) {
3585 			kmem_free(data, args->dircount);
3586 			resp->status = NFS3ERR_TOOSMALL;
3587 			goto out1;
3588 		}
3589 
3590 		/*
3591 		 * We could not get any more entries, so get the attributes
3592 		 * and filehandle for the entries already obtained.
3593 		 */
3594 		goto good;
3595 	}
3596 
3597 	/*
3598 	 * We estimate the size of the response by assuming the
3599 	 * entry exists and attributes and filehandle are also valid
3600 	 */
3601 	for (size = prev_len - uio.uio_resid;
3602 	    size > 0;
3603 	    size -= dp->d_reclen, dp = nextdp(dp)) {
3604 
3605 		if (dp->d_ino == 0) {
3606 			nents++;
3607 			continue;
3608 		}
3609 
3610 		namlen[nents] = strlen(dp->d_name);
3611 		entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3612 
3613 		/*
3614 		 * We need to check to see if the number of bytes left
3615 		 * to go into the buffer will actually fit into the
3616 		 * buffer.  This is calculated as the size of this
3617 		 * entry plus:
3618 		 *	1 for the true/false list indicator +
3619 		 *	1 for the eof indicator
3620 		 * times BYTES_PER_XDR_UNIT to convert from XDR units
3621 		 * to bytes.
3622 		 *
3623 		 * Also check the dircount limit against the first entry read
3624 		 *
3625 		 */
3626 		tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3627 		if (bufsize + tofit > args->maxcount) {
3628 			/*
3629 			 * We make a check here to see if this was the
3630 			 * first entry being measured.  If so, then maxcount
3631 			 * was too small to begin with and so we need to
3632 			 * return with NFS3ERR_TOOSMALL.
3633 			 */
3634 			if (nents == 0) {
3635 				kmem_free(data, args->dircount);
3636 				resp->status = NFS3ERR_TOOSMALL;
3637 				goto out1;
3638 			}
3639 			iseof = FALSE;
3640 			goto good;
3641 		}
3642 		bufsize += entrysize;
3643 		nents++;
3644 	}
3645 
3646 	/*
3647 	 * If there is enough room to fit at least 1 more entry including
3648 	 * post op attributes and filehandle in the buffer AND that we haven't
3649 	 * exceeded dircount then go back and get some more.
3650 	 */
3651 	if (!iseof &&
3652 	    (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3653 		space_left -= (prev_len - uio.uio_resid);
3654 		if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3655 			goto getmoredents;
3656 
3657 		/* else, fall through */
3658 	}
3659 good:
3660 	va.va_mask = AT_ALL;
3661 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3662 
3663 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3664 
3665 	infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3666 	resp->resok.infop = infop;
3667 
3668 	dp = (struct dirent64 *)data;
3669 	for (i = 0; i < nents; i++) {
3670 
3671 		if (dp->d_ino == 0) {
3672 			infop[i].attr.attributes = FALSE;
3673 			infop[i].fh.handle_follows = FALSE;
3674 			dp = nextdp(dp);
3675 			continue;
3676 		}
3677 
3678 		infop[i].namelen = namlen[i];
3679 
3680 		error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3681 		    NULL, NULL, NULL);
3682 		if (error) {
3683 			infop[i].attr.attributes = FALSE;
3684 			infop[i].fh.handle_follows = FALSE;
3685 			dp = nextdp(dp);
3686 			continue;
3687 		}
3688 
3689 		nva.va_mask = AT_ALL;
3690 		nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3691 
3692 		/* Lie about the object type for a referral */
3693 		if (vn_is_nfs_reparse(nvp, cr))
3694 			nvap->va_type = VLNK;
3695 
3696 		if (vn_ismntpt(nvp)) {
3697 			infop[i].attr.attributes = FALSE;
3698 			infop[i].fh.handle_follows = FALSE;
3699 		} else {
3700 			vattr_to_post_op_attr(nvap, &infop[i].attr);
3701 
3702 			error = makefh3(&infop[i].fh.handle, nvp, exi);
3703 			if (!error)
3704 				infop[i].fh.handle_follows = TRUE;
3705 			else
3706 				infop[i].fh.handle_follows = FALSE;
3707 		}
3708 
3709 		VN_RELE(nvp);
3710 		dp = nextdp(dp);
3711 	}
3712 
3713 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3714 	ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3715 	if (ndata == NULL)
3716 		ndata = data;
3717 
3718 	if (ret > 0) {
3719 		/*
3720 		 * We had to drop one or more entries in order to fit
3721 		 * during the character conversion.  We need to patch
3722 		 * up the size and eof info.
3723 		 */
3724 		if (iseof)
3725 			iseof = FALSE;
3726 
3727 		ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3728 		    nents, ret);
3729 	}
3730 
3731 
3732 #if 0 /* notyet */
3733 	/*
3734 	 * Don't do this.  It causes local disk writes when just
3735 	 * reading the file and the overhead is deemed larger
3736 	 * than the benefit.
3737 	 */
3738 	/*
3739 	 * Force modified metadata out to stable storage.
3740 	 */
3741 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3742 #endif
3743 
3744 	kmem_free(namlen, args->dircount);
3745 	if (ndata != data)
3746 		kmem_free(data, args->dircount);
3747 
3748 	resp->status = NFS3_OK;
3749 	vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3750 	resp->resok.cookieverf = 0;
3751 	resp->resok.reply.entries = (entryplus3 *)ndata;
3752 	resp->resok.reply.eof = iseof;
3753 	resp->resok.size = nents;
3754 	resp->resok.count = args->dircount - ret;
3755 	resp->resok.maxcount = args->maxcount;
3756 
3757 	DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3758 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3759 	    READDIRPLUS3res *, resp);
3760 
3761 	VN_RELE(vp);
3762 
3763 	return;
3764 
3765 out:
3766 	if (curthread->t_flag & T_WOULDBLOCK) {
3767 		curthread->t_flag &= ~T_WOULDBLOCK;
3768 		resp->status = NFS3ERR_JUKEBOX;
3769 	} else {
3770 		resp->status = puterrno3(error);
3771 	}
3772 out1:
3773 	vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3774 
3775 	DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
3776 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3777 	    READDIRPLUS3res *, resp);
3778 
3779 	if (vp != NULL) {
3780 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3781 		VN_RELE(vp);
3782 	}
3783 
3784 	if (namlen != NULL)
3785 		kmem_free(namlen, args->dircount);
3786 }
3787 
3788 void *
rfs3_readdirplus_getfh(READDIRPLUS3args * args)3789 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3790 {
3791 
3792 	return (&args->dir);
3793 }
3794 
3795 void
rfs3_readdirplus_free(READDIRPLUS3res * resp)3796 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3797 {
3798 
3799 	if (resp->status == NFS3_OK) {
3800 		kmem_free(resp->resok.reply.entries, resp->resok.count);
3801 		kmem_free(resp->resok.infop,
3802 		    resp->resok.size * sizeof (struct entryplus3_info));
3803 	}
3804 }
3805 
3806 /* ARGSUSED */
3807 void
rfs3_fsstat(FSSTAT3args * args,FSSTAT3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)3808 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3809     struct svc_req *req, cred_t *cr, bool_t ro)
3810 {
3811 	int error;
3812 	vnode_t *vp;
3813 	struct vattr *vap;
3814 	struct vattr va;
3815 	struct statvfs64 sb;
3816 
3817 	vap = NULL;
3818 
3819 	vp = nfs3_fhtovp(&args->fsroot, exi);
3820 
3821 	DTRACE_NFSV3_5(op__fsstat__start, struct svc_req *, req,
3822 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3823 	    FSSTAT3args *, args);
3824 
3825 	if (vp == NULL) {
3826 		error = ESTALE;
3827 		goto out;
3828 	}
3829 
3830 	if (is_system_labeled()) {
3831 		bslabel_t *clabel = req->rq_label;
3832 
3833 		ASSERT(clabel != NULL);
3834 		DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3835 		    "got client label from request(1)", struct svc_req *, req);
3836 
3837 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3838 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3839 			    exi)) {
3840 				resp->status = NFS3ERR_ACCES;
3841 				goto out1;
3842 			}
3843 		}
3844 	}
3845 
3846 	error = VFS_STATVFS(vp->v_vfsp, &sb);
3847 
3848 	va.va_mask = AT_ALL;
3849 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3850 
3851 	if (error)
3852 		goto out;
3853 
3854 	resp->status = NFS3_OK;
3855 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3856 	if (sb.f_blocks != (fsblkcnt64_t)-1)
3857 		resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3858 	else
3859 		resp->resok.tbytes = (size3)sb.f_blocks;
3860 	if (sb.f_bfree != (fsblkcnt64_t)-1)
3861 		resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3862 	else
3863 		resp->resok.fbytes = (size3)sb.f_bfree;
3864 	if (sb.f_bavail != (fsblkcnt64_t)-1)
3865 		resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3866 	else
3867 		resp->resok.abytes = (size3)sb.f_bavail;
3868 	resp->resok.tfiles = (size3)sb.f_files;
3869 	resp->resok.ffiles = (size3)sb.f_ffree;
3870 	resp->resok.afiles = (size3)sb.f_favail;
3871 	resp->resok.invarsec = 0;
3872 
3873 	DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3874 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3875 	    FSSTAT3res *, resp);
3876 	VN_RELE(vp);
3877 
3878 	return;
3879 
3880 out:
3881 	if (curthread->t_flag & T_WOULDBLOCK) {
3882 		curthread->t_flag &= ~T_WOULDBLOCK;
3883 		resp->status = NFS3ERR_JUKEBOX;
3884 	} else
3885 		resp->status = puterrno3(error);
3886 out1:
3887 	DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
3888 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3889 	    FSSTAT3res *, resp);
3890 
3891 	if (vp != NULL)
3892 		VN_RELE(vp);
3893 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3894 }
3895 
3896 void *
rfs3_fsstat_getfh(FSSTAT3args * args)3897 rfs3_fsstat_getfh(FSSTAT3args *args)
3898 {
3899 
3900 	return (&args->fsroot);
3901 }
3902 
3903 /* ARGSUSED */
3904 void
rfs3_fsinfo(FSINFO3args * args,FSINFO3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)3905 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3906     struct svc_req *req, cred_t *cr, bool_t ro)
3907 {
3908 	vnode_t *vp;
3909 	struct vattr *vap;
3910 	struct vattr va;
3911 	uint32_t xfer_size;
3912 	ulong_t l = 0;
3913 	int error;
3914 
3915 	vp = nfs3_fhtovp(&args->fsroot, exi);
3916 
3917 	DTRACE_NFSV3_5(op__fsinfo__start, struct svc_req *, req,
3918 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3919 	    FSINFO3args *, args);
3920 
3921 	if (vp == NULL) {
3922 		if (curthread->t_flag & T_WOULDBLOCK) {
3923 			curthread->t_flag &= ~T_WOULDBLOCK;
3924 			resp->status = NFS3ERR_JUKEBOX;
3925 		} else
3926 			resp->status = NFS3ERR_STALE;
3927 		vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3928 		goto out;
3929 	}
3930 
3931 	if (is_system_labeled()) {
3932 		bslabel_t *clabel = req->rq_label;
3933 
3934 		ASSERT(clabel != NULL);
3935 		DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3936 		    "got client label from request(1)", struct svc_req *, req);
3937 
3938 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3939 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3940 			    exi)) {
3941 				resp->status = NFS3ERR_STALE;
3942 				vattr_to_post_op_attr(NULL,
3943 				    &resp->resfail.obj_attributes);
3944 				goto out;
3945 			}
3946 		}
3947 	}
3948 
3949 	va.va_mask = AT_ALL;
3950 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3951 
3952 	resp->status = NFS3_OK;
3953 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3954 	xfer_size = rfs3_tsize(req);
3955 	resp->resok.rtmax = xfer_size;
3956 	resp->resok.rtpref = xfer_size;
3957 	resp->resok.rtmult = DEV_BSIZE;
3958 	resp->resok.wtmax = xfer_size;
3959 	resp->resok.wtpref = xfer_size;
3960 	resp->resok.wtmult = DEV_BSIZE;
3961 	resp->resok.dtpref = MAXBSIZE;
3962 
3963 	/*
3964 	 * Large file spec: want maxfilesize based on limit of
3965 	 * underlying filesystem.  We can guess 2^31-1 if need be.
3966 	 */
3967 	error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3968 	if (error) {
3969 		resp->status = puterrno3(error);
3970 		goto out;
3971 	}
3972 
3973 	/*
3974 	 * If the underlying file system does not support _PC_FILESIZEBITS,
3975 	 * return a reasonable default. Note that error code on VOP_PATHCONF
3976 	 * will be 0, even if the underlying file system does not support
3977 	 * _PC_FILESIZEBITS.
3978 	 */
3979 	if (l == (ulong_t)-1) {
3980 		resp->resok.maxfilesize = MAXOFF32_T;
3981 	} else {
3982 		if (l >= (sizeof (uint64_t) * 8))
3983 			resp->resok.maxfilesize = INT64_MAX;
3984 		else
3985 			resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3986 	}
3987 
3988 	resp->resok.time_delta.seconds = 0;
3989 	resp->resok.time_delta.nseconds = 1000;
3990 	resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3991 	    FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3992 
3993 	DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
3994 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
3995 	    FSINFO3res *, resp);
3996 
3997 	VN_RELE(vp);
3998 
3999 	return;
4000 
4001 out:
4002 	DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
4003 	    cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi,
4004 	    FSINFO3res *, resp);
4005 	if (vp != NULL)
4006 		VN_RELE(vp);
4007 }
4008 
4009 void *
rfs3_fsinfo_getfh(FSINFO3args * args)4010 rfs3_fsinfo_getfh(FSINFO3args *args)
4011 {
4012 	return (&args->fsroot);
4013 }
4014 
4015 /* ARGSUSED */
4016 void
rfs3_pathconf(PATHCONF3args * args,PATHCONF3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)4017 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
4018     struct svc_req *req, cred_t *cr, bool_t ro)
4019 {
4020 	int error;
4021 	vnode_t *vp;
4022 	struct vattr *vap;
4023 	struct vattr va;
4024 	ulong_t val;
4025 
4026 	vap = NULL;
4027 
4028 	vp = nfs3_fhtovp(&args->object, exi);
4029 
4030 	DTRACE_NFSV3_5(op__pathconf__start, struct svc_req *, req,
4031 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4032 	    PATHCONF3args *, args);
4033 
4034 	if (vp == NULL) {
4035 		error = ESTALE;
4036 		goto out;
4037 	}
4038 
4039 	if (is_system_labeled()) {
4040 		bslabel_t *clabel = req->rq_label;
4041 
4042 		ASSERT(clabel != NULL);
4043 		DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
4044 		    "got client label from request(1)", struct svc_req *, req);
4045 
4046 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
4047 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
4048 			    exi)) {
4049 				resp->status = NFS3ERR_ACCES;
4050 				goto out1;
4051 			}
4052 		}
4053 	}
4054 
4055 	va.va_mask = AT_ALL;
4056 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4057 
4058 	error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
4059 	if (error)
4060 		goto out;
4061 	resp->resok.info.link_max = (uint32)val;
4062 
4063 	error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
4064 	if (error)
4065 		goto out;
4066 	resp->resok.info.name_max = (uint32)val;
4067 
4068 	error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
4069 	if (error)
4070 		goto out;
4071 	if (val == 1)
4072 		resp->resok.info.no_trunc = TRUE;
4073 	else
4074 		resp->resok.info.no_trunc = FALSE;
4075 
4076 	error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
4077 	if (error)
4078 		goto out;
4079 	if (val == 1)
4080 		resp->resok.info.chown_restricted = TRUE;
4081 	else
4082 		resp->resok.info.chown_restricted = FALSE;
4083 
4084 	resp->status = NFS3_OK;
4085 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4086 	resp->resok.info.case_insensitive = FALSE;
4087 	resp->resok.info.case_preserving = TRUE;
4088 	DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4089 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4090 	    PATHCONF3res *, resp);
4091 	VN_RELE(vp);
4092 	return;
4093 
4094 out:
4095 	if (curthread->t_flag & T_WOULDBLOCK) {
4096 		curthread->t_flag &= ~T_WOULDBLOCK;
4097 		resp->status = NFS3ERR_JUKEBOX;
4098 	} else
4099 		resp->status = puterrno3(error);
4100 out1:
4101 	DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
4102 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4103 	    PATHCONF3res *, resp);
4104 	if (vp != NULL)
4105 		VN_RELE(vp);
4106 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4107 }
4108 
4109 void *
rfs3_pathconf_getfh(PATHCONF3args * args)4110 rfs3_pathconf_getfh(PATHCONF3args *args)
4111 {
4112 
4113 	return (&args->object);
4114 }
4115 
4116 void
rfs3_commit(COMMIT3args * args,COMMIT3res * resp,struct exportinfo * exi,struct svc_req * req,cred_t * cr,bool_t ro)4117 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4118     struct svc_req *req, cred_t *cr, bool_t ro)
4119 {
4120 	nfs3_srv_t *ns;
4121 	int error;
4122 	vnode_t *vp;
4123 	struct vattr *bvap;
4124 	struct vattr bva;
4125 	struct vattr *avap;
4126 	struct vattr ava;
4127 
4128 	bvap = NULL;
4129 	avap = NULL;
4130 
4131 	vp = nfs3_fhtovp(&args->file, exi);
4132 
4133 	DTRACE_NFSV3_5(op__commit__start, struct svc_req *, req,
4134 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4135 	    COMMIT3args *, args);
4136 
4137 	if (vp == NULL) {
4138 		error = ESTALE;
4139 		goto out;
4140 	}
4141 
4142 	ASSERT3U(curzone->zone_id, ==, exi->exi_zoneid); /* exi is non-NULL. */
4143 	ns = nfs3_get_srv();
4144 	bva.va_mask = AT_ALL;
4145 	error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4146 
4147 	/*
4148 	 * If we can't get the attributes, then we can't do the
4149 	 * right access checking.  So, we'll fail the request.
4150 	 */
4151 	if (error)
4152 		goto out;
4153 
4154 	bvap = &bva;
4155 
4156 	if (rdonly(ro, vp)) {
4157 		resp->status = NFS3ERR_ROFS;
4158 		goto out1;
4159 	}
4160 
4161 	if (vp->v_type != VREG) {
4162 		resp->status = NFS3ERR_INVAL;
4163 		goto out1;
4164 	}
4165 
4166 	if (is_system_labeled()) {
4167 		bslabel_t *clabel = req->rq_label;
4168 
4169 		ASSERT(clabel != NULL);
4170 		DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4171 		    "got client label from request(1)", struct svc_req *, req);
4172 
4173 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
4174 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4175 			    exi)) {
4176 				resp->status = NFS3ERR_ACCES;
4177 				goto out1;
4178 			}
4179 		}
4180 	}
4181 
4182 	if (crgetuid(cr) != bva.va_uid &&
4183 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4184 		goto out;
4185 
4186 	error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4187 
4188 	ava.va_mask = AT_ALL;
4189 	avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4190 
4191 	if (error)
4192 		goto out;
4193 
4194 	resp->status = NFS3_OK;
4195 	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4196 	resp->resok.verf = ns->write3verf;
4197 
4198 	DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4199 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4200 	    COMMIT3res *, resp);
4201 
4202 	VN_RELE(vp);
4203 
4204 	return;
4205 
4206 out:
4207 	if (curthread->t_flag & T_WOULDBLOCK) {
4208 		curthread->t_flag &= ~T_WOULDBLOCK;
4209 		resp->status = NFS3ERR_JUKEBOX;
4210 	} else
4211 		resp->status = puterrno3(error);
4212 out1:
4213 	DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
4214 	    cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
4215 	    COMMIT3res *, resp);
4216 
4217 	if (vp != NULL)
4218 		VN_RELE(vp);
4219 	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4220 }
4221 
4222 void *
rfs3_commit_getfh(COMMIT3args * args)4223 rfs3_commit_getfh(COMMIT3args *args)
4224 {
4225 
4226 	return (&args->file);
4227 }
4228 
4229 static int
sattr3_to_vattr(sattr3 * sap,struct vattr * vap)4230 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4231 {
4232 
4233 	vap->va_mask = 0;
4234 
4235 	if (sap->mode.set_it) {
4236 		vap->va_mode = (mode_t)sap->mode.mode;
4237 		vap->va_mask |= AT_MODE;
4238 	}
4239 	if (sap->uid.set_it) {
4240 		vap->va_uid = (uid_t)sap->uid.uid;
4241 		vap->va_mask |= AT_UID;
4242 	}
4243 	if (sap->gid.set_it) {
4244 		vap->va_gid = (gid_t)sap->gid.gid;
4245 		vap->va_mask |= AT_GID;
4246 	}
4247 	if (sap->size.set_it) {
4248 		if (sap->size.size > (size3)((u_longlong_t)-1))
4249 			return (EINVAL);
4250 		vap->va_size = sap->size.size;
4251 		vap->va_mask |= AT_SIZE;
4252 	}
4253 	if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4254 #ifndef _LP64
4255 		/* check time validity */
4256 		if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4257 			return (EOVERFLOW);
4258 #endif
4259 		/*
4260 		 * nfs protocol defines times as unsigned so don't extend sign,
4261 		 * unless sysadmin set nfs_allow_preepoch_time.
4262 		 */
4263 		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4264 		    sap->atime.atime.seconds);
4265 		vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4266 		vap->va_mask |= AT_ATIME;
4267 	} else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4268 		gethrestime(&vap->va_atime);
4269 		vap->va_mask |= AT_ATIME;
4270 	}
4271 	if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4272 #ifndef _LP64
4273 		/* check time validity */
4274 		if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4275 			return (EOVERFLOW);
4276 #endif
4277 		/*
4278 		 * nfs protocol defines times as unsigned so don't extend sign,
4279 		 * unless sysadmin set nfs_allow_preepoch_time.
4280 		 */
4281 		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4282 		    sap->mtime.mtime.seconds);
4283 		vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4284 		vap->va_mask |= AT_MTIME;
4285 	} else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4286 		gethrestime(&vap->va_mtime);
4287 		vap->va_mask |= AT_MTIME;
4288 	}
4289 
4290 	return (0);
4291 }
4292 
4293 static const ftype3 vt_to_nf3[] = {
4294 	0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4295 };
4296 
4297 static int
vattr_to_fattr3(struct vattr * vap,fattr3 * fap)4298 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4299 {
4300 
4301 	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4302 	/* Return error if time or size overflow */
4303 	if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4304 		return (EOVERFLOW);
4305 	}
4306 	fap->type = vt_to_nf3[vap->va_type];
4307 	fap->mode = (mode3)(vap->va_mode & MODEMASK);
4308 	fap->nlink = (uint32)vap->va_nlink;
4309 	if (vap->va_uid == UID_NOBODY)
4310 		fap->uid = (uid3)NFS_UID_NOBODY;
4311 	else
4312 		fap->uid = (uid3)vap->va_uid;
4313 	if (vap->va_gid == GID_NOBODY)
4314 		fap->gid = (gid3)NFS_GID_NOBODY;
4315 	else
4316 		fap->gid = (gid3)vap->va_gid;
4317 	fap->size = (size3)vap->va_size;
4318 	fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4319 	fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4320 	fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4321 	fap->fsid = (uint64)vap->va_fsid;
4322 	fap->fileid = (fileid3)vap->va_nodeid;
4323 	fap->atime.seconds = vap->va_atime.tv_sec;
4324 	fap->atime.nseconds = vap->va_atime.tv_nsec;
4325 	fap->mtime.seconds = vap->va_mtime.tv_sec;
4326 	fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4327 	fap->ctime.seconds = vap->va_ctime.tv_sec;
4328 	fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4329 	return (0);
4330 }
4331 
4332 static int
vattr_to_wcc_attr(struct vattr * vap,wcc_attr * wccap)4333 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4334 {
4335 
4336 	/* Return error if time or size overflow */
4337 	if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4338 	    NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4339 	    NFS3_SIZE_OK(vap->va_size))) {
4340 		return (EOVERFLOW);
4341 	}
4342 	wccap->size = (size3)vap->va_size;
4343 	wccap->mtime.seconds = vap->va_mtime.tv_sec;
4344 	wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4345 	wccap->ctime.seconds = vap->va_ctime.tv_sec;
4346 	wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4347 	return (0);
4348 }
4349 
4350 static void
vattr_to_pre_op_attr(struct vattr * vap,pre_op_attr * poap)4351 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4352 {
4353 
4354 	/* don't return attrs if time overflow */
4355 	if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4356 		poap->attributes = TRUE;
4357 	} else
4358 		poap->attributes = FALSE;
4359 }
4360 
4361 void
vattr_to_post_op_attr(struct vattr * vap,post_op_attr * poap)4362 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4363 {
4364 
4365 	/* don't return attrs if time overflow */
4366 	if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4367 		poap->attributes = TRUE;
4368 	} else
4369 		poap->attributes = FALSE;
4370 }
4371 
4372 static void
vattr_to_wcc_data(struct vattr * bvap,struct vattr * avap,wcc_data * wccp)4373 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4374 {
4375 	vattr_to_pre_op_attr(bvap, &wccp->before);
4376 	vattr_to_post_op_attr(avap, &wccp->after);
4377 }
4378 
4379 static int
rdma_setup_read_data3(READ3args * args,READ3resok * rok)4380 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4381 {
4382 	struct clist	*wcl;
4383 	int		wlist_len;
4384 	count3		count = rok->count;
4385 
4386 	wcl = args->wlist;
4387 	if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE)
4388 		return (FALSE);
4389 
4390 	wcl = args->wlist;
4391 	rok->wlist_len = wlist_len;
4392 	rok->wlist = wcl;
4393 	return (TRUE);
4394 }
4395 
4396 void
rfs3_srv_zone_init(nfs_globals_t * ng)4397 rfs3_srv_zone_init(nfs_globals_t *ng)
4398 {
4399 	nfs3_srv_t *ns;
4400 	struct rfs3_verf_overlay {
4401 		uint_t id; /* a "unique" identifier */
4402 		int ts; /* a unique timestamp */
4403 	} *verfp;
4404 	timestruc_t now;
4405 
4406 	ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
4407 
4408 	/*
4409 	 * The following algorithm attempts to find a unique verifier
4410 	 * to be used as the write verifier returned from the server
4411 	 * to the client.  It is important that this verifier change
4412 	 * whenever the server reboots.  Of secondary importance, it
4413 	 * is important for the verifier to be unique between two
4414 	 * different servers.
4415 	 *
4416 	 * Thus, an attempt is made to use the system hostid and the
4417 	 * current time in seconds when the nfssrv kernel module is
4418 	 * loaded.  It is assumed that an NFS server will not be able
4419 	 * to boot and then to reboot in less than a second.  If the
4420 	 * hostid has not been set, then the current high resolution
4421 	 * time is used.  This will ensure different verifiers each
4422 	 * time the server reboots and minimize the chances that two
4423 	 * different servers will have the same verifier.
4424 	 */
4425 
4426 #ifndef	lint
4427 	/*
4428 	 * We ASSERT that this constant logic expression is
4429 	 * always true because in the past, it wasn't.
4430 	 */
4431 	ASSERT(sizeof (*verfp) <= sizeof (ns->write3verf));
4432 #endif
4433 
4434 	gethrestime(&now);
4435 	verfp = (struct rfs3_verf_overlay *)&ns->write3verf;
4436 	verfp->ts = (int)now.tv_sec;
4437 	verfp->id = zone_get_hostid(NULL);
4438 
4439 	if (verfp->id == 0)
4440 		verfp->id = (uint_t)now.tv_nsec;
4441 
4442 	ng->nfs3_srv = ns;
4443 }
4444 
4445 void
rfs3_srv_zone_fini(nfs_globals_t * ng)4446 rfs3_srv_zone_fini(nfs_globals_t *ng)
4447 {
4448 	nfs3_srv_t *ns = ng->nfs3_srv;
4449 
4450 	ng->nfs3_srv = NULL;
4451 
4452 	kmem_free(ns, sizeof (*ns));
4453 }
4454 
4455 void
rfs3_srvrinit(void)4456 rfs3_srvrinit(void)
4457 {
4458 	nfs3_srv_caller_id = fs_new_caller_id();
4459 }
4460 
/*
 * Counterpart of rfs3_srvrinit().  There is currently no state to
 * tear down here, so the function is intentionally empty.
 */
void
rfs3_srvrfini(void)
{
}
4466