1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Copyright 2018 Nexenta Systems, Inc.
29  */
30 
31 #include <sys/systm.h>
32 #include <rpc/auth.h>
33 #include <rpc/clnt.h>
34 #include <nfs/nfs4_kprot.h>
35 #include <nfs/nfs4.h>
36 #include <nfs/lm.h>
37 #include <sys/cmn_err.h>
38 #include <sys/disp.h>
39 #include <sys/sdt.h>
40 
41 #include <sys/pathname.h>
42 
43 #include <sys/strsubr.h>
44 #include <sys/ddi.h>
45 
46 #include <sys/vnode.h>
47 #include <sys/sdt.h>
48 #include <inet/common.h>
49 #include <inet/ip.h>
50 #include <inet/ip6.h>
51 
/* NOTE(review): not referenced in the part of the file reviewed here. */
#define	MAX_READ_DELEGATIONS 5

/* NOTE(review): not referenced in the part of the file reviewed here. */
static int rfs4_deleg_disabled;
/*
 * Upper bound on the number of attempts rfs4_cbinfo_hold() makes to set
 * up a new callback path before giving up.
 */
static int rfs4_max_setup_cb_tries = 5;

#ifdef DEBUG

/* Incremented by the CB_NULL code each time a CB_NULL call succeeds. */
int rfs4_cb_null;
/* NOTE(review): presumably debug knobs; not referenced in the reviewed part. */
int rfs4_cb_debug;
int rfs4_deleg_debug;

#endif

/* Forward declarations for static functions used before their definition. */
static void rfs4_recall_file(rfs4_file_t *,
    void (*recall)(rfs4_deleg_state_t *, bool_t),
    bool_t, rfs4_client_t *);
static	void		rfs4_revoke_file(rfs4_file_t *);
static	void		rfs4_cb_chflush(rfs4_cbinfo_t *);
static	CLIENT		*rfs4_cb_getch(rfs4_cbinfo_t *);
static	void		rfs4_cb_freech(rfs4_cbinfo_t *, CLIENT *, bool_t);
static rfs4_deleg_state_t *rfs4_deleg_state(rfs4_state_t *,
    open_delegation_type4, int *);
74 
/*
 * Convert a universal address to a transport specific address.
 *
 * A universal address has the form "<host>.<p1>.<p2>", where <host> is
 * the textual address understood by inet_pton() for address family af
 * and <p1>/<p2> are the decimal high and low octets of the port.
 *
 *	af	address family handed to inet_pton()
 *	ua	NUL-terminated universal address; it is modified in place
 *		while the host part is converted (one '.' is temporarily
 *		replaced by '\0') and is always restored before returning
 *	ap	where inet_pton() stores the binary host address
 *	pp	where the port is stored, in network byte order
 *
 * Returns 0 on success.  Returns EINVAL if ua contains fewer than two
 * dots, if inet_pton() rejects the host part, or if either port octet is
 * empty, contains a non-digit, or is larger than 255.  *pp is written
 * only on success; *ap may have been written even when EINVAL is
 * returned because the port failed to parse.
 */
static int
uaddr2sockaddr(int af, char *ua, void *ap, in_port_t *pp)
{
	int dots = 0, i, j, len, host_ok;
	unsigned int octet = 0;		/* wide enough to detect > 255 */
	unsigned int ndigits = 0;	/* digits seen in current octet */
	in_port_t port = 0;

	len = (int)strlen(ua);

	/* Walk backwards to the '.' that separates host from port. */
	for (i = len - 1; i >= 0; i--) {
		if (ua[i] == '.' && ++dots == 2)
			break;
	}
	if (i < 0)
		return (EINVAL);

	/*
	 * Terminate the host part just long enough for inet_pton() to see
	 * it as a string of its own, then put the '.' back.
	 */
	ua[i] = '\0';
	host_ok = (inet_pton(af, ua, ap) == 1);
	ua[i] = '.';
	if (!host_ok)
		return (EINVAL);

	for (j = i + 1; j < len; j++) {
		if (ua[j] == '.') {
			/* End of the high octet. */
			if (ndigits == 0)
				return (EINVAL);
			port = (in_port_t)(octet << 8);
			octet = 0;
			ndigits = 0;
		} else if (ua[j] >= '0' && ua[j] <= '9') {
			octet = octet * 10 + (unsigned int)(ua[j] - '0');
			/* Reject instead of silently wrapping. */
			if (octet > 255)
				return (EINVAL);
			ndigits++;
		} else {
			return (EINVAL);
		}
	}
	if (ndigits == 0)
		return (EINVAL);
	port += (in_port_t)octet;

	*pp = htons(port);
	return (0);
}
133 
/*
 * Update the delegation policy of the given NFSv4 server instance with
 * the value of "new_policy".  The store is done while holding
 * deleg_policy_lock as writer.
 */
void
rfs4_set_deleg_policy(nfs4_srv_t *nsrv4, srv_deleg_policy_t new_policy)
{
	rw_enter(&nsrv4->deleg_policy_lock, RW_WRITER);
	nsrv4->nfs4_deleg_policy = new_policy;
	rw_exit(&nsrv4->deleg_policy_lock);
}
145 
/*
 * Acquire deleg_policy_lock as reader and return with it held.  The
 * lock is dropped by rfs4_rele_deleg_policy().
 */
void
rfs4_hold_deleg_policy(nfs4_srv_t *nsrv4)
{
	rw_enter(&nsrv4->deleg_policy_lock, RW_READER);
}
151 
/*
 * Drop deleg_policy_lock; the counterpart of rfs4_hold_deleg_policy().
 */
void
rfs4_rele_deleg_policy(nfs4_srv_t *nsrv4)
{
	rw_exit(&nsrv4->deleg_policy_lock);
}
157 
158 srv_deleg_policy_t
nfs4_get_deleg_policy()159 nfs4_get_deleg_policy()
160 {
161 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
162 	return (nsrv4->nfs4_deleg_policy);
163 }
164 
165 
166 /*
167  * This free function is to be used when the client struct is being
168  * released and nothing at all is needed of the callback info any
169  * longer.
170  */
171 void
rfs4_cbinfo_free(rfs4_cbinfo_t * cbp)172 rfs4_cbinfo_free(rfs4_cbinfo_t *cbp)
173 {
174 	char *addr = cbp->cb_callback.cb_location.r_addr;
175 	char *netid = cbp->cb_callback.cb_location.r_netid;
176 
177 	/* Free old address if any */
178 
179 	if (addr)
180 		kmem_free(addr, strlen(addr) + 1);
181 	if (netid)
182 		kmem_free(netid, strlen(netid) + 1);
183 
184 	addr = cbp->cb_newer.cb_callback.cb_location.r_addr;
185 	netid = cbp->cb_newer.cb_callback.cb_location.r_netid;
186 
187 	if (addr)
188 		kmem_free(addr, strlen(addr) + 1);
189 	if (netid)
190 		kmem_free(netid, strlen(netid) + 1);
191 
192 	if (cbp->cb_chc_free) {
193 		rfs4_cb_chflush(cbp);
194 	}
195 }
196 
197 /*
198  * The server uses this to check the callback path supplied by the
199  * client.  The callback connection is marked "in progress" while this
200  * work is going on and then eventually marked either OK or FAILED.
201  * This work can be done as part of a separate thread and at the end
202  * of this the thread will exit or it may be done such that the caller
203  * will continue with other work.
204  */
205 static void
rfs4_do_cb_null(rfs4_client_t * cp)206 rfs4_do_cb_null(rfs4_client_t *cp)
207 {
208 	struct timeval tv;
209 	CLIENT *ch;
210 	rfs4_cbstate_t newstate;
211 	rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
212 
213 	mutex_enter(cbp->cb_lock);
214 	/* If another thread is doing CB_NULL RPC then return */
215 	if (cbp->cb_nullcaller == TRUE) {
216 		mutex_exit(cbp->cb_lock);
217 		rfs4_client_rele(cp);
218 		zthread_exit();
219 	}
220 
221 	/* Mark the cbinfo as having a thread in the NULL callback */
222 	cbp->cb_nullcaller = TRUE;
223 
224 	/*
225 	 * Are there other threads still using the cbinfo client
226 	 * handles?  If so, this thread must wait before going and
227 	 * mucking aroiund with the callback information
228 	 */
229 	while (cbp->cb_refcnt != 0)
230 		cv_wait(cbp->cb_cv_nullcaller, cbp->cb_lock);
231 
232 	/*
233 	 * This thread itself may find that new callback info has
234 	 * arrived and is set up to handle this case and redrive the
235 	 * call to the client's callback server.
236 	 */
237 retry:
238 	if (cbp->cb_newer.cb_new == TRUE &&
239 	    cbp->cb_newer.cb_confirmed == TRUE) {
240 		char *addr = cbp->cb_callback.cb_location.r_addr;
241 		char *netid = cbp->cb_callback.cb_location.r_netid;
242 
243 		/*
244 		 * Free the old stuff if it exists; may be the first
245 		 * time through this path
246 		 */
247 		if (addr)
248 			kmem_free(addr, strlen(addr) + 1);
249 		if (netid)
250 			kmem_free(netid, strlen(netid) + 1);
251 
252 		/* Move over the addr/netid */
253 		cbp->cb_callback.cb_location.r_addr =
254 		    cbp->cb_newer.cb_callback.cb_location.r_addr;
255 		cbp->cb_newer.cb_callback.cb_location.r_addr = NULL;
256 		cbp->cb_callback.cb_location.r_netid =
257 		    cbp->cb_newer.cb_callback.cb_location.r_netid;
258 		cbp->cb_newer.cb_callback.cb_location.r_netid = NULL;
259 
260 		/* Get the program number */
261 		cbp->cb_callback.cb_program =
262 		    cbp->cb_newer.cb_callback.cb_program;
263 		cbp->cb_newer.cb_callback.cb_program = 0;
264 
265 		/* Don't forget the protocol's "cb_ident" field */
266 		cbp->cb_ident = cbp->cb_newer.cb_ident;
267 		cbp->cb_newer.cb_ident = 0;
268 
269 		/* no longer new */
270 		cbp->cb_newer.cb_new = FALSE;
271 		cbp->cb_newer.cb_confirmed = FALSE;
272 
273 		/* get rid of the old client handles that may exist */
274 		rfs4_cb_chflush(cbp);
275 
276 		cbp->cb_state = CB_NONE;
277 		cbp->cb_timefailed = 0; /* reset the clock */
278 		cbp->cb_notified_of_cb_path_down = TRUE;
279 	}
280 
281 	if (cbp->cb_state != CB_NONE) {
282 		cv_broadcast(cbp->cb_cv);	/* let the others know */
283 		cbp->cb_nullcaller = FALSE;
284 		mutex_exit(cbp->cb_lock);
285 		rfs4_client_rele(cp);
286 		zthread_exit();
287 	}
288 
289 	/* mark rfs4_client_t as CALLBACK NULL in progress */
290 	cbp->cb_state = CB_INPROG;
291 	mutex_exit(cbp->cb_lock);
292 
293 	/* get/generate a client handle */
294 	if ((ch = rfs4_cb_getch(cbp)) == NULL) {
295 		mutex_enter(cbp->cb_lock);
296 		cbp->cb_state = CB_BAD;
297 		cbp->cb_timefailed = gethrestime_sec(); /* observability */
298 		goto retry;
299 	}
300 
301 
302 	tv.tv_sec = 30;
303 	tv.tv_usec = 0;
304 	if (clnt_call(ch, CB_NULL, xdr_void, NULL, xdr_void, NULL, tv) != 0) {
305 		newstate = CB_BAD;
306 	} else {
307 		newstate = CB_OK;
308 #ifdef	DEBUG
309 		rfs4_cb_null++;
310 #endif
311 	}
312 
313 	/* Check to see if the client has specified new callback info */
314 	mutex_enter(cbp->cb_lock);
315 	rfs4_cb_freech(cbp, ch, TRUE);
316 	if (cbp->cb_newer.cb_new == TRUE &&
317 	    cbp->cb_newer.cb_confirmed == TRUE) {
318 		goto retry;	/* give the CB_NULL another chance */
319 	}
320 
321 	cbp->cb_state = newstate;
322 	if (cbp->cb_state == CB_BAD)
323 		cbp->cb_timefailed = gethrestime_sec(); /* observability */
324 
325 	cv_broadcast(cbp->cb_cv);	/* start up the other threads */
326 	cbp->cb_nullcaller = FALSE;
327 	mutex_exit(cbp->cb_lock);
328 	rfs4_client_rele(cp);
329 	zthread_exit();
330 }
331 
332 /*
333  * Given a client struct, inspect the callback info to see if the
334  * callback path is up and available.
335  *
336  * If new callback path is available and no one has set it up then
337  * try to set it up. If setup is not successful after 5 tries (5 secs)
338  * then gives up and returns NULL.
339  *
340  * If callback path is being initialized, then wait for the CB_NULL RPC
341  * call to occur.
342  */
343 static rfs4_cbinfo_t *
rfs4_cbinfo_hold(rfs4_client_t * cp)344 rfs4_cbinfo_hold(rfs4_client_t *cp)
345 {
346 	rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
347 	int retries = 0;
348 
349 	mutex_enter(cbp->cb_lock);
350 
351 	while (cbp->cb_newer.cb_new == TRUE && cbp->cb_nullcaller == FALSE) {
352 		/*
353 		 * Looks like a new callback path may be available and
354 		 * noone has set it up.
355 		 */
356 		mutex_exit(cbp->cb_lock);
357 		rfs4_dbe_hold(cp->rc_dbe);
358 		rfs4_do_cb_null(cp); /* caller will release client hold */
359 
360 		mutex_enter(cbp->cb_lock);
361 		/*
362 		 * If callback path is no longer new, or it's being setup
363 		 * then stop and wait for it to be done.
364 		 */
365 		if (cbp->cb_newer.cb_new == FALSE || cbp->cb_nullcaller == TRUE)
366 			break;
367 		mutex_exit(cbp->cb_lock);
368 
369 		if (++retries >= rfs4_max_setup_cb_tries)
370 			return (NULL);
371 		delay(hz);
372 		mutex_enter(cbp->cb_lock);
373 	}
374 
375 	/* Is there a thread working on doing the CB_NULL RPC? */
376 	if (cbp->cb_nullcaller == TRUE)
377 		cv_wait(cbp->cb_cv, cbp->cb_lock);  /* if so, wait on it */
378 
379 	/* If the callback path is not okay (up and running), just quit */
380 	if (cbp->cb_state != CB_OK) {
381 		mutex_exit(cbp->cb_lock);
382 		return (NULL);
383 	}
384 
385 	/* Let someone know we are using the current callback info */
386 	cbp->cb_refcnt++;
387 	mutex_exit(cbp->cb_lock);
388 	return (cbp);
389 }
390 
391 /*
392  * The caller is done with the callback info.  It may be that the
393  * caller's RPC failed and the NFSv4 client has actually provided new
394  * callback information.  If so, let the caller know so they can
395  * advantage of this and maybe retry the RPC that originally failed.
396  */
397 static int
rfs4_cbinfo_rele(rfs4_cbinfo_t * cbp,rfs4_cbstate_t newstate)398 rfs4_cbinfo_rele(rfs4_cbinfo_t *cbp, rfs4_cbstate_t newstate)
399 {
400 	int cb_new = FALSE;
401 
402 	mutex_enter(cbp->cb_lock);
403 
404 	/* The caller gets a chance to mark the callback info as bad */
405 	if (newstate != CB_NOCHANGE)
406 		cbp->cb_state = newstate;
407 	if (newstate == CB_FAILED) {
408 		cbp->cb_timefailed = gethrestime_sec(); /* observability */
409 		cbp->cb_notified_of_cb_path_down = FALSE;
410 	}
411 
412 	cbp->cb_refcnt--;	/* no longer using the information */
413 
414 	/*
415 	 * A thread may be waiting on this one to finish and if so,
416 	 * let it know that it is okay to do the CB_NULL to the
417 	 * client's callback server.
418 	 */
419 	if (cbp->cb_refcnt == 0 && cbp->cb_nullcaller)
420 		cv_broadcast(cbp->cb_cv_nullcaller);
421 
422 	/*
423 	 * If this is the last thread to use the callback info and
424 	 * there is new callback information to try and no thread is
425 	 * there ready to do the CB_NULL, then return true to teh
426 	 * caller so they can do the CB_NULL
427 	 */
428 	if (cbp->cb_refcnt == 0 &&
429 	    cbp->cb_nullcaller == FALSE &&
430 	    cbp->cb_newer.cb_new == TRUE &&
431 	    cbp->cb_newer.cb_confirmed == TRUE)
432 		cb_new = TRUE;
433 
434 	mutex_exit(cbp->cb_lock);
435 
436 	return (cb_new);
437 }
438 
439 /*
440  * Given the information in the callback info struct, create a client
441  * handle that can be used by the server for its callback path.
442  */
443 static CLIENT *
rfs4_cbch_init(rfs4_cbinfo_t * cbp)444 rfs4_cbch_init(rfs4_cbinfo_t *cbp)
445 {
446 	struct knetconfig knc;
447 	vnode_t *vp;
448 	struct sockaddr_in addr4;
449 	struct sockaddr_in6 addr6;
450 	void *addr, *taddr;
451 	in_port_t *pp;
452 	int af;
453 	char *devnam;
454 	struct netbuf nb;
455 	int size;
456 	CLIENT *ch = NULL;
457 	int useresvport = 0;
458 
459 	mutex_enter(cbp->cb_lock);
460 
461 	if (cbp->cb_callback.cb_location.r_netid == NULL ||
462 	    cbp->cb_callback.cb_location.r_addr == NULL) {
463 		goto cb_init_out;
464 	}
465 
466 	if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp") == 0) {
467 		knc.knc_semantics = NC_TPI_COTS;
468 		knc.knc_protofmly = "inet";
469 		knc.knc_proto = "tcp";
470 		devnam = "/dev/tcp";
471 		af = AF_INET;
472 	} else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp")
473 	    == 0) {
474 		knc.knc_semantics = NC_TPI_CLTS;
475 		knc.knc_protofmly = "inet";
476 		knc.knc_proto = "udp";
477 		devnam = "/dev/udp";
478 		af = AF_INET;
479 	} else if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp6")
480 	    == 0) {
481 		knc.knc_semantics = NC_TPI_COTS;
482 		knc.knc_protofmly = "inet6";
483 		knc.knc_proto = "tcp";
484 		devnam = "/dev/tcp6";
485 		af = AF_INET6;
486 	} else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp6")
487 	    == 0) {
488 		knc.knc_semantics = NC_TPI_CLTS;
489 		knc.knc_protofmly = "inet6";
490 		knc.knc_proto = "udp";
491 		devnam = "/dev/udp6";
492 		af = AF_INET6;
493 	} else {
494 		goto cb_init_out;
495 	}
496 
497 	if (lookupname(devnam, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp) != 0) {
498 
499 		goto cb_init_out;
500 	}
501 
502 	if (vp->v_type != VCHR) {
503 		VN_RELE(vp);
504 		goto cb_init_out;
505 	}
506 
507 	knc.knc_rdev = vp->v_rdev;
508 
509 	VN_RELE(vp);
510 
511 	if (af == AF_INET) {
512 		size = sizeof (addr4);
513 		bzero(&addr4, size);
514 		addr4.sin_family = (sa_family_t)af;
515 		addr = &addr4.sin_addr;
516 		pp = &addr4.sin_port;
517 		taddr = &addr4;
518 	} else /* AF_INET6 */ {
519 		size = sizeof (addr6);
520 		bzero(&addr6, size);
521 		addr6.sin6_family = (sa_family_t)af;
522 		addr = &addr6.sin6_addr;
523 		pp = &addr6.sin6_port;
524 		taddr = &addr6;
525 	}
526 
527 	if (uaddr2sockaddr(af,
528 	    cbp->cb_callback.cb_location.r_addr, addr, pp)) {
529 
530 		goto cb_init_out;
531 	}
532 
533 
534 	nb.maxlen = nb.len = size;
535 	nb.buf = (char *)taddr;
536 
537 	if (clnt_tli_kcreate(&knc, &nb, cbp->cb_callback.cb_program,
538 	    NFS_CB, 0, 0, curthread->t_cred, &ch)) {
539 
540 		ch = NULL;
541 	}
542 
543 	/* turn off reserved port usage */
544 	(void) CLNT_CONTROL(ch, CLSET_BINDRESVPORT, (char *)&useresvport);
545 
546 cb_init_out:
547 	mutex_exit(cbp->cb_lock);
548 	return (ch);
549 }
550 
551 /*
552  * Iterate over the client handle cache and
553  * destroy it.
554  */
555 static void
rfs4_cb_chflush(rfs4_cbinfo_t * cbp)556 rfs4_cb_chflush(rfs4_cbinfo_t *cbp)
557 {
558 	CLIENT *ch;
559 
560 	while (cbp->cb_chc_free) {
561 		cbp->cb_chc_free--;
562 		ch = cbp->cb_chc[cbp->cb_chc_free];
563 		cbp->cb_chc[cbp->cb_chc_free] = NULL;
564 		if (ch) {
565 			if (ch->cl_auth)
566 				auth_destroy(ch->cl_auth);
567 			clnt_destroy(ch);
568 		}
569 	}
570 }
571 
572 /*
573  * Return a client handle, either from a the small
574  * rfs4_client_t cache or one that we just created.
575  */
576 static CLIENT *
rfs4_cb_getch(rfs4_cbinfo_t * cbp)577 rfs4_cb_getch(rfs4_cbinfo_t *cbp)
578 {
579 	CLIENT *cbch = NULL;
580 	uint32_t zilch = 0;
581 
582 	mutex_enter(cbp->cb_lock);
583 
584 	if (cbp->cb_chc_free) {
585 		cbp->cb_chc_free--;
586 		cbch = cbp->cb_chc[ cbp->cb_chc_free ];
587 		mutex_exit(cbp->cb_lock);
588 		(void) CLNT_CONTROL(cbch, CLSET_XID, (char *)&zilch);
589 		return (cbch);
590 	}
591 
592 	mutex_exit(cbp->cb_lock);
593 
594 	/* none free so make it now */
595 	cbch = rfs4_cbch_init(cbp);
596 
597 	return (cbch);
598 }
599 
600 /*
601  * Return the client handle to the small cache or
602  * destroy it.
603  */
604 static void
rfs4_cb_freech(rfs4_cbinfo_t * cbp,CLIENT * ch,bool_t lockheld)605 rfs4_cb_freech(rfs4_cbinfo_t *cbp, CLIENT *ch, bool_t lockheld)
606 {
607 	if (lockheld == FALSE)
608 		mutex_enter(cbp->cb_lock);
609 
610 	if (cbp->cb_chc_free < RFS4_CBCH_MAX) {
611 		cbp->cb_chc[ cbp->cb_chc_free++ ] = ch;
612 		if (lockheld == FALSE)
613 			mutex_exit(cbp->cb_lock);
614 		return;
615 	}
616 	if (lockheld == FALSE)
617 		mutex_exit(cbp->cb_lock);
618 
619 	/*
620 	 * cache maxed out of free entries, obliterate
621 	 * this client handle, destroy it, throw it away.
622 	 */
623 	if (ch->cl_auth)
624 		auth_destroy(ch->cl_auth);
625 	clnt_destroy(ch);
626 }
627 
628 /*
629  * With the supplied callback information - initialize the client
630  * callback data.  If there is a callback in progress, save the
631  * callback info so that a thread can pick it up in the future.
632  */
633 void
rfs4_client_setcb(rfs4_client_t * cp,cb_client4 * cb,uint32_t cb_ident)634 rfs4_client_setcb(rfs4_client_t *cp, cb_client4 *cb, uint32_t cb_ident)
635 {
636 	char *addr = NULL;
637 	char *netid = NULL;
638 	rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
639 	size_t len;
640 
641 	/* Set the call back for the client */
642 	if (cb->cb_location.r_addr && cb->cb_location.r_addr[0] != '\0' &&
643 	    cb->cb_location.r_netid && cb->cb_location.r_netid[0] != '\0') {
644 		len = strlen(cb->cb_location.r_addr) + 1;
645 		addr = kmem_alloc(len, KM_SLEEP);
646 		bcopy(cb->cb_location.r_addr, addr, len);
647 		len = strlen(cb->cb_location.r_netid) + 1;
648 		netid = kmem_alloc(len, KM_SLEEP);
649 		bcopy(cb->cb_location.r_netid, netid, len);
650 	}
651 	/* ready to save the new information but first free old, if exists */
652 	mutex_enter(cbp->cb_lock);
653 
654 	cbp->cb_newer.cb_callback.cb_program = cb->cb_program;
655 
656 	if (cbp->cb_newer.cb_callback.cb_location.r_addr != NULL)
657 		kmem_free(cbp->cb_newer.cb_callback.cb_location.r_addr,
658 		    strlen(cbp->cb_newer.cb_callback.cb_location.r_addr) + 1);
659 	cbp->cb_newer.cb_callback.cb_location.r_addr = addr;
660 
661 	if (cbp->cb_newer.cb_callback.cb_location.r_netid != NULL)
662 		kmem_free(cbp->cb_newer.cb_callback.cb_location.r_netid,
663 		    strlen(cbp->cb_newer.cb_callback.cb_location.r_netid) + 1);
664 	cbp->cb_newer.cb_callback.cb_location.r_netid = netid;
665 
666 	cbp->cb_newer.cb_ident = cb_ident;
667 
668 	if (addr && *addr && netid && *netid) {
669 		cbp->cb_newer.cb_new = TRUE;
670 		cbp->cb_newer.cb_confirmed = FALSE;
671 	} else {
672 		cbp->cb_newer.cb_new = FALSE;
673 		cbp->cb_newer.cb_confirmed = FALSE;
674 	}
675 
676 	mutex_exit(cbp->cb_lock);
677 }
678 
679 /*
680  * The server uses this when processing SETCLIENTID_CONFIRM.  Callback
681  * information may have been provided on SETCLIENTID and this call
682  * marks that information as confirmed and then starts a thread to
683  * test the callback path.
684  */
685 void
rfs4_deleg_cb_check(rfs4_client_t * cp)686 rfs4_deleg_cb_check(rfs4_client_t *cp)
687 {
688 	if (cp->rc_cbinfo.cb_newer.cb_new == FALSE)
689 		return;
690 
691 	cp->rc_cbinfo.cb_newer.cb_confirmed = TRUE;
692 
693 	rfs4_dbe_hold(cp->rc_dbe); /* hold the client struct for thread */
694 
695 	(void) zthread_create(NULL, 0, rfs4_do_cb_null, cp, 0,
696 	    minclsyspri);
697 }
698 
699 static void
rfs4args_cb_recall_free(nfs_cb_argop4 * argop)700 rfs4args_cb_recall_free(nfs_cb_argop4 *argop)
701 {
702 	CB_RECALL4args	*rec_argp;
703 
704 	rec_argp = &argop->nfs_cb_argop4_u.opcbrecall;
705 	if (rec_argp->fh.nfs_fh4_val)
706 		kmem_free(rec_argp->fh.nfs_fh4_val, rec_argp->fh.nfs_fh4_len);
707 }
708 
709 /* ARGSUSED */
710 static void
rfs4args_cb_getattr_free(nfs_cb_argop4 * argop)711 rfs4args_cb_getattr_free(nfs_cb_argop4 *argop)
712 {
713 	CB_GETATTR4args *argp;
714 
715 	argp = &argop->nfs_cb_argop4_u.opcbgetattr;
716 	if (argp->fh.nfs_fh4_val)
717 		kmem_free(argp->fh.nfs_fh4_val, argp->fh.nfs_fh4_len);
718 }
719 
720 static void
rfs4freeargres(CB_COMPOUND4args * args,CB_COMPOUND4res * resp)721 rfs4freeargres(CB_COMPOUND4args *args, CB_COMPOUND4res *resp)
722 {
723 	int i, arglen;
724 	nfs_cb_argop4 *argop;
725 
726 	/*
727 	 * First free any special args alloc'd for specific ops.
728 	 */
729 	arglen = args->array_len;
730 	argop = args->array;
731 	for (i = 0; i < arglen; i++, argop++) {
732 
733 		switch (argop->argop) {
734 		case OP_CB_RECALL:
735 			rfs4args_cb_recall_free(argop);
736 			break;
737 
738 		case OP_CB_GETATTR:
739 			rfs4args_cb_getattr_free(argop);
740 			break;
741 
742 		default:
743 			return;
744 		}
745 	}
746 
747 	if (args->tag.utf8string_len > 0)
748 		UTF8STRING_FREE(args->tag)
749 
750 	kmem_free(args->array, arglen * sizeof (nfs_cb_argop4));
751 	if (resp)
752 		xdr_free(xdr_CB_COMPOUND4res, (caddr_t)resp);
753 }
754 
755 /*
756  * General callback routine for the server to the client.
757  */
758 static enum clnt_stat
rfs4_do_callback(rfs4_client_t * cp,CB_COMPOUND4args * args,CB_COMPOUND4res * res,struct timeval timeout)759 rfs4_do_callback(rfs4_client_t *cp, CB_COMPOUND4args *args,
760     CB_COMPOUND4res *res, struct timeval timeout)
761 {
762 	rfs4_cbinfo_t *cbp;
763 	CLIENT *ch;
764 	/* start with this in case cb_getch() fails */
765 	enum clnt_stat	stat = RPC_FAILED;
766 
767 	res->tag.utf8string_val = NULL;
768 	res->array = NULL;
769 
770 retry:
771 	cbp = rfs4_cbinfo_hold(cp);
772 	if (cbp == NULL)
773 		return (stat);
774 
775 	/* get a client handle */
776 	if ((ch = rfs4_cb_getch(cbp)) != NULL) {
777 		/*
778 		 * reset the cb_ident since it may have changed in
779 		 * rfs4_cbinfo_hold()
780 		 */
781 		args->callback_ident = cbp->cb_ident;
782 
783 		stat = clnt_call(ch, CB_COMPOUND, xdr_CB_COMPOUND4args_srv,
784 		    (caddr_t)args, xdr_CB_COMPOUND4res,
785 		    (caddr_t)res, timeout);
786 
787 		/* free client handle */
788 		rfs4_cb_freech(cbp, ch, FALSE);
789 	}
790 
791 	/*
792 	 * If the rele says that there may be new callback info then
793 	 * retry this sequence and it may succeed as a result of the
794 	 * new callback path
795 	 */
796 	if (rfs4_cbinfo_rele(cbp,
797 	    (stat == RPC_SUCCESS ? CB_NOCHANGE : CB_FAILED)) == TRUE)
798 		goto retry;
799 
800 	return (stat);
801 }
802 
803 /*
804  * Used by the NFSv4 server to get attributes for a file while
805  * handling the case where a file has been write delegated.  For the
806  * time being, VOP_GETATTR() is called and CB_GETATTR processing is
807  * not undertaken.  This call site is maintained in case the server is
808  * updated in the future to handle write delegation space guarantees.
809  */
810 nfsstat4
rfs4_vop_getattr(vnode_t * vp,vattr_t * vap,int flag,cred_t * cr)811 rfs4_vop_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr)
812 {
813 
814 	int error;
815 
816 	error = VOP_GETATTR(vp, vap, flag, cr, NULL);
817 	return (puterrno4(error));
818 }
819 
820 /*
821  * This is used everywhere in the v2/v3 server to allow the
822  * integration of all NFS versions and the support of delegation.  For
823  * now, just call the VOP_GETATTR().  If the NFSv4 server is enhanced
824  * in the future to provide space guarantees for write delegations
825  * then this call site should be expanded to interact with the client.
826  */
827 int
rfs4_delegated_getattr(vnode_t * vp,vattr_t * vap,int flag,cred_t * cr)828 rfs4_delegated_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr)
829 {
830 	return (VOP_GETATTR(vp, vap, flag, cr, NULL));
831 }
832 
833 /*
834  * Place the actual cb_recall otw call to client.
835  */
836 static void
rfs4_do_cb_recall(rfs4_deleg_state_t * dsp,bool_t trunc)837 rfs4_do_cb_recall(rfs4_deleg_state_t *dsp, bool_t trunc)
838 {
839 	CB_COMPOUND4args	cb4_args;
840 	CB_COMPOUND4res		cb4_res;
841 	CB_RECALL4args		*rec_argp;
842 	CB_RECALL4res		*rec_resp;
843 	nfs_cb_argop4		*argop;
844 	int			numops;
845 	int			argoplist_size;
846 	struct timeval		timeout;
847 	nfs_fh4			*fhp;
848 	enum clnt_stat		call_stat;
849 
850 	/*
851 	 * set up the compound args
852 	 */
853 	numops = 1;	/* CB_RECALL only */
854 
855 	argoplist_size = numops * sizeof (nfs_cb_argop4);
856 	argop = kmem_zalloc(argoplist_size, KM_SLEEP);
857 	argop->argop = OP_CB_RECALL;
858 	rec_argp = &argop->nfs_cb_argop4_u.opcbrecall;
859 
860 	(void) str_to_utf8("cb_recall", &cb4_args.tag);
861 	cb4_args.minorversion = CB4_MINORVERSION;
862 	/* cb4_args.callback_ident is set in rfs4_do_callback() */
863 	cb4_args.array_len = numops;
864 	cb4_args.array = argop;
865 
866 	/*
867 	 * fill in the args struct
868 	 */
869 	bcopy(&dsp->rds_delegid.stateid, &rec_argp->stateid, sizeof (stateid4));
870 	rec_argp->truncate = trunc;
871 
872 	fhp = &dsp->rds_finfo->rf_filehandle;
873 	rec_argp->fh.nfs_fh4_val = kmem_alloc(sizeof (char) *
874 	    fhp->nfs_fh4_len, KM_SLEEP);
875 	nfs_fh4_copy(fhp, &rec_argp->fh);
876 
877 	/* Keep track of when we did this for observability */
878 	dsp->rds_time_recalled = gethrestime_sec();
879 
880 	/*
881 	 * Set up the timeout for the callback and make the actual call.
882 	 * Timeout will be 80% of the lease period for this server.
883 	 */
884 	timeout.tv_sec = (rfs4_lease_time * 80) / 100;
885 	timeout.tv_usec = 0;
886 
887 	DTRACE_NFSV4_3(cb__recall__start, rfs4_client_t *, dsp->rds_client,
888 	    rfs4_deleg_state_t *, dsp, CB_RECALL4args *, rec_argp);
889 
890 	call_stat = rfs4_do_callback(dsp->rds_client, &cb4_args, &cb4_res,
891 	    timeout);
892 
893 	rec_resp = (cb4_res.array_len == 0) ? NULL :
894 	    &cb4_res.array[0].nfs_cb_resop4_u.opcbrecall;
895 
896 	DTRACE_NFSV4_3(cb__recall__done, rfs4_client_t *, dsp->rds_client,
897 	    rfs4_deleg_state_t *, dsp, CB_RECALL4res *, rec_resp);
898 
899 	if (call_stat != RPC_SUCCESS || cb4_res.status != NFS4_OK) {
900 		rfs4_return_deleg(dsp, TRUE);
901 	}
902 
903 	rfs4freeargres(&cb4_args, &cb4_res);
904 }
905 
/*
 * Argument block handed to each do_recall() thread.  It is allocated by
 * do_recall_file() and freed by do_recall().
 */
struct recall_arg {
	/* delegation state to recall; do_recall() drops a hold on it */
	rfs4_deleg_state_t *dsp;
	/* recall routine to invoke; do_recall() skips the call if NULL */
	void (*recall)(rfs4_deleg_state_t *, bool_t trunc);
	/* passed through as the second argument of recall */
	bool_t trunc;
};
911 
/*
 * Thread body that recalls a single delegation.
 *
 * Invokes arg->recall on the delegation state unless the delegation
 * type has already become OPEN_DELEGATE_NONE, then decrements the
 * file's rd_recall_count (signalling rd_recall_cv when it reaches
 * zero), releases the delegation state hold, frees arg and exits the
 * thread.  This function does not return.
 */
static void
do_recall(struct recall_arg *arg)
{
	rfs4_deleg_state_t *dsp = arg->dsp;
	rfs4_file_t *fp = dsp->rds_finfo;
	callb_cpr_t cpr_info;
	kmutex_t cpr_lock;

	mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
	CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "nfsv4Recall");

	/*
	 * It is possible that before this thread starts
	 * the client has sent us a return_delegation, and
	 * if that is the case we do not need to send the
	 * recall callback.
	 */
	if (dsp->rds_dtype != OPEN_DELEGATE_NONE) {
		DTRACE_PROBE3(nfss__i__recall,
		    struct recall_arg *, arg,
		    struct rfs4_deleg_state_t *, dsp,
		    struct rfs4_file_t *, fp);

		if (arg->recall)
			(void) (*arg->recall)(dsp, arg->trunc);
	}

	mutex_enter(fp->rf_dinfo.rd_recall_lock);
	/*
	 * Recall count may go negative if the parent thread that is
	 * creating the individual callback threads does not modify
	 * the recall_count field before the callback thread actually
	 * gets a response from the CB_RECALL
	 */
	fp->rf_dinfo.rd_recall_count--;
	if (fp->rf_dinfo.rd_recall_count == 0)
		cv_signal(fp->rf_dinfo.rd_recall_cv);
	mutex_exit(fp->rf_dinfo.rd_recall_lock);

	/* cpr_lock is entered before CALLB_CPR_EXIT and then destroyed */
	mutex_enter(&cpr_lock);
	CALLB_CPR_EXIT(&cpr_info);
	mutex_destroy(&cpr_lock);

	rfs4_deleg_state_rele(dsp); /* release the hold for this thread */
	kmem_free(arg, sizeof (struct recall_arg));
	zthread_exit();
}
959 
/*
 * Argument block handed to the do_recall_file() thread.  It is
 * allocated by rfs4_recall_file() and freed by do_recall_file().
 */
struct master_recall_args {
    /* file whose delegations are recalled; the thread drops a hold on it */
    rfs4_file_t *fp;
    /* recall routine passed on to every do_recall() thread */
    void (*recall)(rfs4_deleg_state_t *, bool_t);
    /* passed on to every do_recall() thread */
    bool_t trunc;
};
965 
/*
 * Thread body that recalls every delegation of one file.
 *
 * If a recall is already in progress for the file (rd_recall_count is
 * non-zero) the file hold is dropped, map is freed and the thread
 * exits.  Otherwise one do_recall() thread is started for each valid
 * entry on the file's delegation state list, the number started is
 * added to rd_recall_count, and this thread waits on rd_recall_cv until
 * the count is back to zero.  It then releases the file, frees map and
 * exits.  This function does not return.
 */
static void
do_recall_file(struct master_recall_args *map)
{
	rfs4_file_t *fp = map->fp;
	rfs4_deleg_state_t *dsp;
	struct recall_arg *arg;
	callb_cpr_t cpr_info;
	kmutex_t cpr_lock;
	int32_t recall_count;

	rfs4_dbe_lock(fp->rf_dbe);

	/* Recall already in progress ? */
	mutex_enter(fp->rf_dinfo.rd_recall_lock);
	if (fp->rf_dinfo.rd_recall_count != 0) {
		mutex_exit(fp->rf_dinfo.rd_recall_lock);
		/* drop the file reference while the entry lock is held */
		rfs4_dbe_rele_nolock(fp->rf_dbe);
		rfs4_dbe_unlock(fp->rf_dbe);
		kmem_free(map, sizeof (struct master_recall_args));
		zthread_exit();
	}

	mutex_exit(fp->rf_dinfo.rd_recall_lock);

	mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
	CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr,	"v4RecallFile");

	/* Start one recall thread per valid delegation state */
	recall_count = 0;
	for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
	    dsp = list_next(&fp->rf_delegstatelist, dsp)) {

		rfs4_dbe_lock(dsp->rds_dbe);
		/*
		 * if this delegation state
		 * is being reaped skip it
		 */
		if (rfs4_dbe_is_invalid(dsp->rds_dbe)) {
			rfs4_dbe_unlock(dsp->rds_dbe);
			continue;
		}

		/* hold for receiving thread */
		rfs4_dbe_hold(dsp->rds_dbe);
		rfs4_dbe_unlock(dsp->rds_dbe);

		/* freed by do_recall() */
		arg = kmem_alloc(sizeof (struct recall_arg), KM_SLEEP);
		arg->recall = map->recall;
		arg->trunc = map->trunc;
		arg->dsp = dsp;

		recall_count++;

		(void) zthread_create(NULL, 0, do_recall, arg, 0,
		    minclsyspri);
	}

	rfs4_dbe_unlock(fp->rf_dbe);

	mutex_enter(fp->rf_dinfo.rd_recall_lock);
	/*
	 * Recall count may go negative if the parent thread that is
	 * creating the individual callback threads does not modify
	 * the recall_count field before the callback thread actually
	 * gets a response from the CB_RECALL
	 */
	fp->rf_dinfo.rd_recall_count += recall_count;
	/* wait until every do_recall() thread has done its decrement */
	while (fp->rf_dinfo.rd_recall_count)
		cv_wait(fp->rf_dinfo.rd_recall_cv, fp->rf_dinfo.rd_recall_lock);

	mutex_exit(fp->rf_dinfo.rd_recall_lock);

	DTRACE_PROBE1(nfss__i__recall_done, rfs4_file_t *, fp);
	rfs4_file_rele(fp);
	kmem_free(map, sizeof (struct master_recall_args));
	/* cpr_lock is entered before CALLB_CPR_EXIT and then destroyed */
	mutex_enter(&cpr_lock);
	CALLB_CPR_EXIT(&cpr_info);
	mutex_destroy(&cpr_lock);
	zthread_exit();
}
1045 
1046 static void
rfs4_recall_file(rfs4_file_t * fp,void (* recall)(rfs4_deleg_state_t *,bool_t trunc),bool_t trunc,rfs4_client_t * cp)1047 rfs4_recall_file(rfs4_file_t *fp,
1048     void (*recall)(rfs4_deleg_state_t *, bool_t trunc),
1049     bool_t trunc, rfs4_client_t *cp)
1050 {
1051 	struct master_recall_args *args;
1052 
1053 	rfs4_dbe_lock(fp->rf_dbe);
1054 	if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
1055 		rfs4_dbe_unlock(fp->rf_dbe);
1056 		return;
1057 	}
1058 	rfs4_dbe_hold(fp->rf_dbe);	/* hold for new thread */
1059 
1060 	/*
1061 	 * Mark the time we started the recall processing.
1062 	 * If it has been previously recalled, do not reset the
1063 	 * timer since this is used for the revocation decision.
1064 	 */
1065 	if (fp->rf_dinfo.rd_time_recalled == 0)
1066 		fp->rf_dinfo.rd_time_recalled = gethrestime_sec();
1067 	fp->rf_dinfo.rd_ever_recalled = TRUE; /* used for policy decision */
1068 	/* Client causing recall not always available */
1069 	if (cp)
1070 		fp->rf_dinfo.rd_conflicted_client = cp->rc_clientid;
1071 
1072 	rfs4_dbe_unlock(fp->rf_dbe);
1073 
1074 	args = kmem_alloc(sizeof (struct master_recall_args), KM_SLEEP);
1075 	args->fp = fp;
1076 	args->recall = recall;
1077 	args->trunc = trunc;
1078 
1079 	(void) zthread_create(NULL, 0, do_recall_file, args, 0,
1080 	    minclsyspri);
1081 }
1082 
1083 void
rfs4_recall_deleg(rfs4_file_t * fp,bool_t trunc,rfs4_client_t * cp)1084 rfs4_recall_deleg(rfs4_file_t *fp, bool_t trunc, rfs4_client_t *cp)
1085 {
1086 	time_t elapsed1, elapsed2;
1087 
1088 	if (fp->rf_dinfo.rd_time_recalled != 0) {
1089 		elapsed1 = gethrestime_sec() - fp->rf_dinfo.rd_time_recalled;
1090 		elapsed2 = gethrestime_sec() - fp->rf_dinfo.rd_time_lastwrite;
1091 		/* First check to see if a revocation should occur */
1092 		if (elapsed1 > rfs4_lease_time &&
1093 		    elapsed2 > rfs4_lease_time) {
1094 			rfs4_revoke_file(fp);
1095 			return;
1096 		}
1097 		/*
1098 		 * Next check to see if a recall should be done again
1099 		 * so quickly.
1100 		 */
1101 		if (elapsed1 <= ((rfs4_lease_time * 20) / 100))
1102 			return;
1103 	}
1104 	rfs4_recall_file(fp, rfs4_do_cb_recall, trunc, cp);
1105 }
1106 
1107 /*
1108  * rfs4_check_recall is called from rfs4_do_open to determine if the current
1109  * open conflicts with the delegation.
1110  * Return true if we need recall otherwise false.
1111  * Assumes entry locks for sp and sp->rs_finfo are held.
1112  */
1113 bool_t
rfs4_check_recall(rfs4_state_t * sp,uint32_t access)1114 rfs4_check_recall(rfs4_state_t *sp, uint32_t access)
1115 {
1116 	open_delegation_type4 dtype = sp->rs_finfo->rf_dinfo.rd_dtype;
1117 
1118 	switch (dtype) {
1119 	case OPEN_DELEGATE_NONE:
1120 		/* Not currently delegated so there is nothing to do */
1121 		return (FALSE);
1122 	case OPEN_DELEGATE_READ:
1123 		/*
1124 		 * If the access is only asking for READ then there is
1125 		 * no conflict and nothing to do.  If it is asking
1126 		 * for write, then there will be conflict and the read
1127 		 * delegation should be recalled.
1128 		 */
1129 		if (access == OPEN4_SHARE_ACCESS_READ)
1130 			return (FALSE);
1131 		else
1132 			return (TRUE);
1133 	case OPEN_DELEGATE_WRITE:
1134 		/* Check to see if this client has the delegation */
1135 		return (rfs4_is_deleg(sp));
1136 	}
1137 
1138 	return (FALSE);
1139 }
1140 
1141 /*
1142  * Return the "best" allowable delegation available given the current
1143  * delegation type and the desired access and deny modes on the file.
1144  * At the point that this routine is called we know that the access and
1145  * deny modes are consistent with the file modes.
1146  */
1147 static open_delegation_type4
rfs4_check_delegation(rfs4_state_t * sp,rfs4_file_t * fp)1148 rfs4_check_delegation(rfs4_state_t *sp, rfs4_file_t *fp)
1149 {
1150 	open_delegation_type4 dtype = fp->rf_dinfo.rd_dtype;
1151 	uint32_t access = sp->rs_share_access;
1152 	uint32_t deny = sp->rs_share_deny;
1153 	int readcnt = 0;
1154 	int writecnt = 0;
1155 
1156 	switch (dtype) {
1157 	case OPEN_DELEGATE_NONE:
1158 		/*
1159 		 * Determine if more than just this OPEN have the file
1160 		 * open and if so, no delegation may be provided to
1161 		 * the client.
1162 		 */
1163 		if (access & OPEN4_SHARE_ACCESS_WRITE)
1164 			writecnt++;
1165 		if (access & OPEN4_SHARE_ACCESS_READ)
1166 			readcnt++;
1167 
1168 		if (fp->rf_access_read > readcnt ||
1169 		    fp->rf_access_write > writecnt)
1170 			return (OPEN_DELEGATE_NONE);
1171 
1172 		/*
1173 		 * If the client is going to write, or if the client
1174 		 * has exclusive access, return a write delegation.
1175 		 */
1176 		if ((access & OPEN4_SHARE_ACCESS_WRITE) ||
1177 		    (deny & (OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE)))
1178 			return (OPEN_DELEGATE_WRITE);
1179 		/*
1180 		 * If we don't want to write or we've haven't denied read
1181 		 * access to others, return a read delegation.
1182 		 */
1183 		if ((access & ~OPEN4_SHARE_ACCESS_WRITE) ||
1184 		    (deny & ~OPEN4_SHARE_DENY_READ))
1185 			return (OPEN_DELEGATE_READ);
1186 
1187 		/* Shouldn't get here */
1188 		return (OPEN_DELEGATE_NONE);
1189 
1190 	case OPEN_DELEGATE_READ:
1191 		/*
1192 		 * If the file is delegated for read but we wan't to
1193 		 * write or deny others to read then we can't delegate
1194 		 * the file. We shouldn't get here since the delegation should
1195 		 * have been recalled already.
1196 		 */
1197 		if ((access & OPEN4_SHARE_ACCESS_WRITE) ||
1198 		    (deny & OPEN4_SHARE_DENY_READ))
1199 			return (OPEN_DELEGATE_NONE);
1200 		return (OPEN_DELEGATE_READ);
1201 
1202 	case OPEN_DELEGATE_WRITE:
1203 		return (OPEN_DELEGATE_WRITE);
1204 	}
1205 
1206 	/* Shouldn't get here */
1207 	return (OPEN_DELEGATE_NONE);
1208 }
1209 
1210 /*
1211  * Given the desired delegation type and the "history" of the file
1212  * determine the actual delegation type to return.
1213  */
1214 static open_delegation_type4
rfs4_delegation_policy(nfs4_srv_t * nsrv4,open_delegation_type4 dtype,rfs4_dinfo_t * dinfo,clientid4 cid)1215 rfs4_delegation_policy(nfs4_srv_t *nsrv4, open_delegation_type4 dtype,
1216     rfs4_dinfo_t *dinfo, clientid4 cid)
1217 {
1218 	time_t elapsed;
1219 
1220 	if (nsrv4->nfs4_deleg_policy != SRV_NORMAL_DELEGATE)
1221 		return (OPEN_DELEGATE_NONE);
1222 
1223 	/*
1224 	 * Has this file/delegation ever been recalled?  If not then
1225 	 * no further checks for a delegation race need to be done.
1226 	 * However if a recall has occurred, then check to see if a
1227 	 * client has caused its own delegation recall to occur.  If
1228 	 * not, then has a delegation for this file been returned
1229 	 * recently?  If so, then do not assign a new delegation to
1230 	 * avoid a "delegation race" between the original client and
1231 	 * the new/conflicting client.
1232 	 */
1233 	if (dinfo->rd_ever_recalled == TRUE) {
1234 		if (dinfo->rd_conflicted_client != cid) {
1235 			elapsed = gethrestime_sec() - dinfo->rd_time_returned;
1236 			if (elapsed < rfs4_lease_time)
1237 				return (OPEN_DELEGATE_NONE);
1238 		}
1239 	}
1240 
1241 	/* Limit the number of read grants */
1242 	if (dtype == OPEN_DELEGATE_READ &&
1243 	    dinfo->rd_rdgrants > MAX_READ_DELEGATIONS)
1244 		return (OPEN_DELEGATE_NONE);
1245 
1246 	/*
1247 	 * Should consider limiting total number of read/write
1248 	 * delegations the server will permit.
1249 	 */
1250 
1251 	return (dtype);
1252 }
1253 
1254 /*
1255  * Try and grant a delegation for an open give the state. The routine
1256  * returns the delegation type granted. This could be OPEN_DELEGATE_NONE.
1257  *
1258  * The state and associate file entry must be locked
1259  */
1260 rfs4_deleg_state_t *
rfs4_grant_delegation(delegreq_t dreq,rfs4_state_t * sp,int * recall)1261 rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall)
1262 {
1263 	nfs4_srv_t *nsrv4;
1264 	rfs4_file_t *fp = sp->rs_finfo;
1265 	open_delegation_type4 dtype;
1266 	int no_delegation;
1267 
1268 	ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
1269 	ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1270 
1271 	nsrv4 = nfs4_get_srv();
1272 
1273 	/* Is the server even providing delegations? */
1274 	if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE ||
1275 	    dreq == DELEG_NONE) {
1276 		return (NULL);
1277 	}
1278 
1279 	/* Check to see if delegations have been temporarily disabled */
1280 	mutex_enter(&nsrv4->deleg_lock);
1281 	no_delegation = rfs4_deleg_disabled;
1282 	mutex_exit(&nsrv4->deleg_lock);
1283 
1284 	if (no_delegation)
1285 		return (NULL);
1286 
1287 	/* Don't grant a delegation if a deletion is impending. */
1288 	if (fp->rf_dinfo.rd_hold_grant > 0) {
1289 		return (NULL);
1290 	}
1291 
1292 	/*
1293 	 * Don't grant a delegation if there are any lock manager
1294 	 * (NFSv2/v3) locks for the file.  This is a bit of a hack (e.g.,
1295 	 * if there are only read locks we should be able to grant a
1296 	 * read-only delegation), but it's good enough for now.
1297 	 *
1298 	 * MT safety: the lock manager checks for conflicting delegations
1299 	 * before processing a lock request.  That check will block until
1300 	 * we are done here.  So if the lock manager acquires a lock after
1301 	 * we decide to grant the delegation, the delegation will get
1302 	 * immediately recalled (if there's a conflict), so we're safe.
1303 	 */
1304 	if (lm_vp_active(fp->rf_vp)) {
1305 		return (NULL);
1306 	}
1307 
1308 	/*
1309 	 * Based on the type of delegation request passed in, take the
1310 	 * appropriate action (DELEG_NONE is handled above)
1311 	 */
1312 	switch (dreq) {
1313 
1314 	case DELEG_READ:
1315 	case DELEG_WRITE:
1316 		/*
1317 		 * The server "must" grant the delegation in this case.
1318 		 * Client is using open previous
1319 		 */
1320 		dtype = (open_delegation_type4)dreq;
1321 		*recall = 1;
1322 		break;
1323 	case DELEG_ANY:
1324 		/*
1325 		 * If a valid callback path does not exist, no delegation may
1326 		 * be granted.
1327 		 */
1328 		if (sp->rs_owner->ro_client->rc_cbinfo.cb_state != CB_OK)
1329 			return (NULL);
1330 
1331 		/*
1332 		 * If the original operation which caused time_rm_delayed
1333 		 * to be set hasn't been retried and completed for one
1334 		 * full lease period, clear it and allow delegations to
1335 		 * get granted again.
1336 		 */
1337 		if (fp->rf_dinfo.rd_time_rm_delayed > 0 &&
1338 		    gethrestime_sec() >
1339 		    fp->rf_dinfo.rd_time_rm_delayed + rfs4_lease_time)
1340 			fp->rf_dinfo.rd_time_rm_delayed = 0;
1341 
1342 		/*
1343 		 * If we are waiting for a delegation to be returned then
1344 		 * don't delegate this file. We do this for correctness as
1345 		 * well as if the file is being recalled we would likely
1346 		 * recall this file again.
1347 		 */
1348 
1349 		if (fp->rf_dinfo.rd_time_recalled != 0 ||
1350 		    fp->rf_dinfo.rd_time_rm_delayed != 0)
1351 			return (NULL);
1352 
1353 		/* Get the "best" delegation candidate */
1354 		dtype = rfs4_check_delegation(sp, fp);
1355 
1356 		if (dtype == OPEN_DELEGATE_NONE)
1357 			return (NULL);
1358 
1359 		/*
1360 		 * Based on policy and the history of the file get the
1361 		 * actual delegation.
1362 		 */
1363 		dtype = rfs4_delegation_policy(nsrv4, dtype, &fp->rf_dinfo,
1364 		    sp->rs_owner->ro_client->rc_clientid);
1365 
1366 		if (dtype == OPEN_DELEGATE_NONE)
1367 			return (NULL);
1368 		break;
1369 	default:
1370 		return (NULL);
1371 	}
1372 
1373 	/* set the delegation for the state */
1374 	return (rfs4_deleg_state(sp, dtype, recall));
1375 }
1376 
1377 void
rfs4_set_deleg_response(rfs4_deleg_state_t * dsp,open_delegation4 * dp,nfsace4 * ace,int recall)1378 rfs4_set_deleg_response(rfs4_deleg_state_t *dsp, open_delegation4 *dp,
1379     nfsace4 *ace,  int recall)
1380 {
1381 	open_write_delegation4 *wp;
1382 	open_read_delegation4 *rp;
1383 	nfs_space_limit4 *spl;
1384 	nfsace4 nace;
1385 
1386 	/*
1387 	 * We need to allocate a new copy of the who string.
1388 	 * this string will be freed by the rfs4_op_open dis_resfree
1389 	 * routine. We need to do this allocation since replays will
1390 	 * be allocated and rfs4_compound can't tell the difference from
1391 	 * a replay and an inital open. N.B. if an ace is passed in, it
1392 	 * the caller's responsibility to free it.
1393 	 */
1394 
1395 	if (ace == NULL) {
1396 		/*
1397 		 * Default is to deny all access, the client will have
1398 		 * to contact the server.  XXX Do we want to actually
1399 		 * set a deny for every one, or do we simply want to
1400 		 * construct an entity that will match no one?
1401 		 */
1402 		nace.type = ACE4_ACCESS_DENIED_ACE_TYPE;
1403 		nace.flag = 0;
1404 		nace.access_mask = ACE4_VALID_MASK_BITS;
1405 		(void) str_to_utf8(ACE4_WHO_EVERYONE, &nace.who);
1406 	} else {
1407 		nace.type = ace->type;
1408 		nace.flag = ace->flag;
1409 		nace.access_mask = ace->access_mask;
1410 		(void) utf8_copy(&ace->who, &nace.who);
1411 	}
1412 
1413 	dp->delegation_type = dsp->rds_dtype;
1414 
1415 	switch (dsp->rds_dtype) {
1416 	case OPEN_DELEGATE_NONE:
1417 		break;
1418 	case OPEN_DELEGATE_READ:
1419 		rp = &dp->open_delegation4_u.read;
1420 		rp->stateid = dsp->rds_delegid.stateid;
1421 		rp->recall = (bool_t)recall;
1422 		rp->permissions = nace;
1423 		break;
1424 	case OPEN_DELEGATE_WRITE:
1425 		wp = &dp->open_delegation4_u.write;
1426 		wp->stateid = dsp->rds_delegid.stateid;
1427 		wp->recall = (bool_t)recall;
1428 		spl = &wp->space_limit;
1429 		spl->limitby = NFS_LIMIT_SIZE;
1430 		spl->nfs_space_limit4_u.filesize = 0;
1431 		wp->permissions = nace;
1432 		break;
1433 	}
1434 }
1435 
1436 /*
1437  * Check if the file is delegated via the provided file struct.
1438  * Return TRUE if it is delegated.  This is intended for use by
1439  * the v4 server.  The v2/v3 server code should use rfs4_check_delegated().
1440  *
1441  * Note that if the file is found to have a delegation, it is
1442  * recalled, unless the clientid of the caller matches the clientid of the
1443  * delegation. If the caller has specified, there is a slight delay
1444  * inserted in the hopes that the delegation will be returned quickly.
1445  */
1446 bool_t
rfs4_check_delegated_byfp(int mode,rfs4_file_t * fp,bool_t trunc,bool_t do_delay,bool_t is_rm,clientid4 * cp)1447 rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp,
1448     bool_t trunc, bool_t do_delay, bool_t is_rm, clientid4 *cp)
1449 {
1450 	rfs4_deleg_state_t *dsp;
1451 
1452 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
1453 
1454 	/* Is delegation enabled? */
1455 	if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE)
1456 		return (FALSE);
1457 
1458 	/* do we have a delegation on this file? */
1459 	rfs4_dbe_lock(fp->rf_dbe);
1460 	if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
1461 		if (is_rm)
1462 			fp->rf_dinfo.rd_hold_grant++;
1463 		rfs4_dbe_unlock(fp->rf_dbe);
1464 		return (FALSE);
1465 	}
1466 	/*
1467 	 * do we have a write delegation on this file or are we
1468 	 * requesting write access to a file with any type of existing
1469 	 * delegation?
1470 	 */
1471 	if (mode == FWRITE || fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) {
1472 		if (cp != NULL) {
1473 			dsp = list_head(&fp->rf_delegstatelist);
1474 			if (dsp == NULL) {
1475 				rfs4_dbe_unlock(fp->rf_dbe);
1476 				return (FALSE);
1477 			}
1478 			/*
1479 			 * Does the requestor already own the delegation?
1480 			 */
1481 			if (dsp->rds_client->rc_clientid == *(cp)) {
1482 				rfs4_dbe_unlock(fp->rf_dbe);
1483 				return (FALSE);
1484 			}
1485 		}
1486 
1487 		rfs4_dbe_unlock(fp->rf_dbe);
1488 		rfs4_recall_deleg(fp, trunc, NULL);
1489 
1490 		if (!do_delay) {
1491 			rfs4_dbe_lock(fp->rf_dbe);
1492 			fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec();
1493 			rfs4_dbe_unlock(fp->rf_dbe);
1494 			return (TRUE);
1495 		}
1496 
1497 		delay(NFS4_DELEGATION_CONFLICT_DELAY);
1498 
1499 		rfs4_dbe_lock(fp->rf_dbe);
1500 		if (fp->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE) {
1501 			fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec();
1502 			rfs4_dbe_unlock(fp->rf_dbe);
1503 			return (TRUE);
1504 		}
1505 	}
1506 	if (is_rm)
1507 		fp->rf_dinfo.rd_hold_grant++;
1508 	rfs4_dbe_unlock(fp->rf_dbe);
1509 	return (FALSE);
1510 }
1511 
1512 /*
1513  * Check if the file is delegated in the case of a v2 or v3 access.
1514  * Return TRUE if it is delegated which in turn means that v2 should
1515  * drop the request and in the case of v3 JUKEBOX should be returned.
1516  */
1517 bool_t
rfs4_check_delegated(int mode,vnode_t * vp,bool_t trunc)1518 rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc)
1519 {
1520 	nfs4_srv_t *nsrv4;
1521 	rfs4_file_t *fp;
1522 	bool_t create = FALSE;
1523 	bool_t rc = FALSE;
1524 
1525 	nsrv4 = nfs4_get_srv();
1526 	rfs4_hold_deleg_policy(nsrv4);
1527 
1528 	/* Is delegation enabled? */
1529 	if (nsrv4->nfs4_deleg_policy != SRV_NEVER_DELEGATE) {
1530 		fp = rfs4_findfile(vp, NULL, &create);
1531 		if (fp != NULL) {
1532 			if (rfs4_check_delegated_byfp(mode, fp, trunc,
1533 			    TRUE, FALSE, NULL)) {
1534 				rc = TRUE;
1535 			}
1536 			rfs4_file_rele(fp);
1537 		}
1538 	}
1539 	rfs4_rele_deleg_policy(nsrv4);
1540 	return (rc);
1541 }
1542 
1543 /*
1544  * Release a hold on the hold_grant counter which
1545  * prevents delegation from being granted while a remove
1546  * or a rename is in progress.
1547  */
1548 void
rfs4_clear_dont_grant(rfs4_file_t * fp)1549 rfs4_clear_dont_grant(rfs4_file_t *fp)
1550 {
1551 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
1552 
1553 	if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE)
1554 		return;
1555 	rfs4_dbe_lock(fp->rf_dbe);
1556 	ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
1557 	fp->rf_dinfo.rd_hold_grant--;
1558 	fp->rf_dinfo.rd_time_rm_delayed = 0;
1559 	rfs4_dbe_unlock(fp->rf_dbe);
1560 }
1561 
1562 /*
1563  * State support for delegation.
1564  * Set the state delegation type for this state;
1565  * This routine is called from open via rfs4_grant_delegation and the entry
1566  * locks on sp and sp->rs_finfo are assumed.
1567  */
1568 static rfs4_deleg_state_t *
rfs4_deleg_state(rfs4_state_t * sp,open_delegation_type4 dtype,int * recall)1569 rfs4_deleg_state(rfs4_state_t *sp, open_delegation_type4 dtype, int *recall)
1570 {
1571 	rfs4_file_t *fp = sp->rs_finfo;
1572 	bool_t create = TRUE;
1573 	rfs4_deleg_state_t *dsp;
1574 	vnode_t *vp;
1575 	int open_prev = *recall;
1576 	int ret;
1577 	int fflags = 0;
1578 
1579 	ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
1580 	ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1581 
1582 	/* Shouldn't happen */
1583 	if (fp->rf_dinfo.rd_recall_count != 0 ||
1584 	    (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ &&
1585 	    dtype != OPEN_DELEGATE_READ)) {
1586 		return (NULL);
1587 	}
1588 
1589 	/* Unlock to avoid deadlock */
1590 	rfs4_dbe_unlock(fp->rf_dbe);
1591 	rfs4_dbe_unlock(sp->rs_dbe);
1592 
1593 	dsp = rfs4_finddeleg(sp, &create);
1594 
1595 	rfs4_dbe_lock(sp->rs_dbe);
1596 	rfs4_dbe_lock(fp->rf_dbe);
1597 
1598 	if (dsp == NULL)
1599 		return (NULL);
1600 
1601 	/*
1602 	 * It is possible that since we dropped the lock
1603 	 * in order to call finddeleg, the rfs4_file_t
1604 	 * was marked such that we should not grant a
1605 	 * delegation, if so bail out.
1606 	 */
1607 	if (fp->rf_dinfo.rd_hold_grant > 0) {
1608 		rfs4_deleg_state_rele(dsp);
1609 		return (NULL);
1610 	}
1611 
1612 	if (create == FALSE) {
1613 		if (sp->rs_owner->ro_client == dsp->rds_client &&
1614 		    dsp->rds_dtype == dtype) {
1615 			return (dsp);
1616 		} else {
1617 			rfs4_deleg_state_rele(dsp);
1618 			return (NULL);
1619 		}
1620 	}
1621 
1622 	/*
1623 	 * Check that this file has not been delegated to another
1624 	 * client
1625 	 */
1626 	if (fp->rf_dinfo.rd_recall_count != 0 ||
1627 	    fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE ||
1628 	    (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ &&
1629 	    dtype != OPEN_DELEGATE_READ)) {
1630 		rfs4_deleg_state_rele(dsp);
1631 		return (NULL);
1632 	}
1633 
1634 	vp = fp->rf_vp;
1635 	/* vnevent_support returns 0 if file system supports vnevents */
1636 	if (vnevent_support(vp, NULL)) {
1637 		rfs4_deleg_state_rele(dsp);
1638 		return (NULL);
1639 	}
1640 
1641 	/* Calculate the fflags for this OPEN. */
1642 	if (sp->rs_share_access & OPEN4_SHARE_ACCESS_READ)
1643 		fflags |= FREAD;
1644 	if (sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE)
1645 		fflags |= FWRITE;
1646 
1647 	*recall = 0;
1648 	/*
1649 	 * Before granting a delegation we need to know if anyone else has
1650 	 * opened the file in a conflicting mode.  However, first we need to
1651 	 * know how we opened the file to check the counts properly.
1652 	 */
1653 	if (dtype == OPEN_DELEGATE_READ) {
1654 		if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1655 		    (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1656 		    vn_is_mapped(vp, V_WRITE)) {
1657 			if (open_prev) {
1658 				*recall = 1;
1659 			} else {
1660 				rfs4_deleg_state_rele(dsp);
1661 				return (NULL);
1662 			}
1663 		}
1664 		ret = fem_install(vp, deleg_rdops, (void *)fp, OPUNIQ,
1665 		    rfs4_mon_hold, rfs4_mon_rele);
1666 		if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1667 		    (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1668 		    vn_is_mapped(vp, V_WRITE)) {
1669 			if (open_prev) {
1670 				*recall = 1;
1671 			} else {
1672 				(void) fem_uninstall(vp, deleg_rdops,
1673 				    (void *)fp);
1674 				rfs4_deleg_state_rele(dsp);
1675 				return (NULL);
1676 			}
1677 		}
1678 		/*
1679 		 * Because a client can hold onto a delegation after the
1680 		 * file has been closed, we need to keep track of the
1681 		 * access to this file.  Otherwise the CIFS server would
1682 		 * not know about the client accessing the file and could
1683 		 * inappropriately grant an OPLOCK.
1684 		 * fem_install() returns EBUSY when asked to install a
1685 		 * OPUNIQ monitor more than once.  Therefore, check the
1686 		 * return code because we only want this done once.
1687 		 */
1688 		if (ret == 0)
1689 			vn_open_upgrade(vp, FREAD);
1690 	} else { /* WRITE */
1691 		if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1692 		    (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1693 		    ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) ||
1694 		    (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) ||
1695 		    vn_is_mapped(vp, V_RDORWR)) {
1696 			if (open_prev) {
1697 				*recall = 1;
1698 			} else {
1699 				rfs4_deleg_state_rele(dsp);
1700 				return (NULL);
1701 			}
1702 		}
1703 		ret = fem_install(vp, deleg_wrops, (void *)fp, OPUNIQ,
1704 		    rfs4_mon_hold, rfs4_mon_rele);
1705 		if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1706 		    (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1707 		    ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) ||
1708 		    (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) ||
1709 		    vn_is_mapped(vp, V_RDORWR)) {
1710 			if (open_prev) {
1711 				*recall = 1;
1712 			} else {
1713 				(void) fem_uninstall(vp, deleg_wrops,
1714 				    (void *)fp);
1715 				rfs4_deleg_state_rele(dsp);
1716 				return (NULL);
1717 			}
1718 		}
1719 		/*
1720 		 * Because a client can hold onto a delegation after the
1721 		 * file has been closed, we need to keep track of the
1722 		 * access to this file.  Otherwise the CIFS server would
1723 		 * not know about the client accessing the file and could
1724 		 * inappropriately grant an OPLOCK.
1725 		 * fem_install() returns EBUSY when asked to install a
1726 		 * OPUNIQ monitor more than once.  Therefore, check the
1727 		 * return code because we only want this done once.
1728 		 */
1729 		if (ret == 0)
1730 			vn_open_upgrade(vp, FREAD|FWRITE);
1731 	}
1732 	/* Place on delegation list for file */
1733 	ASSERT(!list_link_active(&dsp->rds_node));
1734 	list_insert_tail(&fp->rf_delegstatelist, dsp);
1735 
1736 	dsp->rds_dtype = fp->rf_dinfo.rd_dtype = dtype;
1737 
1738 	/* Update delegation stats for this file */
1739 	fp->rf_dinfo.rd_time_lastgrant = gethrestime_sec();
1740 
1741 	/* reset since this is a new delegation */
1742 	fp->rf_dinfo.rd_conflicted_client = 0;
1743 	fp->rf_dinfo.rd_ever_recalled = FALSE;
1744 
1745 	if (dtype == OPEN_DELEGATE_READ)
1746 		fp->rf_dinfo.rd_rdgrants++;
1747 	else
1748 		fp->rf_dinfo.rd_wrgrants++;
1749 
1750 	return (dsp);
1751 }
1752 
1753 /*
1754  * State routine for the server when a delegation is returned.
1755  */
1756 void
rfs4_return_deleg(rfs4_deleg_state_t * dsp,bool_t revoked)1757 rfs4_return_deleg(rfs4_deleg_state_t *dsp, bool_t revoked)
1758 {
1759 	rfs4_file_t *fp = dsp->rds_finfo;
1760 	open_delegation_type4 dtypewas;
1761 
1762 	rfs4_dbe_lock(fp->rf_dbe);
1763 
1764 	/* nothing to do if no longer on list */
1765 	if (!list_link_active(&dsp->rds_node)) {
1766 		rfs4_dbe_unlock(fp->rf_dbe);
1767 		return;
1768 	}
1769 
1770 	/* Remove state from recall list */
1771 	list_remove(&fp->rf_delegstatelist, dsp);
1772 
1773 	if (list_is_empty(&fp->rf_delegstatelist)) {
1774 		dtypewas = fp->rf_dinfo.rd_dtype;
1775 		fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
1776 		rfs4_dbe_cv_broadcast(fp->rf_dbe);
1777 
1778 		/* if file system was unshared, the vp will be NULL */
1779 		if (fp->rf_vp != NULL) {
1780 			/*
1781 			 * Once a delegation is no longer held by any client,
1782 			 * the monitor is uninstalled.  At this point, the
1783 			 * client must send OPEN otw, so we don't need the
1784 			 * reference on the vnode anymore.  The open
1785 			 * downgrade removes the reference put on earlier.
1786 			 */
1787 			if (dtypewas == OPEN_DELEGATE_READ) {
1788 				(void) fem_uninstall(fp->rf_vp, deleg_rdops,
1789 				    (void *)fp);
1790 				vn_open_downgrade(fp->rf_vp, FREAD);
1791 			} else if (dtypewas == OPEN_DELEGATE_WRITE) {
1792 				(void) fem_uninstall(fp->rf_vp, deleg_wrops,
1793 				    (void *)fp);
1794 				vn_open_downgrade(fp->rf_vp, FREAD|FWRITE);
1795 			}
1796 		}
1797 	}
1798 
1799 	switch (dsp->rds_dtype) {
1800 	case OPEN_DELEGATE_READ:
1801 		fp->rf_dinfo.rd_rdgrants--;
1802 		break;
1803 	case OPEN_DELEGATE_WRITE:
1804 		fp->rf_dinfo.rd_wrgrants--;
1805 		break;
1806 	default:
1807 		break;
1808 	}
1809 
1810 	/* used in the policy decision */
1811 	fp->rf_dinfo.rd_time_returned = gethrestime_sec();
1812 
1813 	/*
1814 	 * reset the time_recalled field so future delegations are not
1815 	 * accidentally revoked
1816 	 */
1817 	if ((fp->rf_dinfo.rd_rdgrants + fp->rf_dinfo.rd_wrgrants) == 0)
1818 		fp->rf_dinfo.rd_time_recalled = 0;
1819 
1820 	rfs4_dbe_unlock(fp->rf_dbe);
1821 
1822 	rfs4_dbe_lock(dsp->rds_dbe);
1823 
1824 	dsp->rds_dtype = OPEN_DELEGATE_NONE;
1825 
1826 	if (revoked == TRUE)
1827 		dsp->rds_time_revoked = gethrestime_sec();
1828 
1829 	rfs4_dbe_invalidate(dsp->rds_dbe);
1830 
1831 	rfs4_dbe_unlock(dsp->rds_dbe);
1832 
1833 	if (revoked == TRUE) {
1834 		rfs4_dbe_lock(dsp->rds_client->rc_dbe);
1835 		dsp->rds_client->rc_deleg_revoked++;	/* observability */
1836 		rfs4_dbe_unlock(dsp->rds_client->rc_dbe);
1837 	}
1838 }
1839 
1840 static void
rfs4_revoke_file(rfs4_file_t * fp)1841 rfs4_revoke_file(rfs4_file_t *fp)
1842 {
1843 	rfs4_deleg_state_t *dsp;
1844 
1845 	/*
1846 	 * The lock for rfs4_file_t must be held when traversing the
1847 	 * delegation list but that lock needs to be released to call
1848 	 * rfs4_return_deleg()
1849 	 */
1850 	rfs4_dbe_lock(fp->rf_dbe);
1851 	while ((dsp = list_head(&fp->rf_delegstatelist)) != NULL) {
1852 		rfs4_dbe_hold(dsp->rds_dbe);
1853 		rfs4_dbe_unlock(fp->rf_dbe);
1854 		rfs4_return_deleg(dsp, TRUE);
1855 		rfs4_deleg_state_rele(dsp);
1856 		rfs4_dbe_lock(fp->rf_dbe);
1857 	}
1858 	rfs4_dbe_unlock(fp->rf_dbe);
1859 }
1860 
1861 /*
1862  * A delegation is assumed to be present on the file associated with
1863  * "sp".  Check to see if the delegation matches is associated with
1864  * the same client as referenced by "sp".  If it is not, TRUE is
1865  * returned.  If the delegation DOES match the client (or no
1866  * delegation is present), return FALSE.
1867  * Assume the state entry and file entry are locked.
1868  */
1869 bool_t
rfs4_is_deleg(rfs4_state_t * sp)1870 rfs4_is_deleg(rfs4_state_t *sp)
1871 {
1872 	rfs4_deleg_state_t *dsp;
1873 	rfs4_file_t *fp = sp->rs_finfo;
1874 	rfs4_client_t *cp = sp->rs_owner->ro_client;
1875 
1876 	ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1877 	for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
1878 	    dsp = list_next(&fp->rf_delegstatelist, dsp)) {
1879 		if (cp != dsp->rds_client) {
1880 			return (TRUE);
1881 		}
1882 	}
1883 	return (FALSE);
1884 }
1885 
1886 void
rfs4_disable_delegation(void)1887 rfs4_disable_delegation(void)
1888 {
1889 	nfs4_srv_t *nsrv4;
1890 
1891 	nsrv4 = nfs4_get_srv();
1892 	mutex_enter(&nsrv4->deleg_lock);
1893 	rfs4_deleg_disabled++;
1894 	mutex_exit(&nsrv4->deleg_lock);
1895 }
1896 
1897 void
rfs4_enable_delegation(void)1898 rfs4_enable_delegation(void)
1899 {
1900 	nfs4_srv_t *nsrv4;
1901 
1902 	nsrv4 = nfs4_get_srv();
1903 	mutex_enter(&nsrv4->deleg_lock);
1904 	ASSERT(rfs4_deleg_disabled > 0);
1905 	rfs4_deleg_disabled--;
1906 	mutex_exit(&nsrv4->deleg_lock);
1907 }
1908 
1909 void
rfs4_mon_hold(void * arg)1910 rfs4_mon_hold(void *arg)
1911 {
1912 	rfs4_file_t *fp = arg;
1913 
1914 	rfs4_dbe_hold(fp->rf_dbe);
1915 }
1916 
1917 void
rfs4_mon_rele(void * arg)1918 rfs4_mon_rele(void *arg)
1919 {
1920 	rfs4_file_t *fp = arg;
1921 
1922 	rfs4_dbe_rele_nolock(fp->rf_dbe);
1923 }
1924