1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Copyright 2018 Nexenta Systems, Inc.
29  */
30 
31 #include <sys/systm.h>
32 #include <rpc/auth.h>
33 #include <rpc/clnt.h>
34 #include <nfs/nfs4_kprot.h>
35 #include <nfs/nfs4.h>
36 #include <nfs/lm.h>
37 #include <sys/cmn_err.h>
38 #include <sys/disp.h>
39 #include <sys/sdt.h>
40 
41 #include <sys/pathname.h>
42 
43 #include <sys/strsubr.h>
44 #include <sys/ddi.h>
45 
46 #include <sys/vnode.h>
47 #include <sys/sdt.h>
48 #include <inet/common.h>
49 #include <inet/ip.h>
50 #include <inet/ip6.h>
51 
/* NOTE(review): not referenced in the visible part of this file. */
#define	MAX_READ_DELEGATIONS 5

/*
 * File-local tunables.  rfs4_max_setup_cb_tries bounds the number of
 * callback path setup attempts made by rfs4_cbinfo_hold().
 * NOTE(review): rfs4_deleg_wlp and rfs4_deleg_disabled are not used in
 * the visible part of this file; confirm their meaning at their users.
 */
static int rfs4_deleg_wlp = 5;
static int rfs4_deleg_disabled;
static int rfs4_max_setup_cb_tries = 5;

#ifdef DEBUG

/* Knobs and counters that only exist in DEBUG builds. */
static int rfs4_test_cbgetattr_fail = 0;
int rfs4_cb_null;	/* bumped after each successful CB_NULL call */
int rfs4_cb_debug;
int rfs4_deleg_debug;

#endif

/* Prototypes for static helpers that are used before they are defined. */
static void rfs4_recall_file(rfs4_file_t *,
    void (*recall)(rfs4_deleg_state_t *, bool_t),
    bool_t, rfs4_client_t *);
static	void		rfs4_revoke_file(rfs4_file_t *);
static	void		rfs4_cb_chflush(rfs4_cbinfo_t *);
static	CLIENT		*rfs4_cb_getch(rfs4_cbinfo_t *);
static	void		rfs4_cb_freech(rfs4_cbinfo_t *, CLIENT *, bool_t);
static rfs4_deleg_state_t *rfs4_deleg_state(rfs4_state_t *,
    open_delegation_type4, int *);
76 
/*
 * Convert a universal address into a transport specific address.
 *
 * A universal address is the textual host address followed by two
 * decimal port octets, "<host>.<p1>.<p2>", where the port is
 * (p1 << 8) + p2.  The host part is parsed with inet_pton().
 *
 *	af	address family handed to inet_pton()
 *	ua	NUL terminated universal address; it is modified
 *		temporarily while parsing and restored before return
 *	ap	where inet_pton() stores the binary host address
 *	pp	where the port is stored, in network byte order
 *
 * Returns 0 on success.  Returns EINVAL when there are fewer than two
 * '.' separators, the host part does not parse, the port part contains
 * a character that is neither a digit nor '.', or a port octet is
 * larger than 255.
 */
static int
uaddr2sockaddr(int af, char *ua, void *ap, in_port_t *pp)
{
	int dots = 0, i, j, len, k;
	int error = EINVAL;
	unsigned int octet = 0;
	in_port_t port = 0;

	len = (int)strlen(ua);

	/* Walk backwards to the '.' that separates host from port. */
	for (i = len - 1; i >= 0; i--) {
		if (ua[i] == '.' && ++dots == 2)
			break;
	}
	if (i < 0)
		return (EINVAL);

	/*
	 * Terminate the host part in place for inet_pton(); k remembers
	 * where the '.' has to be put back, since the caller still owns
	 * (and later frees by length) the full string.
	 */
	k = i;
	ua[k] = '\0';

	if (inet_pton(af, ua, ap) != 1)
		goto out;

	for (j = k + 1; j < len; j++) {
		if (ua[j] == '.') {
			/* end of the high octet */
			port = (in_port_t)(octet << 8);
			octet = 0;
		} else if (ua[j] >= '0' && ua[j] <= '9') {
			octet = octet * 10 + (unsigned int)(ua[j] - '0');
			/*
			 * An octet never exceeds 255; reject it instead
			 * of letting the value silently wrap.
			 */
			if (octet > 255)
				goto out;
		} else {
			goto out;
		}
	}
	port += (in_port_t)octet;

	*pp = htons(port);
	error = 0;

out:
	ua[k] = '.';
	return (error);
}
135 
136 /*
137  * Update the delegation policy with the
138  * value of "new_policy"
139  */
140 void
rfs4_set_deleg_policy(nfs4_srv_t * nsrv4,srv_deleg_policy_t new_policy)141 rfs4_set_deleg_policy(nfs4_srv_t *nsrv4, srv_deleg_policy_t new_policy)
142 {
143 	rw_enter(&nsrv4->deleg_policy_lock, RW_WRITER);
144 	nsrv4->nfs4_deleg_policy = new_policy;
145 	rw_exit(&nsrv4->deleg_policy_lock);
146 }
147 
148 void
rfs4_hold_deleg_policy(nfs4_srv_t * nsrv4)149 rfs4_hold_deleg_policy(nfs4_srv_t *nsrv4)
150 {
151 	rw_enter(&nsrv4->deleg_policy_lock, RW_READER);
152 }
153 
154 void
rfs4_rele_deleg_policy(nfs4_srv_t * nsrv4)155 rfs4_rele_deleg_policy(nfs4_srv_t *nsrv4)
156 {
157 	rw_exit(&nsrv4->deleg_policy_lock);
158 }
159 
160 srv_deleg_policy_t
nfs4_get_deleg_policy()161 nfs4_get_deleg_policy()
162 {
163 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
164 	return (nsrv4->nfs4_deleg_policy);
165 }
166 
167 
168 /*
169  * This free function is to be used when the client struct is being
170  * released and nothing at all is needed of the callback info any
171  * longer.
172  */
173 void
rfs4_cbinfo_free(rfs4_cbinfo_t * cbp)174 rfs4_cbinfo_free(rfs4_cbinfo_t *cbp)
175 {
176 	char *addr = cbp->cb_callback.cb_location.r_addr;
177 	char *netid = cbp->cb_callback.cb_location.r_netid;
178 
179 	/* Free old address if any */
180 
181 	if (addr)
182 		kmem_free(addr, strlen(addr) + 1);
183 	if (netid)
184 		kmem_free(netid, strlen(netid) + 1);
185 
186 	addr = cbp->cb_newer.cb_callback.cb_location.r_addr;
187 	netid = cbp->cb_newer.cb_callback.cb_location.r_netid;
188 
189 	if (addr)
190 		kmem_free(addr, strlen(addr) + 1);
191 	if (netid)
192 		kmem_free(netid, strlen(netid) + 1);
193 
194 	if (cbp->cb_chc_free) {
195 		rfs4_cb_chflush(cbp);
196 	}
197 }
198 
199 /*
200  * The server uses this to check the callback path supplied by the
201  * client.  The callback connection is marked "in progress" while this
202  * work is going on and then eventually marked either OK or FAILED.
203  * This work can be done as part of a separate thread and at the end
204  * of this the thread will exit or it may be done such that the caller
205  * will continue with other work.
206  */
207 static void
rfs4_do_cb_null(rfs4_client_t * cp)208 rfs4_do_cb_null(rfs4_client_t *cp)
209 {
210 	struct timeval tv;
211 	CLIENT *ch;
212 	rfs4_cbstate_t newstate;
213 	rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
214 
215 	mutex_enter(cbp->cb_lock);
216 	/* If another thread is doing CB_NULL RPC then return */
217 	if (cbp->cb_nullcaller == TRUE) {
218 		mutex_exit(cbp->cb_lock);
219 		rfs4_client_rele(cp);
220 		zthread_exit();
221 	}
222 
223 	/* Mark the cbinfo as having a thread in the NULL callback */
224 	cbp->cb_nullcaller = TRUE;
225 
226 	/*
227 	 * Are there other threads still using the cbinfo client
228 	 * handles?  If so, this thread must wait before going and
229 	 * mucking aroiund with the callback information
230 	 */
231 	while (cbp->cb_refcnt != 0)
232 		cv_wait(cbp->cb_cv_nullcaller, cbp->cb_lock);
233 
234 	/*
235 	 * This thread itself may find that new callback info has
236 	 * arrived and is set up to handle this case and redrive the
237 	 * call to the client's callback server.
238 	 */
239 retry:
240 	if (cbp->cb_newer.cb_new == TRUE &&
241 	    cbp->cb_newer.cb_confirmed == TRUE) {
242 		char *addr = cbp->cb_callback.cb_location.r_addr;
243 		char *netid = cbp->cb_callback.cb_location.r_netid;
244 
245 		/*
246 		 * Free the old stuff if it exists; may be the first
247 		 * time through this path
248 		 */
249 		if (addr)
250 			kmem_free(addr, strlen(addr) + 1);
251 		if (netid)
252 			kmem_free(netid, strlen(netid) + 1);
253 
254 		/* Move over the addr/netid */
255 		cbp->cb_callback.cb_location.r_addr =
256 		    cbp->cb_newer.cb_callback.cb_location.r_addr;
257 		cbp->cb_newer.cb_callback.cb_location.r_addr = NULL;
258 		cbp->cb_callback.cb_location.r_netid =
259 		    cbp->cb_newer.cb_callback.cb_location.r_netid;
260 		cbp->cb_newer.cb_callback.cb_location.r_netid = NULL;
261 
262 		/* Get the program number */
263 		cbp->cb_callback.cb_program =
264 		    cbp->cb_newer.cb_callback.cb_program;
265 		cbp->cb_newer.cb_callback.cb_program = 0;
266 
267 		/* Don't forget the protocol's "cb_ident" field */
268 		cbp->cb_ident = cbp->cb_newer.cb_ident;
269 		cbp->cb_newer.cb_ident = 0;
270 
271 		/* no longer new */
272 		cbp->cb_newer.cb_new = FALSE;
273 		cbp->cb_newer.cb_confirmed = FALSE;
274 
275 		/* get rid of the old client handles that may exist */
276 		rfs4_cb_chflush(cbp);
277 
278 		cbp->cb_state = CB_NONE;
279 		cbp->cb_timefailed = 0; /* reset the clock */
280 		cbp->cb_notified_of_cb_path_down = TRUE;
281 	}
282 
283 	if (cbp->cb_state != CB_NONE) {
284 		cv_broadcast(cbp->cb_cv);	/* let the others know */
285 		cbp->cb_nullcaller = FALSE;
286 		mutex_exit(cbp->cb_lock);
287 		rfs4_client_rele(cp);
288 		zthread_exit();
289 	}
290 
291 	/* mark rfs4_client_t as CALLBACK NULL in progress */
292 	cbp->cb_state = CB_INPROG;
293 	mutex_exit(cbp->cb_lock);
294 
295 	/* get/generate a client handle */
296 	if ((ch = rfs4_cb_getch(cbp)) == NULL) {
297 		mutex_enter(cbp->cb_lock);
298 		cbp->cb_state = CB_BAD;
299 		cbp->cb_timefailed = gethrestime_sec(); /* observability */
300 		goto retry;
301 	}
302 
303 
304 	tv.tv_sec = 30;
305 	tv.tv_usec = 0;
306 	if (clnt_call(ch, CB_NULL, xdr_void, NULL, xdr_void, NULL, tv) != 0) {
307 		newstate = CB_BAD;
308 	} else {
309 		newstate = CB_OK;
310 #ifdef	DEBUG
311 		rfs4_cb_null++;
312 #endif
313 	}
314 
315 	/* Check to see if the client has specified new callback info */
316 	mutex_enter(cbp->cb_lock);
317 	rfs4_cb_freech(cbp, ch, TRUE);
318 	if (cbp->cb_newer.cb_new == TRUE &&
319 	    cbp->cb_newer.cb_confirmed == TRUE) {
320 		goto retry;	/* give the CB_NULL another chance */
321 	}
322 
323 	cbp->cb_state = newstate;
324 	if (cbp->cb_state == CB_BAD)
325 		cbp->cb_timefailed = gethrestime_sec(); /* observability */
326 
327 	cv_broadcast(cbp->cb_cv);	/* start up the other threads */
328 	cbp->cb_nullcaller = FALSE;
329 	mutex_exit(cbp->cb_lock);
330 	rfs4_client_rele(cp);
331 	zthread_exit();
332 }
333 
334 /*
335  * Given a client struct, inspect the callback info to see if the
336  * callback path is up and available.
337  *
338  * If new callback path is available and no one has set it up then
339  * try to set it up. If setup is not successful after 5 tries (5 secs)
340  * then gives up and returns NULL.
341  *
342  * If callback path is being initialized, then wait for the CB_NULL RPC
343  * call to occur.
344  */
345 static rfs4_cbinfo_t *
rfs4_cbinfo_hold(rfs4_client_t * cp)346 rfs4_cbinfo_hold(rfs4_client_t *cp)
347 {
348 	rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
349 	int retries = 0;
350 
351 	mutex_enter(cbp->cb_lock);
352 
353 	while (cbp->cb_newer.cb_new == TRUE && cbp->cb_nullcaller == FALSE) {
354 		/*
355 		 * Looks like a new callback path may be available and
356 		 * noone has set it up.
357 		 */
358 		mutex_exit(cbp->cb_lock);
359 		rfs4_dbe_hold(cp->rc_dbe);
360 		rfs4_do_cb_null(cp); /* caller will release client hold */
361 
362 		mutex_enter(cbp->cb_lock);
363 		/*
364 		 * If callback path is no longer new, or it's being setup
365 		 * then stop and wait for it to be done.
366 		 */
367 		if (cbp->cb_newer.cb_new == FALSE || cbp->cb_nullcaller == TRUE)
368 			break;
369 		mutex_exit(cbp->cb_lock);
370 
371 		if (++retries >= rfs4_max_setup_cb_tries)
372 			return (NULL);
373 		delay(hz);
374 		mutex_enter(cbp->cb_lock);
375 	}
376 
377 	/* Is there a thread working on doing the CB_NULL RPC? */
378 	if (cbp->cb_nullcaller == TRUE)
379 		cv_wait(cbp->cb_cv, cbp->cb_lock);  /* if so, wait on it */
380 
381 	/* If the callback path is not okay (up and running), just quit */
382 	if (cbp->cb_state != CB_OK) {
383 		mutex_exit(cbp->cb_lock);
384 		return (NULL);
385 	}
386 
387 	/* Let someone know we are using the current callback info */
388 	cbp->cb_refcnt++;
389 	mutex_exit(cbp->cb_lock);
390 	return (cbp);
391 }
392 
393 /*
394  * The caller is done with the callback info.  It may be that the
395  * caller's RPC failed and the NFSv4 client has actually provided new
396  * callback information.  If so, let the caller know so they can
397  * advantage of this and maybe retry the RPC that originally failed.
398  */
399 static int
rfs4_cbinfo_rele(rfs4_cbinfo_t * cbp,rfs4_cbstate_t newstate)400 rfs4_cbinfo_rele(rfs4_cbinfo_t *cbp, rfs4_cbstate_t newstate)
401 {
402 	int cb_new = FALSE;
403 
404 	mutex_enter(cbp->cb_lock);
405 
406 	/* The caller gets a chance to mark the callback info as bad */
407 	if (newstate != CB_NOCHANGE)
408 		cbp->cb_state = newstate;
409 	if (newstate == CB_FAILED) {
410 		cbp->cb_timefailed = gethrestime_sec(); /* observability */
411 		cbp->cb_notified_of_cb_path_down = FALSE;
412 	}
413 
414 	cbp->cb_refcnt--;	/* no longer using the information */
415 
416 	/*
417 	 * A thread may be waiting on this one to finish and if so,
418 	 * let it know that it is okay to do the CB_NULL to the
419 	 * client's callback server.
420 	 */
421 	if (cbp->cb_refcnt == 0 && cbp->cb_nullcaller)
422 		cv_broadcast(cbp->cb_cv_nullcaller);
423 
424 	/*
425 	 * If this is the last thread to use the callback info and
426 	 * there is new callback information to try and no thread is
427 	 * there ready to do the CB_NULL, then return true to teh
428 	 * caller so they can do the CB_NULL
429 	 */
430 	if (cbp->cb_refcnt == 0 &&
431 	    cbp->cb_nullcaller == FALSE &&
432 	    cbp->cb_newer.cb_new == TRUE &&
433 	    cbp->cb_newer.cb_confirmed == TRUE)
434 		cb_new = TRUE;
435 
436 	mutex_exit(cbp->cb_lock);
437 
438 	return (cb_new);
439 }
440 
441 /*
442  * Given the information in the callback info struct, create a client
443  * handle that can be used by the server for its callback path.
444  */
445 static CLIENT *
rfs4_cbch_init(rfs4_cbinfo_t * cbp)446 rfs4_cbch_init(rfs4_cbinfo_t *cbp)
447 {
448 	struct knetconfig knc;
449 	vnode_t *vp;
450 	struct sockaddr_in addr4;
451 	struct sockaddr_in6 addr6;
452 	void *addr, *taddr;
453 	in_port_t *pp;
454 	int af;
455 	char *devnam;
456 	struct netbuf nb;
457 	int size;
458 	CLIENT *ch = NULL;
459 	int useresvport = 0;
460 
461 	mutex_enter(cbp->cb_lock);
462 
463 	if (cbp->cb_callback.cb_location.r_netid == NULL ||
464 	    cbp->cb_callback.cb_location.r_addr == NULL) {
465 		goto cb_init_out;
466 	}
467 
468 	if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp") == 0) {
469 		knc.knc_semantics = NC_TPI_COTS;
470 		knc.knc_protofmly = "inet";
471 		knc.knc_proto = "tcp";
472 		devnam = "/dev/tcp";
473 		af = AF_INET;
474 	} else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp")
475 	    == 0) {
476 		knc.knc_semantics = NC_TPI_CLTS;
477 		knc.knc_protofmly = "inet";
478 		knc.knc_proto = "udp";
479 		devnam = "/dev/udp";
480 		af = AF_INET;
481 	} else if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp6")
482 	    == 0) {
483 		knc.knc_semantics = NC_TPI_COTS;
484 		knc.knc_protofmly = "inet6";
485 		knc.knc_proto = "tcp";
486 		devnam = "/dev/tcp6";
487 		af = AF_INET6;
488 	} else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp6")
489 	    == 0) {
490 		knc.knc_semantics = NC_TPI_CLTS;
491 		knc.knc_protofmly = "inet6";
492 		knc.knc_proto = "udp";
493 		devnam = "/dev/udp6";
494 		af = AF_INET6;
495 	} else {
496 		goto cb_init_out;
497 	}
498 
499 	if (lookupname(devnam, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp) != 0) {
500 
501 		goto cb_init_out;
502 	}
503 
504 	if (vp->v_type != VCHR) {
505 		VN_RELE(vp);
506 		goto cb_init_out;
507 	}
508 
509 	knc.knc_rdev = vp->v_rdev;
510 
511 	VN_RELE(vp);
512 
513 	if (af == AF_INET) {
514 		size = sizeof (addr4);
515 		bzero(&addr4, size);
516 		addr4.sin_family = (sa_family_t)af;
517 		addr = &addr4.sin_addr;
518 		pp = &addr4.sin_port;
519 		taddr = &addr4;
520 	} else /* AF_INET6 */ {
521 		size = sizeof (addr6);
522 		bzero(&addr6, size);
523 		addr6.sin6_family = (sa_family_t)af;
524 		addr = &addr6.sin6_addr;
525 		pp = &addr6.sin6_port;
526 		taddr = &addr6;
527 	}
528 
529 	if (uaddr2sockaddr(af,
530 	    cbp->cb_callback.cb_location.r_addr, addr, pp)) {
531 
532 		goto cb_init_out;
533 	}
534 
535 
536 	nb.maxlen = nb.len = size;
537 	nb.buf = (char *)taddr;
538 
539 	if (clnt_tli_kcreate(&knc, &nb, cbp->cb_callback.cb_program,
540 	    NFS_CB, 0, 0, curthread->t_cred, &ch)) {
541 
542 		ch = NULL;
543 	}
544 
545 	/* turn off reserved port usage */
546 	(void) CLNT_CONTROL(ch, CLSET_BINDRESVPORT, (char *)&useresvport);
547 
548 cb_init_out:
549 	mutex_exit(cbp->cb_lock);
550 	return (ch);
551 }
552 
553 /*
554  * Iterate over the client handle cache and
555  * destroy it.
556  */
557 static void
rfs4_cb_chflush(rfs4_cbinfo_t * cbp)558 rfs4_cb_chflush(rfs4_cbinfo_t *cbp)
559 {
560 	CLIENT *ch;
561 
562 	while (cbp->cb_chc_free) {
563 		cbp->cb_chc_free--;
564 		ch = cbp->cb_chc[cbp->cb_chc_free];
565 		cbp->cb_chc[cbp->cb_chc_free] = NULL;
566 		if (ch) {
567 			if (ch->cl_auth)
568 				auth_destroy(ch->cl_auth);
569 			clnt_destroy(ch);
570 		}
571 	}
572 }
573 
574 /*
575  * Return a client handle, either from a the small
576  * rfs4_client_t cache or one that we just created.
577  */
578 static CLIENT *
rfs4_cb_getch(rfs4_cbinfo_t * cbp)579 rfs4_cb_getch(rfs4_cbinfo_t *cbp)
580 {
581 	CLIENT *cbch = NULL;
582 	uint32_t zilch = 0;
583 
584 	mutex_enter(cbp->cb_lock);
585 
586 	if (cbp->cb_chc_free) {
587 		cbp->cb_chc_free--;
588 		cbch = cbp->cb_chc[ cbp->cb_chc_free ];
589 		mutex_exit(cbp->cb_lock);
590 		(void) CLNT_CONTROL(cbch, CLSET_XID, (char *)&zilch);
591 		return (cbch);
592 	}
593 
594 	mutex_exit(cbp->cb_lock);
595 
596 	/* none free so make it now */
597 	cbch = rfs4_cbch_init(cbp);
598 
599 	return (cbch);
600 }
601 
602 /*
603  * Return the client handle to the small cache or
604  * destroy it.
605  */
606 static void
rfs4_cb_freech(rfs4_cbinfo_t * cbp,CLIENT * ch,bool_t lockheld)607 rfs4_cb_freech(rfs4_cbinfo_t *cbp, CLIENT *ch, bool_t lockheld)
608 {
609 	if (lockheld == FALSE)
610 		mutex_enter(cbp->cb_lock);
611 
612 	if (cbp->cb_chc_free < RFS4_CBCH_MAX) {
613 		cbp->cb_chc[ cbp->cb_chc_free++ ] = ch;
614 		if (lockheld == FALSE)
615 			mutex_exit(cbp->cb_lock);
616 		return;
617 	}
618 	if (lockheld == FALSE)
619 		mutex_exit(cbp->cb_lock);
620 
621 	/*
622 	 * cache maxed out of free entries, obliterate
623 	 * this client handle, destroy it, throw it away.
624 	 */
625 	if (ch->cl_auth)
626 		auth_destroy(ch->cl_auth);
627 	clnt_destroy(ch);
628 }
629 
630 /*
631  * With the supplied callback information - initialize the client
632  * callback data.  If there is a callback in progress, save the
633  * callback info so that a thread can pick it up in the future.
634  */
635 void
rfs4_client_setcb(rfs4_client_t * cp,cb_client4 * cb,uint32_t cb_ident)636 rfs4_client_setcb(rfs4_client_t *cp, cb_client4 *cb, uint32_t cb_ident)
637 {
638 	char *addr = NULL;
639 	char *netid = NULL;
640 	rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
641 	size_t len;
642 
643 	/* Set the call back for the client */
644 	if (cb->cb_location.r_addr && cb->cb_location.r_addr[0] != '\0' &&
645 	    cb->cb_location.r_netid && cb->cb_location.r_netid[0] != '\0') {
646 		len = strlen(cb->cb_location.r_addr) + 1;
647 		addr = kmem_alloc(len, KM_SLEEP);
648 		bcopy(cb->cb_location.r_addr, addr, len);
649 		len = strlen(cb->cb_location.r_netid) + 1;
650 		netid = kmem_alloc(len, KM_SLEEP);
651 		bcopy(cb->cb_location.r_netid, netid, len);
652 	}
653 	/* ready to save the new information but first free old, if exists */
654 	mutex_enter(cbp->cb_lock);
655 
656 	cbp->cb_newer.cb_callback.cb_program = cb->cb_program;
657 
658 	if (cbp->cb_newer.cb_callback.cb_location.r_addr != NULL)
659 		kmem_free(cbp->cb_newer.cb_callback.cb_location.r_addr,
660 		    strlen(cbp->cb_newer.cb_callback.cb_location.r_addr) + 1);
661 	cbp->cb_newer.cb_callback.cb_location.r_addr = addr;
662 
663 	if (cbp->cb_newer.cb_callback.cb_location.r_netid != NULL)
664 		kmem_free(cbp->cb_newer.cb_callback.cb_location.r_netid,
665 		    strlen(cbp->cb_newer.cb_callback.cb_location.r_netid) + 1);
666 	cbp->cb_newer.cb_callback.cb_location.r_netid = netid;
667 
668 	cbp->cb_newer.cb_ident = cb_ident;
669 
670 	if (addr && *addr && netid && *netid) {
671 		cbp->cb_newer.cb_new = TRUE;
672 		cbp->cb_newer.cb_confirmed = FALSE;
673 	} else {
674 		cbp->cb_newer.cb_new = FALSE;
675 		cbp->cb_newer.cb_confirmed = FALSE;
676 	}
677 
678 	mutex_exit(cbp->cb_lock);
679 }
680 
681 /*
682  * The server uses this when processing SETCLIENTID_CONFIRM.  Callback
683  * information may have been provided on SETCLIENTID and this call
684  * marks that information as confirmed and then starts a thread to
685  * test the callback path.
686  */
687 void
rfs4_deleg_cb_check(rfs4_client_t * cp)688 rfs4_deleg_cb_check(rfs4_client_t *cp)
689 {
690 	if (cp->rc_cbinfo.cb_newer.cb_new == FALSE)
691 		return;
692 
693 	cp->rc_cbinfo.cb_newer.cb_confirmed = TRUE;
694 
695 	rfs4_dbe_hold(cp->rc_dbe); /* hold the client struct for thread */
696 
697 	(void) zthread_create(NULL, 0, rfs4_do_cb_null, cp, 0,
698 	    minclsyspri);
699 }
700 
701 static void
rfs4args_cb_recall_free(nfs_cb_argop4 * argop)702 rfs4args_cb_recall_free(nfs_cb_argop4 *argop)
703 {
704 	CB_RECALL4args	*rec_argp;
705 
706 	rec_argp = &argop->nfs_cb_argop4_u.opcbrecall;
707 	if (rec_argp->fh.nfs_fh4_val)
708 		kmem_free(rec_argp->fh.nfs_fh4_val, rec_argp->fh.nfs_fh4_len);
709 }
710 
711 /* ARGSUSED */
712 static void
rfs4args_cb_getattr_free(nfs_cb_argop4 * argop)713 rfs4args_cb_getattr_free(nfs_cb_argop4 *argop)
714 {
715 	CB_GETATTR4args *argp;
716 
717 	argp = &argop->nfs_cb_argop4_u.opcbgetattr;
718 	if (argp->fh.nfs_fh4_val)
719 		kmem_free(argp->fh.nfs_fh4_val, argp->fh.nfs_fh4_len);
720 }
721 
722 static void
rfs4freeargres(CB_COMPOUND4args * args,CB_COMPOUND4res * resp)723 rfs4freeargres(CB_COMPOUND4args *args, CB_COMPOUND4res *resp)
724 {
725 	int i, arglen;
726 	nfs_cb_argop4 *argop;
727 
728 	/*
729 	 * First free any special args alloc'd for specific ops.
730 	 */
731 	arglen = args->array_len;
732 	argop = args->array;
733 	for (i = 0; i < arglen; i++, argop++) {
734 
735 		switch (argop->argop) {
736 		case OP_CB_RECALL:
737 			rfs4args_cb_recall_free(argop);
738 			break;
739 
740 		case OP_CB_GETATTR:
741 			rfs4args_cb_getattr_free(argop);
742 			break;
743 
744 		default:
745 			return;
746 		}
747 	}
748 
749 	if (args->tag.utf8string_len > 0)
750 		UTF8STRING_FREE(args->tag)
751 
752 	kmem_free(args->array, arglen * sizeof (nfs_cb_argop4));
753 	if (resp)
754 		xdr_free(xdr_CB_COMPOUND4res, (caddr_t)resp);
755 }
756 
757 /*
758  * General callback routine for the server to the client.
759  */
760 static enum clnt_stat
rfs4_do_callback(rfs4_client_t * cp,CB_COMPOUND4args * args,CB_COMPOUND4res * res,struct timeval timeout)761 rfs4_do_callback(rfs4_client_t *cp, CB_COMPOUND4args *args,
762     CB_COMPOUND4res *res, struct timeval timeout)
763 {
764 	rfs4_cbinfo_t *cbp;
765 	CLIENT *ch;
766 	/* start with this in case cb_getch() fails */
767 	enum clnt_stat	stat = RPC_FAILED;
768 
769 	res->tag.utf8string_val = NULL;
770 	res->array = NULL;
771 
772 retry:
773 	cbp = rfs4_cbinfo_hold(cp);
774 	if (cbp == NULL)
775 		return (stat);
776 
777 	/* get a client handle */
778 	if ((ch = rfs4_cb_getch(cbp)) != NULL) {
779 		/*
780 		 * reset the cb_ident since it may have changed in
781 		 * rfs4_cbinfo_hold()
782 		 */
783 		args->callback_ident = cbp->cb_ident;
784 
785 		stat = clnt_call(ch, CB_COMPOUND, xdr_CB_COMPOUND4args_srv,
786 		    (caddr_t)args, xdr_CB_COMPOUND4res,
787 		    (caddr_t)res, timeout);
788 
789 		/* free client handle */
790 		rfs4_cb_freech(cbp, ch, FALSE);
791 	}
792 
793 	/*
794 	 * If the rele says that there may be new callback info then
795 	 * retry this sequence and it may succeed as a result of the
796 	 * new callback path
797 	 */
798 	if (rfs4_cbinfo_rele(cbp,
799 	    (stat == RPC_SUCCESS ? CB_NOCHANGE : CB_FAILED)) == TRUE)
800 		goto retry;
801 
802 	return (stat);
803 }
804 
805 /*
806  * Used by the NFSv4 server to get attributes for a file while
807  * handling the case where a file has been write delegated.  For the
808  * time being, VOP_GETATTR() is called and CB_GETATTR processing is
809  * not undertaken.  This call site is maintained in case the server is
810  * updated in the future to handle write delegation space guarantees.
811  */
812 nfsstat4
rfs4_vop_getattr(vnode_t * vp,vattr_t * vap,int flag,cred_t * cr)813 rfs4_vop_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr)
814 {
815 
816 	int error;
817 
818 	error = VOP_GETATTR(vp, vap, flag, cr, NULL);
819 	return (puterrno4(error));
820 }
821 
822 /*
823  * This is used everywhere in the v2/v3 server to allow the
824  * integration of all NFS versions and the support of delegation.  For
825  * now, just call the VOP_GETATTR().  If the NFSv4 server is enhanced
826  * in the future to provide space guarantees for write delegations
827  * then this call site should be expanded to interact with the client.
828  */
829 int
rfs4_delegated_getattr(vnode_t * vp,vattr_t * vap,int flag,cred_t * cr)830 rfs4_delegated_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr)
831 {
832 	return (VOP_GETATTR(vp, vap, flag, cr, NULL));
833 }
834 
835 /*
836  * Place the actual cb_recall otw call to client.
837  */
838 static void
rfs4_do_cb_recall(rfs4_deleg_state_t * dsp,bool_t trunc)839 rfs4_do_cb_recall(rfs4_deleg_state_t *dsp, bool_t trunc)
840 {
841 	CB_COMPOUND4args	cb4_args;
842 	CB_COMPOUND4res		cb4_res;
843 	CB_RECALL4args		*rec_argp;
844 	CB_RECALL4res		*rec_resp;
845 	nfs_cb_argop4		*argop;
846 	int			numops;
847 	int			argoplist_size;
848 	struct timeval		timeout;
849 	nfs_fh4			*fhp;
850 	enum clnt_stat		call_stat;
851 
852 	/*
853 	 * set up the compound args
854 	 */
855 	numops = 1;	/* CB_RECALL only */
856 
857 	argoplist_size = numops * sizeof (nfs_cb_argop4);
858 	argop = kmem_zalloc(argoplist_size, KM_SLEEP);
859 	argop->argop = OP_CB_RECALL;
860 	rec_argp = &argop->nfs_cb_argop4_u.opcbrecall;
861 
862 	(void) str_to_utf8("cb_recall", &cb4_args.tag);
863 	cb4_args.minorversion = CB4_MINORVERSION;
864 	/* cb4_args.callback_ident is set in rfs4_do_callback() */
865 	cb4_args.array_len = numops;
866 	cb4_args.array = argop;
867 
868 	/*
869 	 * fill in the args struct
870 	 */
871 	bcopy(&dsp->rds_delegid.stateid, &rec_argp->stateid, sizeof (stateid4));
872 	rec_argp->truncate = trunc;
873 
874 	fhp = &dsp->rds_finfo->rf_filehandle;
875 	rec_argp->fh.nfs_fh4_val = kmem_alloc(sizeof (char) *
876 	    fhp->nfs_fh4_len, KM_SLEEP);
877 	nfs_fh4_copy(fhp, &rec_argp->fh);
878 
879 	/* Keep track of when we did this for observability */
880 	dsp->rds_time_recalled = gethrestime_sec();
881 
882 	/*
883 	 * Set up the timeout for the callback and make the actual call.
884 	 * Timeout will be 80% of the lease period for this server.
885 	 */
886 	timeout.tv_sec = (rfs4_lease_time * 80) / 100;
887 	timeout.tv_usec = 0;
888 
889 	DTRACE_NFSV4_3(cb__recall__start, rfs4_client_t *, dsp->rds_client,
890 	    rfs4_deleg_state_t *, dsp, CB_RECALL4args *, rec_argp);
891 
892 	call_stat = rfs4_do_callback(dsp->rds_client, &cb4_args, &cb4_res,
893 	    timeout);
894 
895 	rec_resp = (cb4_res.array_len == 0) ? NULL :
896 	    &cb4_res.array[0].nfs_cb_resop4_u.opcbrecall;
897 
898 	DTRACE_NFSV4_3(cb__recall__done, rfs4_client_t *, dsp->rds_client,
899 	    rfs4_deleg_state_t *, dsp, CB_RECALL4res *, rec_resp);
900 
901 	if (call_stat != RPC_SUCCESS || cb4_res.status != NFS4_OK) {
902 		rfs4_return_deleg(dsp, TRUE);
903 	}
904 
905 	rfs4freeargres(&cb4_args, &cb4_res);
906 }
907 
908 struct recall_arg {
909 	rfs4_deleg_state_t *dsp;
910 	void (*recall)(rfs4_deleg_state_t *, bool_t trunc);
911 	bool_t trunc;
912 };
913 
914 static void
do_recall(struct recall_arg * arg)915 do_recall(struct recall_arg *arg)
916 {
917 	rfs4_deleg_state_t *dsp = arg->dsp;
918 	rfs4_file_t *fp = dsp->rds_finfo;
919 	callb_cpr_t cpr_info;
920 	kmutex_t cpr_lock;
921 
922 	mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
923 	CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "nfsv4Recall");
924 
925 	/*
926 	 * It is possible that before this thread starts
927 	 * the client has send us a return_delegation, and
928 	 * if that is the case we do not need to send the
929 	 * recall callback.
930 	 */
931 	if (dsp->rds_dtype != OPEN_DELEGATE_NONE) {
932 		DTRACE_PROBE3(nfss__i__recall,
933 		    struct recall_arg *, arg,
934 		    struct rfs4_deleg_state_t *, dsp,
935 		    struct rfs4_file_t *, fp);
936 
937 		if (arg->recall)
938 			(void) (*arg->recall)(dsp, arg->trunc);
939 	}
940 
941 	mutex_enter(fp->rf_dinfo.rd_recall_lock);
942 	/*
943 	 * Recall count may go negative if the parent thread that is
944 	 * creating the individual callback threads does not modify
945 	 * the recall_count field before the callback thread actually
946 	 * gets a response from the CB_RECALL
947 	 */
948 	fp->rf_dinfo.rd_recall_count--;
949 	if (fp->rf_dinfo.rd_recall_count == 0)
950 		cv_signal(fp->rf_dinfo.rd_recall_cv);
951 	mutex_exit(fp->rf_dinfo.rd_recall_lock);
952 
953 	mutex_enter(&cpr_lock);
954 	CALLB_CPR_EXIT(&cpr_info);
955 	mutex_destroy(&cpr_lock);
956 
957 	rfs4_deleg_state_rele(dsp); /* release the hold for this thread */
958 	kmem_free(arg, sizeof (struct recall_arg));
959 	zthread_exit();
960 }
961 
962 struct master_recall_args {
963     rfs4_file_t *fp;
964     void (*recall)(rfs4_deleg_state_t *, bool_t);
965     bool_t trunc;
966 };
967 
968 static void
do_recall_file(struct master_recall_args * map)969 do_recall_file(struct master_recall_args *map)
970 {
971 	rfs4_file_t *fp = map->fp;
972 	rfs4_deleg_state_t *dsp;
973 	struct recall_arg *arg;
974 	callb_cpr_t cpr_info;
975 	kmutex_t cpr_lock;
976 	int32_t recall_count;
977 
978 	rfs4_dbe_lock(fp->rf_dbe);
979 
980 	/* Recall already in progress ? */
981 	mutex_enter(fp->rf_dinfo.rd_recall_lock);
982 	if (fp->rf_dinfo.rd_recall_count != 0) {
983 		mutex_exit(fp->rf_dinfo.rd_recall_lock);
984 		rfs4_dbe_rele_nolock(fp->rf_dbe);
985 		rfs4_dbe_unlock(fp->rf_dbe);
986 		kmem_free(map, sizeof (struct master_recall_args));
987 		zthread_exit();
988 	}
989 
990 	mutex_exit(fp->rf_dinfo.rd_recall_lock);
991 
992 	mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
993 	CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr,	"v4RecallFile");
994 
995 	recall_count = 0;
996 	for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
997 	    dsp = list_next(&fp->rf_delegstatelist, dsp)) {
998 
999 		rfs4_dbe_lock(dsp->rds_dbe);
1000 		/*
1001 		 * if this delegation state
1002 		 * is being reaped skip it
1003 		 */
1004 		if (rfs4_dbe_is_invalid(dsp->rds_dbe)) {
1005 			rfs4_dbe_unlock(dsp->rds_dbe);
1006 			continue;
1007 		}
1008 
1009 		/* hold for receiving thread */
1010 		rfs4_dbe_hold(dsp->rds_dbe);
1011 		rfs4_dbe_unlock(dsp->rds_dbe);
1012 
1013 		arg = kmem_alloc(sizeof (struct recall_arg), KM_SLEEP);
1014 		arg->recall = map->recall;
1015 		arg->trunc = map->trunc;
1016 		arg->dsp = dsp;
1017 
1018 		recall_count++;
1019 
1020 		(void) zthread_create(NULL, 0, do_recall, arg, 0,
1021 		    minclsyspri);
1022 	}
1023 
1024 	rfs4_dbe_unlock(fp->rf_dbe);
1025 
1026 	mutex_enter(fp->rf_dinfo.rd_recall_lock);
1027 	/*
1028 	 * Recall count may go negative if the parent thread that is
1029 	 * creating the individual callback threads does not modify
1030 	 * the recall_count field before the callback thread actually
1031 	 * gets a response from the CB_RECALL
1032 	 */
1033 	fp->rf_dinfo.rd_recall_count += recall_count;
1034 	while (fp->rf_dinfo.rd_recall_count)
1035 		cv_wait(fp->rf_dinfo.rd_recall_cv, fp->rf_dinfo.rd_recall_lock);
1036 
1037 	mutex_exit(fp->rf_dinfo.rd_recall_lock);
1038 
1039 	DTRACE_PROBE1(nfss__i__recall_done, rfs4_file_t *, fp);
1040 	rfs4_file_rele(fp);
1041 	kmem_free(map, sizeof (struct master_recall_args));
1042 	mutex_enter(&cpr_lock);
1043 	CALLB_CPR_EXIT(&cpr_info);
1044 	mutex_destroy(&cpr_lock);
1045 	zthread_exit();
1046 }
1047 
1048 static void
rfs4_recall_file(rfs4_file_t * fp,void (* recall)(rfs4_deleg_state_t *,bool_t trunc),bool_t trunc,rfs4_client_t * cp)1049 rfs4_recall_file(rfs4_file_t *fp,
1050     void (*recall)(rfs4_deleg_state_t *, bool_t trunc),
1051     bool_t trunc, rfs4_client_t *cp)
1052 {
1053 	struct master_recall_args *args;
1054 
1055 	rfs4_dbe_lock(fp->rf_dbe);
1056 	if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
1057 		rfs4_dbe_unlock(fp->rf_dbe);
1058 		return;
1059 	}
1060 	rfs4_dbe_hold(fp->rf_dbe);	/* hold for new thread */
1061 
1062 	/*
1063 	 * Mark the time we started the recall processing.
1064 	 * If it has been previously recalled, do not reset the
1065 	 * timer since this is used for the revocation decision.
1066 	 */
1067 	if (fp->rf_dinfo.rd_time_recalled == 0)
1068 		fp->rf_dinfo.rd_time_recalled = gethrestime_sec();
1069 	fp->rf_dinfo.rd_ever_recalled = TRUE; /* used for policy decision */
1070 	/* Client causing recall not always available */
1071 	if (cp)
1072 		fp->rf_dinfo.rd_conflicted_client = cp->rc_clientid;
1073 
1074 	rfs4_dbe_unlock(fp->rf_dbe);
1075 
1076 	args = kmem_alloc(sizeof (struct master_recall_args), KM_SLEEP);
1077 	args->fp = fp;
1078 	args->recall = recall;
1079 	args->trunc = trunc;
1080 
1081 	(void) zthread_create(NULL, 0, do_recall_file, args, 0,
1082 	    minclsyspri);
1083 }
1084 
1085 void
rfs4_recall_deleg(rfs4_file_t * fp,bool_t trunc,rfs4_client_t * cp)1086 rfs4_recall_deleg(rfs4_file_t *fp, bool_t trunc, rfs4_client_t *cp)
1087 {
1088 	time_t elapsed1, elapsed2;
1089 
1090 	if (fp->rf_dinfo.rd_time_recalled != 0) {
1091 		elapsed1 = gethrestime_sec() - fp->rf_dinfo.rd_time_recalled;
1092 		elapsed2 = gethrestime_sec() - fp->rf_dinfo.rd_time_lastwrite;
1093 		/* First check to see if a revocation should occur */
1094 		if (elapsed1 > rfs4_lease_time &&
1095 		    elapsed2 > rfs4_lease_time) {
1096 			rfs4_revoke_file(fp);
1097 			return;
1098 		}
1099 		/*
1100 		 * Next check to see if a recall should be done again
1101 		 * so quickly.
1102 		 */
1103 		if (elapsed1 <= ((rfs4_lease_time * 20) / 100))
1104 			return;
1105 	}
1106 	rfs4_recall_file(fp, rfs4_do_cb_recall, trunc, cp);
1107 }
1108 
1109 /*
1110  * rfs4_check_recall is called from rfs4_do_open to determine if the current
1111  * open conflicts with the delegation.
1112  * Return true if we need recall otherwise false.
1113  * Assumes entry locks for sp and sp->rs_finfo are held.
1114  */
1115 bool_t
rfs4_check_recall(rfs4_state_t * sp,uint32_t access)1116 rfs4_check_recall(rfs4_state_t *sp, uint32_t access)
1117 {
1118 	open_delegation_type4 dtype = sp->rs_finfo->rf_dinfo.rd_dtype;
1119 
1120 	switch (dtype) {
1121 	case OPEN_DELEGATE_NONE:
1122 		/* Not currently delegated so there is nothing to do */
1123 		return (FALSE);
1124 	case OPEN_DELEGATE_READ:
1125 		/*
1126 		 * If the access is only asking for READ then there is
1127 		 * no conflict and nothing to do.  If it is asking
1128 		 * for write, then there will be conflict and the read
1129 		 * delegation should be recalled.
1130 		 */
1131 		if (access == OPEN4_SHARE_ACCESS_READ)
1132 			return (FALSE);
1133 		else
1134 			return (TRUE);
1135 	case OPEN_DELEGATE_WRITE:
1136 		/* Check to see if this client has the delegation */
1137 		return (rfs4_is_deleg(sp));
1138 	}
1139 
1140 	return (FALSE);
1141 }
1142 
1143 /*
1144  * Return the "best" allowable delegation available given the current
1145  * delegation type and the desired access and deny modes on the file.
1146  * At the point that this routine is called we know that the access and
1147  * deny modes are consistent with the file modes.
1148  */
1149 static open_delegation_type4
rfs4_check_delegation(rfs4_state_t * sp,rfs4_file_t * fp)1150 rfs4_check_delegation(rfs4_state_t *sp, rfs4_file_t *fp)
1151 {
1152 	open_delegation_type4 dtype = fp->rf_dinfo.rd_dtype;
1153 	uint32_t access = sp->rs_share_access;
1154 	uint32_t deny = sp->rs_share_deny;
1155 	int readcnt = 0;
1156 	int writecnt = 0;
1157 
1158 	switch (dtype) {
1159 	case OPEN_DELEGATE_NONE:
1160 		/*
1161 		 * Determine if more than just this OPEN have the file
1162 		 * open and if so, no delegation may be provided to
1163 		 * the client.
1164 		 */
1165 		if (access & OPEN4_SHARE_ACCESS_WRITE)
1166 			writecnt++;
1167 		if (access & OPEN4_SHARE_ACCESS_READ)
1168 			readcnt++;
1169 
1170 		if (fp->rf_access_read > readcnt ||
1171 		    fp->rf_access_write > writecnt)
1172 			return (OPEN_DELEGATE_NONE);
1173 
1174 		/*
1175 		 * If the client is going to write, or if the client
1176 		 * has exclusive access, return a write delegation.
1177 		 */
1178 		if ((access & OPEN4_SHARE_ACCESS_WRITE) ||
1179 		    (deny & (OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE)))
1180 			return (OPEN_DELEGATE_WRITE);
1181 		/*
1182 		 * If we don't want to write or we've haven't denied read
1183 		 * access to others, return a read delegation.
1184 		 */
1185 		if ((access & ~OPEN4_SHARE_ACCESS_WRITE) ||
1186 		    (deny & ~OPEN4_SHARE_DENY_READ))
1187 			return (OPEN_DELEGATE_READ);
1188 
1189 		/* Shouldn't get here */
1190 		return (OPEN_DELEGATE_NONE);
1191 
1192 	case OPEN_DELEGATE_READ:
1193 		/*
1194 		 * If the file is delegated for read but we wan't to
1195 		 * write or deny others to read then we can't delegate
1196 		 * the file. We shouldn't get here since the delegation should
1197 		 * have been recalled already.
1198 		 */
1199 		if ((access & OPEN4_SHARE_ACCESS_WRITE) ||
1200 		    (deny & OPEN4_SHARE_DENY_READ))
1201 			return (OPEN_DELEGATE_NONE);
1202 		return (OPEN_DELEGATE_READ);
1203 
1204 	case OPEN_DELEGATE_WRITE:
1205 		return (OPEN_DELEGATE_WRITE);
1206 	}
1207 
1208 	/* Shouldn't get here */
1209 	return (OPEN_DELEGATE_NONE);
1210 }
1211 
1212 /*
1213  * Given the desired delegation type and the "history" of the file
1214  * determine the actual delegation type to return.
1215  */
1216 static open_delegation_type4
rfs4_delegation_policy(nfs4_srv_t * nsrv4,open_delegation_type4 dtype,rfs4_dinfo_t * dinfo,clientid4 cid)1217 rfs4_delegation_policy(nfs4_srv_t *nsrv4, open_delegation_type4 dtype,
1218     rfs4_dinfo_t *dinfo, clientid4 cid)
1219 {
1220 	time_t elapsed;
1221 
1222 	if (nsrv4->nfs4_deleg_policy != SRV_NORMAL_DELEGATE)
1223 		return (OPEN_DELEGATE_NONE);
1224 
1225 	/*
1226 	 * Has this file/delegation ever been recalled?  If not then
1227 	 * no further checks for a delegation race need to be done.
1228 	 * However if a recall has occurred, then check to see if a
1229 	 * client has caused its own delegation recall to occur.  If
1230 	 * not, then has a delegation for this file been returned
1231 	 * recently?  If so, then do not assign a new delegation to
1232 	 * avoid a "delegation race" between the original client and
1233 	 * the new/conflicting client.
1234 	 */
1235 	if (dinfo->rd_ever_recalled == TRUE) {
1236 		if (dinfo->rd_conflicted_client != cid) {
1237 			elapsed = gethrestime_sec() - dinfo->rd_time_returned;
1238 			if (elapsed < rfs4_lease_time)
1239 				return (OPEN_DELEGATE_NONE);
1240 		}
1241 	}
1242 
1243 	/* Limit the number of read grants */
1244 	if (dtype == OPEN_DELEGATE_READ &&
1245 	    dinfo->rd_rdgrants > MAX_READ_DELEGATIONS)
1246 		return (OPEN_DELEGATE_NONE);
1247 
1248 	/*
1249 	 * Should consider limiting total number of read/write
1250 	 * delegations the server will permit.
1251 	 */
1252 
1253 	return (dtype);
1254 }
1255 
1256 /*
1257  * Try and grant a delegation for an open give the state. The routine
1258  * returns the delegation type granted. This could be OPEN_DELEGATE_NONE.
1259  *
1260  * The state and associate file entry must be locked
1261  */
1262 rfs4_deleg_state_t *
rfs4_grant_delegation(delegreq_t dreq,rfs4_state_t * sp,int * recall)1263 rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall)
1264 {
1265 	nfs4_srv_t *nsrv4;
1266 	rfs4_file_t *fp = sp->rs_finfo;
1267 	open_delegation_type4 dtype;
1268 	int no_delegation;
1269 
1270 	ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
1271 	ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1272 
1273 	nsrv4 = nfs4_get_srv();
1274 
1275 	/* Is the server even providing delegations? */
1276 	if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE ||
1277 	    dreq == DELEG_NONE) {
1278 		return (NULL);
1279 	}
1280 
1281 	/* Check to see if delegations have been temporarily disabled */
1282 	mutex_enter(&nsrv4->deleg_lock);
1283 	no_delegation = rfs4_deleg_disabled;
1284 	mutex_exit(&nsrv4->deleg_lock);
1285 
1286 	if (no_delegation)
1287 		return (NULL);
1288 
1289 	/* Don't grant a delegation if a deletion is impending. */
1290 	if (fp->rf_dinfo.rd_hold_grant > 0) {
1291 		return (NULL);
1292 	}
1293 
1294 	/*
1295 	 * Don't grant a delegation if there are any lock manager
1296 	 * (NFSv2/v3) locks for the file.  This is a bit of a hack (e.g.,
1297 	 * if there are only read locks we should be able to grant a
1298 	 * read-only delegation), but it's good enough for now.
1299 	 *
1300 	 * MT safety: the lock manager checks for conflicting delegations
1301 	 * before processing a lock request.  That check will block until
1302 	 * we are done here.  So if the lock manager acquires a lock after
1303 	 * we decide to grant the delegation, the delegation will get
1304 	 * immediately recalled (if there's a conflict), so we're safe.
1305 	 */
1306 	if (lm_vp_active(fp->rf_vp)) {
1307 		return (NULL);
1308 	}
1309 
1310 	/*
1311 	 * Based on the type of delegation request passed in, take the
1312 	 * appropriate action (DELEG_NONE is handled above)
1313 	 */
1314 	switch (dreq) {
1315 
1316 	case DELEG_READ:
1317 	case DELEG_WRITE:
1318 		/*
1319 		 * The server "must" grant the delegation in this case.
1320 		 * Client is using open previous
1321 		 */
1322 		dtype = (open_delegation_type4)dreq;
1323 		*recall = 1;
1324 		break;
1325 	case DELEG_ANY:
1326 		/*
1327 		 * If a valid callback path does not exist, no delegation may
1328 		 * be granted.
1329 		 */
1330 		if (sp->rs_owner->ro_client->rc_cbinfo.cb_state != CB_OK)
1331 			return (NULL);
1332 
1333 		/*
1334 		 * If the original operation which caused time_rm_delayed
1335 		 * to be set hasn't been retried and completed for one
1336 		 * full lease period, clear it and allow delegations to
1337 		 * get granted again.
1338 		 */
1339 		if (fp->rf_dinfo.rd_time_rm_delayed > 0 &&
1340 		    gethrestime_sec() >
1341 		    fp->rf_dinfo.rd_time_rm_delayed + rfs4_lease_time)
1342 			fp->rf_dinfo.rd_time_rm_delayed = 0;
1343 
1344 		/*
1345 		 * If we are waiting for a delegation to be returned then
1346 		 * don't delegate this file. We do this for correctness as
1347 		 * well as if the file is being recalled we would likely
1348 		 * recall this file again.
1349 		 */
1350 
1351 		if (fp->rf_dinfo.rd_time_recalled != 0 ||
1352 		    fp->rf_dinfo.rd_time_rm_delayed != 0)
1353 			return (NULL);
1354 
1355 		/* Get the "best" delegation candidate */
1356 		dtype = rfs4_check_delegation(sp, fp);
1357 
1358 		if (dtype == OPEN_DELEGATE_NONE)
1359 			return (NULL);
1360 
1361 		/*
1362 		 * Based on policy and the history of the file get the
1363 		 * actual delegation.
1364 		 */
1365 		dtype = rfs4_delegation_policy(nsrv4, dtype, &fp->rf_dinfo,
1366 		    sp->rs_owner->ro_client->rc_clientid);
1367 
1368 		if (dtype == OPEN_DELEGATE_NONE)
1369 			return (NULL);
1370 		break;
1371 	default:
1372 		return (NULL);
1373 	}
1374 
1375 	/* set the delegation for the state */
1376 	return (rfs4_deleg_state(sp, dtype, recall));
1377 }
1378 
1379 void
rfs4_set_deleg_response(rfs4_deleg_state_t * dsp,open_delegation4 * dp,nfsace4 * ace,int recall)1380 rfs4_set_deleg_response(rfs4_deleg_state_t *dsp, open_delegation4 *dp,
1381     nfsace4 *ace,  int recall)
1382 {
1383 	open_write_delegation4 *wp;
1384 	open_read_delegation4 *rp;
1385 	nfs_space_limit4 *spl;
1386 	nfsace4 nace;
1387 
1388 	/*
1389 	 * We need to allocate a new copy of the who string.
1390 	 * this string will be freed by the rfs4_op_open dis_resfree
1391 	 * routine. We need to do this allocation since replays will
1392 	 * be allocated and rfs4_compound can't tell the difference from
1393 	 * a replay and an inital open. N.B. if an ace is passed in, it
1394 	 * the caller's responsibility to free it.
1395 	 */
1396 
1397 	if (ace == NULL) {
1398 		/*
1399 		 * Default is to deny all access, the client will have
1400 		 * to contact the server.  XXX Do we want to actually
1401 		 * set a deny for every one, or do we simply want to
1402 		 * construct an entity that will match no one?
1403 		 */
1404 		nace.type = ACE4_ACCESS_DENIED_ACE_TYPE;
1405 		nace.flag = 0;
1406 		nace.access_mask = ACE4_VALID_MASK_BITS;
1407 		(void) str_to_utf8(ACE4_WHO_EVERYONE, &nace.who);
1408 	} else {
1409 		nace.type = ace->type;
1410 		nace.flag = ace->flag;
1411 		nace.access_mask = ace->access_mask;
1412 		(void) utf8_copy(&ace->who, &nace.who);
1413 	}
1414 
1415 	dp->delegation_type = dsp->rds_dtype;
1416 
1417 	switch (dsp->rds_dtype) {
1418 	case OPEN_DELEGATE_NONE:
1419 		break;
1420 	case OPEN_DELEGATE_READ:
1421 		rp = &dp->open_delegation4_u.read;
1422 		rp->stateid = dsp->rds_delegid.stateid;
1423 		rp->recall = (bool_t)recall;
1424 		rp->permissions = nace;
1425 		break;
1426 	case OPEN_DELEGATE_WRITE:
1427 		wp = &dp->open_delegation4_u.write;
1428 		wp->stateid = dsp->rds_delegid.stateid;
1429 		wp->recall = (bool_t)recall;
1430 		spl = &wp->space_limit;
1431 		spl->limitby = NFS_LIMIT_SIZE;
1432 		spl->nfs_space_limit4_u.filesize = 0;
1433 		wp->permissions = nace;
1434 		break;
1435 	}
1436 }
1437 
1438 /*
1439  * Check if the file is delegated via the provided file struct.
1440  * Return TRUE if it is delegated.  This is intended for use by
1441  * the v4 server.  The v2/v3 server code should use rfs4_check_delegated().
1442  *
1443  * Note that if the file is found to have a delegation, it is
1444  * recalled, unless the clientid of the caller matches the clientid of the
1445  * delegation. If the caller has specified, there is a slight delay
1446  * inserted in the hopes that the delegation will be returned quickly.
1447  */
1448 bool_t
rfs4_check_delegated_byfp(int mode,rfs4_file_t * fp,bool_t trunc,bool_t do_delay,bool_t is_rm,clientid4 * cp)1449 rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp,
1450     bool_t trunc, bool_t do_delay, bool_t is_rm, clientid4 *cp)
1451 {
1452 	rfs4_deleg_state_t *dsp;
1453 
1454 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
1455 
1456 	/* Is delegation enabled? */
1457 	if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE)
1458 		return (FALSE);
1459 
1460 	/* do we have a delegation on this file? */
1461 	rfs4_dbe_lock(fp->rf_dbe);
1462 	if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
1463 		if (is_rm)
1464 			fp->rf_dinfo.rd_hold_grant++;
1465 		rfs4_dbe_unlock(fp->rf_dbe);
1466 		return (FALSE);
1467 	}
1468 	/*
1469 	 * do we have a write delegation on this file or are we
1470 	 * requesting write access to a file with any type of existing
1471 	 * delegation?
1472 	 */
1473 	if (mode == FWRITE || fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) {
1474 		if (cp != NULL) {
1475 			dsp = list_head(&fp->rf_delegstatelist);
1476 			if (dsp == NULL) {
1477 				rfs4_dbe_unlock(fp->rf_dbe);
1478 				return (FALSE);
1479 			}
1480 			/*
1481 			 * Does the requestor already own the delegation?
1482 			 */
1483 			if (dsp->rds_client->rc_clientid == *(cp)) {
1484 				rfs4_dbe_unlock(fp->rf_dbe);
1485 				return (FALSE);
1486 			}
1487 		}
1488 
1489 		rfs4_dbe_unlock(fp->rf_dbe);
1490 		rfs4_recall_deleg(fp, trunc, NULL);
1491 
1492 		if (!do_delay) {
1493 			rfs4_dbe_lock(fp->rf_dbe);
1494 			fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec();
1495 			rfs4_dbe_unlock(fp->rf_dbe);
1496 			return (TRUE);
1497 		}
1498 
1499 		delay(NFS4_DELEGATION_CONFLICT_DELAY);
1500 
1501 		rfs4_dbe_lock(fp->rf_dbe);
1502 		if (fp->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE) {
1503 			fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec();
1504 			rfs4_dbe_unlock(fp->rf_dbe);
1505 			return (TRUE);
1506 		}
1507 	}
1508 	if (is_rm)
1509 		fp->rf_dinfo.rd_hold_grant++;
1510 	rfs4_dbe_unlock(fp->rf_dbe);
1511 	return (FALSE);
1512 }
1513 
1514 /*
1515  * Check if the file is delegated in the case of a v2 or v3 access.
1516  * Return TRUE if it is delegated which in turn means that v2 should
1517  * drop the request and in the case of v3 JUKEBOX should be returned.
1518  */
1519 bool_t
rfs4_check_delegated(int mode,vnode_t * vp,bool_t trunc)1520 rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc)
1521 {
1522 	nfs4_srv_t *nsrv4;
1523 	rfs4_file_t *fp;
1524 	bool_t create = FALSE;
1525 	bool_t rc = FALSE;
1526 
1527 	nsrv4 = nfs4_get_srv();
1528 	rfs4_hold_deleg_policy(nsrv4);
1529 
1530 	/* Is delegation enabled? */
1531 	if (nsrv4->nfs4_deleg_policy != SRV_NEVER_DELEGATE) {
1532 		fp = rfs4_findfile(vp, NULL, &create);
1533 		if (fp != NULL) {
1534 			if (rfs4_check_delegated_byfp(mode, fp, trunc,
1535 			    TRUE, FALSE, NULL)) {
1536 				rc = TRUE;
1537 			}
1538 			rfs4_file_rele(fp);
1539 		}
1540 	}
1541 	rfs4_rele_deleg_policy(nsrv4);
1542 	return (rc);
1543 }
1544 
1545 /*
1546  * Release a hold on the hold_grant counter which
1547  * prevents delegation from being granted while a remove
1548  * or a rename is in progress.
1549  */
1550 void
rfs4_clear_dont_grant(rfs4_file_t * fp)1551 rfs4_clear_dont_grant(rfs4_file_t *fp)
1552 {
1553 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
1554 
1555 	if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE)
1556 		return;
1557 	rfs4_dbe_lock(fp->rf_dbe);
1558 	ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
1559 	fp->rf_dinfo.rd_hold_grant--;
1560 	fp->rf_dinfo.rd_time_rm_delayed = 0;
1561 	rfs4_dbe_unlock(fp->rf_dbe);
1562 }
1563 
/*
 * State support for delegation.
 * Find or create the delegation state for "sp" and record a delegation of
 * type "dtype" on it and on the file entry.
 *
 * This routine is called from open via rfs4_grant_delegation and the entry
 * locks on sp and sp->rs_finfo are assumed.  Both locks are dropped and
 * re-taken around the rfs4_finddeleg() call, so file state is re-validated
 * afterwards.
 *
 * sp     - open state the delegation is granted for.
 * dtype  - delegation type to grant.
 * recall - in: non-zero means the delegation is granted even when
 *	    conflicting opens/mappings exist (rfs4_grant_delegation sets it
 *	    for "open previous" requests).  out: cleared once the
 *	    pre-grant checks pass, then set to 1 if such a conflict was
 *	    seen, telling the caller the delegation should be recalled.
 *	    Left untouched on the early return paths.
 *
 * Returns the delegation state, or NULL if no delegation is granted.
 */
static rfs4_deleg_state_t *
rfs4_deleg_state(rfs4_state_t *sp, open_delegation_type4 dtype, int *recall)
{
	rfs4_file_t *fp = sp->rs_finfo;
	bool_t create = TRUE;
	rfs4_deleg_state_t *dsp;
	vnode_t *vp;
	int open_prev = *recall;
	int ret;
	int fflags = 0;

	ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
	ASSERT(rfs4_dbe_islocked(fp->rf_dbe));

	/* Shouldn't happen */
	if (fp->rf_dinfo.rd_recall_count != 0 ||
	    (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ &&
	    dtype != OPEN_DELEGATE_READ)) {
		return (NULL);
	}

	/* Unlock to avoid deadlock */
	rfs4_dbe_unlock(fp->rf_dbe);
	rfs4_dbe_unlock(sp->rs_dbe);

	dsp = rfs4_finddeleg(sp, &create);

	/* Re-take the locks in the order state, then file. */
	rfs4_dbe_lock(sp->rs_dbe);
	rfs4_dbe_lock(fp->rf_dbe);

	if (dsp == NULL)
		return (NULL);

	/*
	 * It is possible that since we dropped the lock
	 * in order to call finddeleg, the rfs4_file_t
	 * was marked such that we should not grant a
	 * delegation, if so bail out.
	 */
	if (fp->rf_dinfo.rd_hold_grant > 0) {
		rfs4_deleg_state_rele(dsp);
		return (NULL);
	}

	/*
	 * NOTE(review): create == FALSE presumably means rfs4_finddeleg()
	 * returned an already existing entry rather than a new one --
	 * confirm against rfs4_finddeleg().  Such an entry is only reused
	 * when it belongs to this client and has the requested type.
	 */
	if (create == FALSE) {
		if (sp->rs_owner->ro_client == dsp->rds_client &&
		    dsp->rds_dtype == dtype) {
			return (dsp);
		} else {
			rfs4_deleg_state_rele(dsp);
			return (NULL);
		}
	}

	/*
	 * Check that this file has not been delegated to another
	 * client
	 */
	if (fp->rf_dinfo.rd_recall_count != 0 ||
	    fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE ||
	    (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ &&
	    dtype != OPEN_DELEGATE_READ)) {
		rfs4_deleg_state_rele(dsp);
		return (NULL);
	}

	vp = fp->rf_vp;
	/* vnevent_support returns 0 if file system supports vnevents */
	if (vnevent_support(vp, NULL)) {
		rfs4_deleg_state_rele(dsp);
		return (NULL);
	}

	/* Calculate the fflags for this OPEN. */
	if (sp->rs_share_access & OPEN4_SHARE_ACCESS_READ)
		fflags |= FREAD;
	if (sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE)
		fflags |= FWRITE;

	*recall = 0;
	/*
	 * Before granting a delegation we need to know if anyone else has
	 * opened the file in a conflicting mode.  However, first we need to
	 * know how we opened the file to check the counts properly.
	 *
	 * The conflict check is made twice in each branch: once before the
	 * monitor is installed and once more afterwards.  On the second
	 * failure the monitor is uninstalled again before bailing out.
	 */
	if (dtype == OPEN_DELEGATE_READ) {
		if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
		    (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
		    vn_is_mapped(vp, V_WRITE)) {
			if (open_prev) {
				*recall = 1;
			} else {
				rfs4_deleg_state_rele(dsp);
				return (NULL);
			}
		}
		ret = fem_install(vp, deleg_rdops, (void *)fp, OPUNIQ,
		    rfs4_mon_hold, rfs4_mon_rele);
		if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
		    (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
		    vn_is_mapped(vp, V_WRITE)) {
			if (open_prev) {
				*recall = 1;
			} else {
				(void) fem_uninstall(vp, deleg_rdops,
				    (void *)fp);
				rfs4_deleg_state_rele(dsp);
				return (NULL);
			}
		}
		/*
		 * Because a client can hold onto a delegation after the
		 * file has been closed, we need to keep track of the
		 * access to this file.  Otherwise the CIFS server would
		 * not know about the client accessing the file and could
		 * inappropriately grant an OPLOCK.
		 * fem_install() returns EBUSY when asked to install a
		 * OPUNIQ monitor more than once.  Therefore, check the
		 * return code because we only want this done once.
		 */
		if (ret == 0)
			vn_open_upgrade(vp, FREAD);
	} else { /* WRITE */
		if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
		    (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
		    ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) ||
		    (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) ||
		    vn_is_mapped(vp, V_RDORWR)) {
			if (open_prev) {
				*recall = 1;
			} else {
				rfs4_deleg_state_rele(dsp);
				return (NULL);
			}
		}
		ret = fem_install(vp, deleg_wrops, (void *)fp, OPUNIQ,
		    rfs4_mon_hold, rfs4_mon_rele);
		if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
		    (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
		    ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) ||
		    (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) ||
		    vn_is_mapped(vp, V_RDORWR)) {
			if (open_prev) {
				*recall = 1;
			} else {
				(void) fem_uninstall(vp, deleg_wrops,
				    (void *)fp);
				rfs4_deleg_state_rele(dsp);
				return (NULL);
			}
		}
		/*
		 * Because a client can hold onto a delegation after the
		 * file has been closed, we need to keep track of the
		 * access to this file.  Otherwise the CIFS server would
		 * not know about the client accessing the file and could
		 * inappropriately grant an OPLOCK.
		 * fem_install() returns EBUSY when asked to install a
		 * OPUNIQ monitor more than once.  Therefore, check the
		 * return code because we only want this done once.
		 */
		if (ret == 0)
			vn_open_upgrade(vp, FREAD|FWRITE);
	}
	/* Place on delegation list for file */
	ASSERT(!list_link_active(&dsp->rds_node));
	list_insert_tail(&fp->rf_delegstatelist, dsp);

	/* Record the granted type on both the state and the file. */
	dsp->rds_dtype = fp->rf_dinfo.rd_dtype = dtype;

	/* Update delegation stats for this file */
	fp->rf_dinfo.rd_time_lastgrant = gethrestime_sec();

	/* reset since this is a new delegation */
	fp->rf_dinfo.rd_conflicted_client = 0;
	fp->rf_dinfo.rd_ever_recalled = FALSE;

	if (dtype == OPEN_DELEGATE_READ)
		fp->rf_dinfo.rd_rdgrants++;
	else
		fp->rf_dinfo.rd_wrgrants++;

	return (dsp);
}
1754 
1755 /*
1756  * State routine for the server when a delegation is returned.
1757  */
1758 void
rfs4_return_deleg(rfs4_deleg_state_t * dsp,bool_t revoked)1759 rfs4_return_deleg(rfs4_deleg_state_t *dsp, bool_t revoked)
1760 {
1761 	rfs4_file_t *fp = dsp->rds_finfo;
1762 	open_delegation_type4 dtypewas;
1763 
1764 	rfs4_dbe_lock(fp->rf_dbe);
1765 
1766 	/* nothing to do if no longer on list */
1767 	if (!list_link_active(&dsp->rds_node)) {
1768 		rfs4_dbe_unlock(fp->rf_dbe);
1769 		return;
1770 	}
1771 
1772 	/* Remove state from recall list */
1773 	list_remove(&fp->rf_delegstatelist, dsp);
1774 
1775 	if (list_is_empty(&fp->rf_delegstatelist)) {
1776 		dtypewas = fp->rf_dinfo.rd_dtype;
1777 		fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
1778 		rfs4_dbe_cv_broadcast(fp->rf_dbe);
1779 
1780 		/* if file system was unshared, the vp will be NULL */
1781 		if (fp->rf_vp != NULL) {
1782 			/*
1783 			 * Once a delegation is no longer held by any client,
1784 			 * the monitor is uninstalled.  At this point, the
1785 			 * client must send OPEN otw, so we don't need the
1786 			 * reference on the vnode anymore.  The open
1787 			 * downgrade removes the reference put on earlier.
1788 			 */
1789 			if (dtypewas == OPEN_DELEGATE_READ) {
1790 				(void) fem_uninstall(fp->rf_vp, deleg_rdops,
1791 				    (void *)fp);
1792 				vn_open_downgrade(fp->rf_vp, FREAD);
1793 			} else if (dtypewas == OPEN_DELEGATE_WRITE) {
1794 				(void) fem_uninstall(fp->rf_vp, deleg_wrops,
1795 				    (void *)fp);
1796 				vn_open_downgrade(fp->rf_vp, FREAD|FWRITE);
1797 			}
1798 		}
1799 	}
1800 
1801 	switch (dsp->rds_dtype) {
1802 	case OPEN_DELEGATE_READ:
1803 		fp->rf_dinfo.rd_rdgrants--;
1804 		break;
1805 	case OPEN_DELEGATE_WRITE:
1806 		fp->rf_dinfo.rd_wrgrants--;
1807 		break;
1808 	default:
1809 		break;
1810 	}
1811 
1812 	/* used in the policy decision */
1813 	fp->rf_dinfo.rd_time_returned = gethrestime_sec();
1814 
1815 	/*
1816 	 * reset the time_recalled field so future delegations are not
1817 	 * accidentally revoked
1818 	 */
1819 	if ((fp->rf_dinfo.rd_rdgrants + fp->rf_dinfo.rd_wrgrants) == 0)
1820 		fp->rf_dinfo.rd_time_recalled = 0;
1821 
1822 	rfs4_dbe_unlock(fp->rf_dbe);
1823 
1824 	rfs4_dbe_lock(dsp->rds_dbe);
1825 
1826 	dsp->rds_dtype = OPEN_DELEGATE_NONE;
1827 
1828 	if (revoked == TRUE)
1829 		dsp->rds_time_revoked = gethrestime_sec();
1830 
1831 	rfs4_dbe_invalidate(dsp->rds_dbe);
1832 
1833 	rfs4_dbe_unlock(dsp->rds_dbe);
1834 
1835 	if (revoked == TRUE) {
1836 		rfs4_dbe_lock(dsp->rds_client->rc_dbe);
1837 		dsp->rds_client->rc_deleg_revoked++;	/* observability */
1838 		rfs4_dbe_unlock(dsp->rds_client->rc_dbe);
1839 	}
1840 }
1841 
1842 static void
rfs4_revoke_file(rfs4_file_t * fp)1843 rfs4_revoke_file(rfs4_file_t *fp)
1844 {
1845 	rfs4_deleg_state_t *dsp;
1846 
1847 	/*
1848 	 * The lock for rfs4_file_t must be held when traversing the
1849 	 * delegation list but that lock needs to be released to call
1850 	 * rfs4_return_deleg()
1851 	 */
1852 	rfs4_dbe_lock(fp->rf_dbe);
1853 	while (dsp = list_head(&fp->rf_delegstatelist)) {
1854 		rfs4_dbe_hold(dsp->rds_dbe);
1855 		rfs4_dbe_unlock(fp->rf_dbe);
1856 		rfs4_return_deleg(dsp, TRUE);
1857 		rfs4_deleg_state_rele(dsp);
1858 		rfs4_dbe_lock(fp->rf_dbe);
1859 	}
1860 	rfs4_dbe_unlock(fp->rf_dbe);
1861 }
1862 
1863 /*
1864  * A delegation is assumed to be present on the file associated with
1865  * "sp".  Check to see if the delegation matches is associated with
1866  * the same client as referenced by "sp".  If it is not, TRUE is
1867  * returned.  If the delegation DOES match the client (or no
1868  * delegation is present), return FALSE.
1869  * Assume the state entry and file entry are locked.
1870  */
1871 bool_t
rfs4_is_deleg(rfs4_state_t * sp)1872 rfs4_is_deleg(rfs4_state_t *sp)
1873 {
1874 	rfs4_deleg_state_t *dsp;
1875 	rfs4_file_t *fp = sp->rs_finfo;
1876 	rfs4_client_t *cp = sp->rs_owner->ro_client;
1877 
1878 	ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1879 	for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
1880 	    dsp = list_next(&fp->rf_delegstatelist, dsp)) {
1881 		if (cp != dsp->rds_client) {
1882 			return (TRUE);
1883 		}
1884 	}
1885 	return (FALSE);
1886 }
1887 
1888 void
rfs4_disable_delegation(void)1889 rfs4_disable_delegation(void)
1890 {
1891 	nfs4_srv_t *nsrv4;
1892 
1893 	nsrv4 = nfs4_get_srv();
1894 	mutex_enter(&nsrv4->deleg_lock);
1895 	rfs4_deleg_disabled++;
1896 	mutex_exit(&nsrv4->deleg_lock);
1897 }
1898 
1899 void
rfs4_enable_delegation(void)1900 rfs4_enable_delegation(void)
1901 {
1902 	nfs4_srv_t *nsrv4;
1903 
1904 	nsrv4 = nfs4_get_srv();
1905 	mutex_enter(&nsrv4->deleg_lock);
1906 	ASSERT(rfs4_deleg_disabled > 0);
1907 	rfs4_deleg_disabled--;
1908 	mutex_exit(&nsrv4->deleg_lock);
1909 }
1910 
1911 void
rfs4_mon_hold(void * arg)1912 rfs4_mon_hold(void *arg)
1913 {
1914 	rfs4_file_t *fp = arg;
1915 
1916 	rfs4_dbe_hold(fp->rf_dbe);
1917 }
1918 
1919 void
rfs4_mon_rele(void * arg)1920 rfs4_mon_rele(void *arg)
1921 {
1922 	rfs4_file_t *fp = arg;
1923 
1924 	rfs4_dbe_rele_nolock(fp->rf_dbe);
1925 }
1926