nfs4_dispatch.c revision f3b585ce799a83688c5532c430f6133f098431c2
17c478bdstevel@tonic-gate/*
27c478bdstevel@tonic-gate * CDDL HEADER START
37c478bdstevel@tonic-gate *
47c478bdstevel@tonic-gate * The contents of this file are subject to the terms of the
57c478bdstevel@tonic-gate * Common Development and Distribution License (the "License").
67c478bdstevel@tonic-gate * You may not use this file except in compliance with the License.
77c478bdstevel@tonic-gate *
87c478bdstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bdstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
107c478bdstevel@tonic-gate * See the License for the specific language governing permissions
117c478bdstevel@tonic-gate * and limitations under the License.
127c478bdstevel@tonic-gate *
137c478bdstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
147c478bdstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bdstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
167c478bdstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
177c478bdstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bdstevel@tonic-gate *
197c478bdstevel@tonic-gate * CDDL HEADER END
207c478bdstevel@tonic-gate */
217c478bdstevel@tonic-gate
227c478bdstevel@tonic-gate/*
230406ceameem * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
247c478bdstevel@tonic-gate * Use is subject to license terms.
257c478bdstevel@tonic-gate */
267c478bdstevel@tonic-gate
277c478bdstevel@tonic-gate#pragma ident	"%Z%%M%	%I%	%E% SMI"
287c478bdstevel@tonic-gate
297c478bdstevel@tonic-gate#include <sys/systm.h>
307c478bdstevel@tonic-gate#include <sys/sdt.h>
31b08923dRobert Mustacchi#include <rpc/types.h>
32b08923dRobert Mustacchi#include <rpc/auth.h>
33b08923dRobert Mustacchi#include <rpc/auth_unix.h>
34b08923dRobert Mustacchi#include <rpc/auth_des.h>
357c478bdstevel@tonic-gate#include <rpc/svc.h>
367c478bdstevel@tonic-gate#include <rpc/xdr.h>
377c478bdstevel@tonic-gate#include <nfs/nfs4.h>
387c478bdstevel@tonic-gate#include <nfs/nfs_dispatch.h>
397c478bdstevel@tonic-gate#include <nfs/nfs4_drc.h>
407c478bdstevel@tonic-gate
417c478bdstevel@tonic-gate#define	NFS4_MAX_MINOR_VERSION	0
427c478bdstevel@tonic-gate
437c478bdstevel@tonic-gate/*
447c478bdstevel@tonic-gate * This is the duplicate request cache for NFSv4
457c478bdstevel@tonic-gate */
467c478bdstevel@tonic-gaterfs4_drc_t *nfs4_drc = NULL;
477c478bdstevel@tonic-gate
487c478bdstevel@tonic-gate/*
497c478bdstevel@tonic-gate * The default size of the duplicate request cache
507c478bdstevel@tonic-gate */
517c478bdstevel@tonic-gateuint32_t nfs4_drc_max = 8 * 1024;
527c478bdstevel@tonic-gate
537c478bdstevel@tonic-gate/*
547c478bdstevel@tonic-gate * The number of buckets we'd like to hash the
557c478bdstevel@tonic-gate * replies into.. do not change this on the fly.
567c478bdstevel@tonic-gate */
577c478bdstevel@tonic-gateuint32_t nfs4_drc_hash = 541;
587c478bdstevel@tonic-gate
597c478bdstevel@tonic-gate/*
607c478bdstevel@tonic-gate * Initialize a duplicate request cache.
617c478bdstevel@tonic-gate */
627c478bdstevel@tonic-gaterfs4_drc_t *
630406ceameemrfs4_init_drc(uint32_t drc_size, uint32_t drc_hash_size)
647c478bdstevel@tonic-gate{
657c478bdstevel@tonic-gate	rfs4_drc_t *drc;
667c478bdstevel@tonic-gate	uint32_t   bki;
677c478bdstevel@tonic-gate
687c478bdstevel@tonic-gate	ASSERT(drc_size);
697c478bdstevel@tonic-gate	ASSERT(drc_hash_size);
707c478bdstevel@tonic-gate
717c478bdstevel@tonic-gate	drc = kmem_alloc(sizeof (rfs4_drc_t), KM_SLEEP);
727c478bdstevel@tonic-gate
737c478bdstevel@tonic-gate	drc->max_size = drc_size;
747c478bdstevel@tonic-gate	drc->in_use = 0;
757c478bdstevel@tonic-gate
767c478bdstevel@tonic-gate	mutex_init(&drc->lock, NULL, MUTEX_DEFAULT, NULL);
777c478bdstevel@tonic-gate
787c478bdstevel@tonic-gate	drc->dr_hash = drc_hash_size;
797c478bdstevel@tonic-gate
807c478bdstevel@tonic-gate	drc->dr_buckets = kmem_alloc(sizeof (list_t)*drc_hash_size, KM_SLEEP);
817c478bdstevel@tonic-gate
827c478bdstevel@tonic-gate	for (bki = 0; bki < drc_hash_size; bki++) {
837c478bdstevel@tonic-gate		list_create(&drc->dr_buckets[bki], sizeof (rfs4_dupreq_t),
847c478bdstevel@tonic-gate		    offsetof(rfs4_dupreq_t, dr_bkt_next));
857c478bdstevel@tonic-gate	}
867c478bdstevel@tonic-gate
877c478bdstevel@tonic-gate	list_create(&(drc->dr_cache), sizeof (rfs4_dupreq_t),
887c478bdstevel@tonic-gate	    offsetof(rfs4_dupreq_t, dr_next));
897c478bdstevel@tonic-gate
907c478bdstevel@tonic-gate	return (drc);
917c478bdstevel@tonic-gate}
927c478bdstevel@tonic-gate
937c478bdstevel@tonic-gate/*
947c478bdstevel@tonic-gate * Destroy a duplicate request cache.
957c478bdstevel@tonic-gate */
967c478bdstevel@tonic-gatevoid
977c478bdstevel@tonic-gaterfs4_fini_drc(rfs4_drc_t *drc)
987c478bdstevel@tonic-gate{
997c478bdstevel@tonic-gate	rfs4_dupreq_t *drp, *drp_next;
1007c478bdstevel@tonic-gate
1017c478bdstevel@tonic-gate	ASSERT(drc);
1027c478bdstevel@tonic-gate
1037c478bdstevel@tonic-gate	/* iterate over the dr_cache and free the enties */
1047c478bdstevel@tonic-gate	for (drp = list_head(&(drc->dr_cache)); drp != NULL; drp = drp_next) {
1057c478bdstevel@tonic-gate
1067c478bdstevel@tonic-gate		if (drp->dr_state == NFS4_DUP_REPLAY)
1077c478bdstevel@tonic-gate			rfs4_compound_free(&(drp->dr_res));
1087c478bdstevel@tonic-gate
1097c478bdstevel@tonic-gate		if (drp->dr_addr.buf != NULL)
1107c478bdstevel@tonic-gate			kmem_free(drp->dr_addr.buf, drp->dr_addr.maxlen);
1117c478bdstevel@tonic-gate
1127c478bdstevel@tonic-gate		drp_next = list_next(&(drc->dr_cache), drp);
1137c478bdstevel@tonic-gate
1147c478bdstevel@tonic-gate		kmem_free(drp, sizeof (rfs4_dupreq_t));
1157c478bdstevel@tonic-gate	}
1167c478bdstevel@tonic-gate
1177c478bdstevel@tonic-gate	mutex_destroy(&drc->lock);
1187c478bdstevel@tonic-gate	kmem_free(drc->dr_buckets,
1197c478bdstevel@tonic-gate	    sizeof (list_t)*drc->dr_hash);
1207c478bdstevel@tonic-gate	kmem_free(drc, sizeof (rfs4_drc_t));
1217c478bdstevel@tonic-gate}
1227c478bdstevel@tonic-gate
1237c478bdstevel@tonic-gate/*
1247c478bdstevel@tonic-gate * rfs4_dr_chstate:
1257c478bdstevel@tonic-gate *
1267c478bdstevel@tonic-gate * Change the state of a rfs4_dupreq. If it's not in transition
1277c478bdstevel@tonic-gate * to the FREE state, update the time used and return. If we
1287c478bdstevel@tonic-gate * are moving to the FREE state then we need to clean up the
1297c478bdstevel@tonic-gate * compound results and move the entry to the end of the list.
1307c478bdstevel@tonic-gate */
1317c478bdstevel@tonic-gatevoid
1327c478bdstevel@tonic-gaterfs4_dr_chstate(rfs4_dupreq_t *drp, int new_state)
1337c478bdstevel@tonic-gate{
1347c478bdstevel@tonic-gate	rfs4_drc_t *drc;
1357c478bdstevel@tonic-gate
1367c478bdstevel@tonic-gate	ASSERT(drp);
1377c478bdstevel@tonic-gate	ASSERT(drp->drc);
1387c478bdstevel@tonic-gate	ASSERT(drp->dr_bkt);
1397c478bdstevel@tonic-gate	ASSERT(MUTEX_HELD(&drp->drc->lock));
1407c478bdstevel@tonic-gate
1417c478bdstevel@tonic-gate	drp->dr_state = new_state;
1427c478bdstevel@tonic-gate
1437c478bdstevel@tonic-gate	if (new_state != NFS4_DUP_FREE) {
1447c478bdstevel@tonic-gate		gethrestime(&drp->dr_time_used);
1457c478bdstevel@tonic-gate		return;
1467c478bdstevel@tonic-gate	}
1477c478bdstevel@tonic-gate
1487c478bdstevel@tonic-gate	drc = drp->drc;
1497c478bdstevel@tonic-gate
1507c478bdstevel@tonic-gate	/*
1517c478bdstevel@tonic-gate	 * Remove entry from the bucket and
1527c478bdstevel@tonic-gate	 * dr_cache list, free compound results.
1537c478bdstevel@tonic-gate	 */
1547c478bdstevel@tonic-gate	list_remove(drp->dr_bkt, drp);
1557c478bdstevel@tonic-gate	list_remove(&(drc->dr_cache), drp);
1567c478bdstevel@tonic-gate	rfs4_compound_free(&(drp->dr_res));
1577c478bdstevel@tonic-gate}
1587c478bdstevel@tonic-gate
1597c478bdstevel@tonic-gate/*
1607c478bdstevel@tonic-gate * rfs4_alloc_dr:
1617c478bdstevel@tonic-gate *
1627c478bdstevel@tonic-gate * Malloc a new one if we have not reached our maximum cache
1637c478bdstevel@tonic-gate * limit, otherwise pick an entry off the tail -- Use if it
1647c478bdstevel@tonic-gate * is marked as NFS4_DUP_FREE, or is an entry in the
1657c478bdstevel@tonic-gate * NFS4_DUP_REPLAY state.
1667c478bdstevel@tonic-gate */
1677c478bdstevel@tonic-gaterfs4_dupreq_t *
1687c478bdstevel@tonic-gaterfs4_alloc_dr(rfs4_drc_t *drc)
1697c478bdstevel@tonic-gate{
1707c478bdstevel@tonic-gate	rfs4_dupreq_t *drp_tail, *drp = NULL;
1717c478bdstevel@tonic-gate
1727c478bdstevel@tonic-gate	ASSERT(drc);
1737c478bdstevel@tonic-gate	ASSERT(MUTEX_HELD(&drc->lock));
1747c478bdstevel@tonic-gate
1757c478bdstevel@tonic-gate	/*
1767c478bdstevel@tonic-gate	 * Have we hit the cache limit yet ?
1777c478bdstevel@tonic-gate	 */
1787c478bdstevel@tonic-gate	if (drc->in_use < drc->max_size) {
1797c478bdstevel@tonic-gate		/*
1807c478bdstevel@tonic-gate		 * nope, so let's malloc a new one
1817c478bdstevel@tonic-gate		 */
1827c478bdstevel@tonic-gate		drp = kmem_zalloc(sizeof (rfs4_dupreq_t), KM_SLEEP);
1837c478bdstevel@tonic-gate		drp->drc = drc;
1847c478bdstevel@tonic-gate		drc->in_use++;
1857c478bdstevel@tonic-gate		gethrestime(&drp->dr_time_created);
1867c478bdstevel@tonic-gate		DTRACE_PROBE1(nfss__i__drc_new, rfs4_dupreq_t *, drp);
1877c478bdstevel@tonic-gate		return (drp);
1887c478bdstevel@tonic-gate	}
1897c478bdstevel@tonic-gate
1907c478bdstevel@tonic-gate	/*
1917c478bdstevel@tonic-gate	 * Cache is all allocated now traverse the list
1927c478bdstevel@tonic-gate	 * backwards to find one we can reuse.
1937c478bdstevel@tonic-gate	 */
1947c478bdstevel@tonic-gate	for (drp_tail = list_tail(&drc->dr_cache); drp_tail != NULL;
1957c478bdstevel@tonic-gate	    drp_tail = list_prev(&drc->dr_cache, drp_tail)) {
1967c478bdstevel@tonic-gate
1977c478bdstevel@tonic-gate		switch (drp_tail->dr_state) {
1987c478bdstevel@tonic-gate
1997c478bdstevel@tonic-gate		case NFS4_DUP_FREE:
2007c478bdstevel@tonic-gate			list_remove(&(drc->dr_cache), drp_tail);
2017c478bdstevel@tonic-gate			DTRACE_PROBE1(nfss__i__drc_freeclaim,
2027c478bdstevel@tonic-gate			    rfs4_dupreq_t *, drp_tail);
2037c478bdstevel@tonic-gate			return (drp_tail);
2047c478bdstevel@tonic-gate			/* NOTREACHED */
2057c478bdstevel@tonic-gate
2067c478bdstevel@tonic-gate		case NFS4_DUP_REPLAY:
2077c478bdstevel@tonic-gate			/* grab it. */
2087c478bdstevel@tonic-gate			rfs4_dr_chstate(drp_tail, NFS4_DUP_FREE);
2097c478bdstevel@tonic-gate			DTRACE_PROBE1(nfss__i__drc_replayclaim,
2107c478bdstevel@tonic-gate			    rfs4_dupreq_t *, drp_tail);
2117c478bdstevel@tonic-gate			return (drp_tail);
2127c478bdstevel@tonic-gate			/* NOTREACHED */
2137c478bdstevel@tonic-gate		}
2147c478bdstevel@tonic-gate	}
2157c478bdstevel@tonic-gate	DTRACE_PROBE1(nfss__i__drc_full, rfs4_drc_t *, drc);
2167c478bdstevel@tonic-gate	return (NULL);
2177c478bdstevel@tonic-gate}
2187c478bdstevel@tonic-gate
2197c478bdstevel@tonic-gate/*
2207c478bdstevel@tonic-gate * rfs4_find_dr:
2217c478bdstevel@tonic-gate *
2227c478bdstevel@tonic-gate * Search for an entry in the duplicate request cache by
2237c478bdstevel@tonic-gate * calculating the hash index based on the XID, and examining
2247c478bdstevel@tonic-gate * the entries in the hash bucket. If we find a match stamp the
2257c478bdstevel@tonic-gate * time_used and return. If the entry does not match it could be
2267c478bdstevel@tonic-gate * ready to be freed. Once we have searched the bucket we call
2277c478bdstevel@tonic-gate * rfs4_alloc_dr() to allocate a new entry, or reuse one that is
2287c478bdstevel@tonic-gate * available.
2297c478bdstevel@tonic-gate */
2307c478bdstevel@tonic-gateint
2317c478bdstevel@tonic-gaterfs4_find_dr(struct svc_req *req, rfs4_drc_t *drc, rfs4_dupreq_t **dup)
2327c478bdstevel@tonic-gate{
2337c478bdstevel@tonic-gate
2347c478bdstevel@tonic-gate	uint32_t	the_xid;
2357c478bdstevel@tonic-gate	list_t		*dr_bkt;
2367c478bdstevel@tonic-gate	rfs4_dupreq_t	*drp;
2377c478bdstevel@tonic-gate	int		bktdex;
238b08923dRobert Mustacchi
239b08923dRobert Mustacchi	/*
240b08923dRobert Mustacchi	 * Get the XID, calculate the bucket and search to
241b08923dRobert Mustacchi	 * see if we need to replay from the cache.
2427c478bdstevel@tonic-gate	 */
2437c478bdstevel@tonic-gate	the_xid = req->rq_xprt->xp_xid;
2447c478bdstevel@tonic-gate	bktdex = the_xid % drc->dr_hash;
2457c478bdstevel@tonic-gate
2467c478bdstevel@tonic-gate	dr_bkt = (list_t *)
2477c478bdstevel@tonic-gate	    &(drc->dr_buckets[(the_xid % drc->dr_hash)]);
2487c478bdstevel@tonic-gate
2497c478bdstevel@tonic-gate	DTRACE_PROBE3(nfss__i__drc_bktdex,
2507c478bdstevel@tonic-gate	    int, bktdex,
2517c478bdstevel@tonic-gate	    uint32_t, the_xid,
2527c478bdstevel@tonic-gate	    list_t *, dr_bkt);
2537c478bdstevel@tonic-gate
2547c478bdstevel@tonic-gate	*dup = NULL;
2557c478bdstevel@tonic-gate
2567c478bdstevel@tonic-gate	mutex_enter(&drc->lock);
2577c478bdstevel@tonic-gate	/*
2587c478bdstevel@tonic-gate	 * Search the bucket for a matching xid and address.
2597c478bdstevel@tonic-gate	 */
2607c478bdstevel@tonic-gate	for (drp = list_head(dr_bkt); drp != NULL;
2617c478bdstevel@tonic-gate	    drp = list_next(dr_bkt, drp)) {
2627c478bdstevel@tonic-gate
2637c478bdstevel@tonic-gate		if (drp->dr_xid == the_xid &&
2647c478bdstevel@tonic-gate		    drp->dr_addr.len == req->rq_xprt->xp_rtaddr.len &&
2657c478bdstevel@tonic-gate		    bcmp((caddr_t)drp->dr_addr.buf,
2667c478bdstevel@tonic-gate		    (caddr_t)req->rq_xprt->xp_rtaddr.buf,
2677c478bdstevel@tonic-gate		    drp->dr_addr.len) == 0) {
2687c478bdstevel@tonic-gate
2697c478bdstevel@tonic-gate			/*
2707c478bdstevel@tonic-gate			 * Found a match so REPLAY the Reply
2717c478bdstevel@tonic-gate			 */
2727c478bdstevel@tonic-gate			if (drp->dr_state == NFS4_DUP_REPLAY) {
2737c478bdstevel@tonic-gate				rfs4_dr_chstate(drp, NFS4_DUP_INUSE);
2747c478bdstevel@tonic-gate				mutex_exit(&drc->lock);
2757c478bdstevel@tonic-gate				*dup = drp;
2767c478bdstevel@tonic-gate				DTRACE_PROBE1(nfss__i__drc_replay,
2777c478bdstevel@tonic-gate				    rfs4_dupreq_t *, drp);
2787c478bdstevel@tonic-gate				return (NFS4_DUP_REPLAY);
2797c478bdstevel@tonic-gate			}
2807c478bdstevel@tonic-gate
2817c478bdstevel@tonic-gate			/*
2827c478bdstevel@tonic-gate			 * This entry must be in transition, so return
2837c478bdstevel@tonic-gate			 * the 'pending' status.
2847c478bdstevel@tonic-gate			 */
2857c478bdstevel@tonic-gate			mutex_exit(&drc->lock);
2867c478bdstevel@tonic-gate			return (NFS4_DUP_PENDING);
2877c478bdstevel@tonic-gate		}
288b08923dRobert Mustacchi
2897c478bdstevel@tonic-gate		/*
2907c478bdstevel@tonic-gate		 * Not a match, but maybe this entry is okay
2917c478bdstevel@tonic-gate		 * to be reused.
2927c478bdstevel@tonic-gate		 */
2937c478bdstevel@tonic-gate		if (drp->dr_state == NFS4_DUP_REPLAY) {
2947c478bdstevel@tonic-gate			rfs4_dr_chstate(drp, NFS4_DUP_FREE);
2957c478bdstevel@tonic-gate			list_insert_tail(&(drp->drc->dr_cache), drp);
2967c478bdstevel@tonic-gate		}
2977c478bdstevel@tonic-gate	}
2987c478bdstevel@tonic-gate
2997c478bdstevel@tonic-gate	drp = rfs4_alloc_dr(drc);
3007c478bdstevel@tonic-gate	mutex_exit(&drc->lock);
3017c478bdstevel@tonic-gate
3027c478bdstevel@tonic-gate	/*
3037c478bdstevel@tonic-gate	 * The DRC is full and all entries are in use. Upper function
3047c478bdstevel@tonic-gate	 * should error out this request and force the client to
3057c478bdstevel@tonic-gate	 * retransmit -- effectively this is a resource issue. NFSD
3067c478bdstevel@tonic-gate	 * threads tied up with native File System, or the cache size
3077c478bdstevel@tonic-gate	 * is too small for the server load.
3087c478bdstevel@tonic-gate	 */
3097c478bdstevel@tonic-gate	if (drp == NULL)
3107c478bdstevel@tonic-gate		return (NFS4_DUP_ERROR);
3117c478bdstevel@tonic-gate
3127c478bdstevel@tonic-gate	/*
3137c478bdstevel@tonic-gate	 * Init the state to NEW and clear the time used field.
3147c478bdstevel@tonic-gate	 */
3157c478bdstevel@tonic-gate	drp->dr_state = NFS4_DUP_NEW;
3167c478bdstevel@tonic-gate	drp->dr_time_used.tv_sec = drp->dr_time_used.tv_nsec = 0;
3177c478bdstevel@tonic-gate
3187c478bdstevel@tonic-gate	/*
3197c478bdstevel@tonic-gate	 * If needed, resize the address buffer
3207c478bdstevel@tonic-gate	 */
3217c478bdstevel@tonic-gate	if (drp->dr_addr.maxlen < req->rq_xprt->xp_rtaddr.len) {
3227c478bdstevel@tonic-gate		if (drp->dr_addr.buf != NULL)
3237c478bdstevel@tonic-gate			kmem_free(drp->dr_addr.buf, drp->dr_addr.maxlen);
3247c478bdstevel@tonic-gate		drp->dr_addr.maxlen = req->rq_xprt->xp_rtaddr.len;
3257c478bdstevel@tonic-gate		drp->dr_addr.buf = kmem_alloc(drp->dr_addr.maxlen, KM_NOSLEEP);
3267c478bdstevel@tonic-gate		if (drp->dr_addr.buf == NULL) {
3277c478bdstevel@tonic-gate			/*
3287c478bdstevel@tonic-gate			 * If the malloc fails, mark the entry
3297c478bdstevel@tonic-gate			 * as free and put on the tail.
3307c478bdstevel@tonic-gate			 */
3317c478bdstevel@tonic-gate			drp->dr_addr.maxlen = 0;
3327c478bdstevel@tonic-gate			drp->dr_state = NFS4_DUP_FREE;
3337c478bdstevel@tonic-gate			mutex_enter(&drc->lock);
3347c478bdstevel@tonic-gate			list_insert_tail(&(drc->dr_cache), drp);
3357c478bdstevel@tonic-gate			mutex_exit(&drc->lock);
3367c478bdstevel@tonic-gate			return (NFS4_DUP_ERROR);
3377c478bdstevel@tonic-gate		}
3387c478bdstevel@tonic-gate	}
3397c478bdstevel@tonic-gate
3407c478bdstevel@tonic-gate
3417c478bdstevel@tonic-gate	/*
3427c478bdstevel@tonic-gate	 * Copy the address.
3437c478bdstevel@tonic-gate	 */
3447c478bdstevel@tonic-gate	drp->dr_addr.len = req->rq_xprt->xp_rtaddr.len;
3457c478bdstevel@tonic-gate
3467c478bdstevel@tonic-gate	bcopy((caddr_t)req->rq_xprt->xp_rtaddr.buf,
3477c478bdstevel@tonic-gate	    (caddr_t)drp->dr_addr.buf,
3487c478bdstevel@tonic-gate	    drp->dr_addr.len);
3497c478bdstevel@tonic-gate
3507c478bdstevel@tonic-gate	drp->dr_xid = the_xid;
3517c478bdstevel@tonic-gate	drp->dr_bkt = dr_bkt;
3527c478bdstevel@tonic-gate
3537c478bdstevel@tonic-gate	/*
3547c478bdstevel@tonic-gate	 * Insert at the head of the bucket and
3557c478bdstevel@tonic-gate	 * the drc lists..
3567c478bdstevel@tonic-gate	 */
3577c478bdstevel@tonic-gate	mutex_enter(&drc->lock);
358b08923dRobert Mustacchi	list_insert_head(&drc->dr_cache, drp);
359b08923dRobert Mustacchi	list_insert_head(dr_bkt, drp);
3607c478bdstevel@tonic-gate	mutex_exit(&drc->lock);
3617c478bdstevel@tonic-gate
3627c478bdstevel@tonic-gate	*dup = drp;
3637c478bdstevel@tonic-gate
3647c478bdstevel@tonic-gate	return (NFS4_DUP_NEW);
3657c478bdstevel@tonic-gate}
3667c478bdstevel@tonic-gate
3677c478bdstevel@tonic-gate/*
3687c478bdstevel@tonic-gate *
3697c478bdstevel@tonic-gate * This function handles the duplicate request cache,
3707c478bdstevel@tonic-gate * NULL_PROC and COMPOUND procedure calls for NFSv4;
3717c478bdstevel@tonic-gate *
3727c478bdstevel@tonic-gate * Passed into this function are:-
3737c478bdstevel@tonic-gate *
3747c478bdstevel@tonic-gate * 	disp	A pointer to our dispatch table entry
3757c478bdstevel@tonic-gate * 	req	The request to process
3767c478bdstevel@tonic-gate * 	xprt	The server transport handle
3777c478bdstevel@tonic-gate * 	ap	A pointer to the arguments
3787c478bdstevel@tonic-gate *
3797c478bdstevel@tonic-gate *
3807c478bdstevel@tonic-gate * When appropriate this function is responsible for inserting
3817c478bdstevel@tonic-gate * the reply into the duplicate cache or replaying an existing
3827c478bdstevel@tonic-gate * cached reply.
3837c478bdstevel@tonic-gate *
3847c478bdstevel@tonic-gate * dr_stat 	reflects the state of the duplicate request that
3857c478bdstevel@tonic-gate * 		has been inserted into or retrieved from the cache
3867c478bdstevel@tonic-gate *
3877c478bdstevel@tonic-gate * drp		is the duplicate request entry
3887c478bdstevel@tonic-gate *
3897c478bdstevel@tonic-gate */
3907c478bdstevel@tonic-gateint
3917c478bdstevel@tonic-gaterfs4_dispatch(struct rpcdisp *disp, struct svc_req *req,
3927c478bdstevel@tonic-gate		SVCXPRT *xprt, char *ap)
3937c478bdstevel@tonic-gate{
3947c478bdstevel@tonic-gate
3957c478bdstevel@tonic-gate	COMPOUND4res res_buf, *rbp;
3967c478bdstevel@tonic-gate	COMPOUND4args *cap;
3977c478bdstevel@tonic-gate
3987c478bdstevel@tonic-gate	cred_t 	*cr = NULL;
3997c478bdstevel@tonic-gate	int	error = 0;
4007c478bdstevel@tonic-gate	int 	dis_flags = 0;
4017c478bdstevel@tonic-gate	int 	dr_stat = NFS4_NOT_DUP;
4027c478bdstevel@tonic-gate	rfs4_dupreq_t *drp = NULL;
4037c478bdstevel@tonic-gate
4047c478bdstevel@tonic-gate	ASSERT(disp);
4057c478bdstevel@tonic-gate
4067c478bdstevel@tonic-gate	/*
4077c478bdstevel@tonic-gate	 * Short circuit the RPC_NULL proc.
4087c478bdstevel@tonic-gate	 */
4097c478bdstevel@tonic-gate	if (disp->dis_proc == rpc_null) {
4107c478bdstevel@tonic-gate		DTRACE_NFSV4_1(null__start, struct svc_req *, req);
4117c478bdstevel@tonic-gate		if (!svc_sendreply(xprt, xdr_void, NULL)) {
4127c478bdstevel@tonic-gate			DTRACE_NFSV4_1(null__done, struct svc_req *, req);
4137c478bdstevel@tonic-gate			return (1);
4147c478bdstevel@tonic-gate		}
4157c478bdstevel@tonic-gate		DTRACE_NFSV4_1(null__done, struct svc_req *, req);
4167c478bdstevel@tonic-gate		return (0);
4177c478bdstevel@tonic-gate	}
4187c478bdstevel@tonic-gate
4197c478bdstevel@tonic-gate	/* Only NFSv4 Compounds from this point onward */
4207c478bdstevel@tonic-gate
4217c478bdstevel@tonic-gate	rbp = &res_buf;
4227c478bdstevel@tonic-gate	cap = (COMPOUND4args *)ap;
4237c478bdstevel@tonic-gate
4247c478bdstevel@tonic-gate	/*
4257c478bdstevel@tonic-gate	 * Figure out the disposition of the whole COMPOUND
4267c478bdstevel@tonic-gate	 * and record it's IDEMPOTENTCY.
4277c478bdstevel@tonic-gate	 */
4287c478bdstevel@tonic-gate	rfs4_compound_flagproc(cap, &dis_flags);
4297c478bdstevel@tonic-gate
4307c478bdstevel@tonic-gate	/*
4317c478bdstevel@tonic-gate	 * If NON-IDEMPOTENT then we need to figure out if this
4327c478bdstevel@tonic-gate	 * request can be replied from the duplicate cache.
4337c478bdstevel@tonic-gate	 *
4347c478bdstevel@tonic-gate	 * If this is a new request then we need to insert the
4357c478bdstevel@tonic-gate	 * reply into the duplicate cache.
4367c478bdstevel@tonic-gate	 */
4377c478bdstevel@tonic-gate	if (!(dis_flags & RPC_IDEMPOTENT)) {
4387c478bdstevel@tonic-gate		/* look for a replay from the cache or allocate */
4397c478bdstevel@tonic-gate		dr_stat = rfs4_find_dr(req, nfs4_drc, &drp);
4407c478bdstevel@tonic-gate
4417c478bdstevel@tonic-gate		switch (dr_stat) {
4427c478bdstevel@tonic-gate
4437c478bdstevel@tonic-gate		case NFS4_DUP_ERROR:
4447c478bdstevel@tonic-gate			svcerr_systemerr(xprt);
4457c478bdstevel@tonic-gate			return (1);
4467c478bdstevel@tonic-gate			/* NOTREACHED */
4477c478bdstevel@tonic-gate
4487c478bdstevel@tonic-gate		case NFS4_DUP_PENDING:
4497c478bdstevel@tonic-gate			/*
4507c478bdstevel@tonic-gate			 * reply has previously been inserted into the
4517c478bdstevel@tonic-gate			 * duplicate cache, however the reply has
4527c478bdstevel@tonic-gate			 * not yet been sent via svc_sendreply()
4537c478bdstevel@tonic-gate			 */
4547c478bdstevel@tonic-gate			return (1);
4557c478bdstevel@tonic-gate			/* NOTREACHED */
4567c478bdstevel@tonic-gate
4577c478bdstevel@tonic-gate		case NFS4_DUP_NEW:
4587c478bdstevel@tonic-gate			curthread->t_flag |= T_DONTPEND;
4597c478bdstevel@tonic-gate			/* NON-IDEMPOTENT proc call */
4607c478bdstevel@tonic-gate			rfs4_compound(cap, rbp, NULL, req, cr);
4617c478bdstevel@tonic-gate
4627c478bdstevel@tonic-gate			curthread->t_flag &= ~T_DONTPEND;
4637c478bdstevel@tonic-gate
4647c478bdstevel@tonic-gate			/*
4657c478bdstevel@tonic-gate			 * dr_res must be initialized before calling
4667c478bdstevel@tonic-gate			 * rfs4_dr_chstate (it frees the reply).
4677c478bdstevel@tonic-gate			 */
4687c478bdstevel@tonic-gate			drp->dr_res = res_buf;
4697c478bdstevel@tonic-gate			if (curthread->t_flag & T_WOULDBLOCK) {
4707c478bdstevel@tonic-gate				curthread->t_flag &= ~T_WOULDBLOCK;
4717c478bdstevel@tonic-gate				/*
4727c478bdstevel@tonic-gate				 * mark this entry as FREE and plop
4737c478bdstevel@tonic-gate				 * on the end of the cache list
4747c478bdstevel@tonic-gate				 */
4757c478bdstevel@tonic-gate				mutex_enter(&drp->drc->lock);
4767c478bdstevel@tonic-gate				rfs4_dr_chstate(drp, NFS4_DUP_FREE);
4777c478bdstevel@tonic-gate				list_insert_tail(&(drp->drc->dr_cache), drp);
4787c478bdstevel@tonic-gate				mutex_exit(&drp->drc->lock);
4797c478bdstevel@tonic-gate				return (1);
4807c478bdstevel@tonic-gate			}
4817c478bdstevel@tonic-gate			break;
4827c478bdstevel@tonic-gate
4837c478bdstevel@tonic-gate		case NFS4_DUP_REPLAY:
4847c478bdstevel@tonic-gate			/* replay from the cache */
4857c478bdstevel@tonic-gate			rbp = &(drp->dr_res);
4867c478bdstevel@tonic-gate			break;
4877c478bdstevel@tonic-gate		}
4887c478bdstevel@tonic-gate	} else {
4897c478bdstevel@tonic-gate		curthread->t_flag |= T_DONTPEND;
4907c478bdstevel@tonic-gate		/* IDEMPOTENT proc call */
4917c478bdstevel@tonic-gate		rfs4_compound(cap, rbp, NULL, req, cr);
4927c478bdstevel@tonic-gate
4937c478bdstevel@tonic-gate		curthread->t_flag &= ~T_DONTPEND;
4947c478bdstevel@tonic-gate		if (curthread->t_flag & T_WOULDBLOCK) {
4957c478bdstevel@tonic-gate			curthread->t_flag &= ~T_WOULDBLOCK;
4967c478bdstevel@tonic-gate			return (1);
4977c478bdstevel@tonic-gate		}
4987c478bdstevel@tonic-gate	}
4997c478bdstevel@tonic-gate
5007c478bdstevel@tonic-gate	/*
5017c478bdstevel@tonic-gate	 * Send out the replayed reply or the 'real' one.
5027c478bdstevel@tonic-gate	 */
5037c478bdstevel@tonic-gate	if (!svc_sendreply(xprt,  xdr_COMPOUND4res_srv, (char *)rbp)) {
5047c478bdstevel@tonic-gate		DTRACE_PROBE2(nfss__e__dispatch_sendfail,
5057c478bdstevel@tonic-gate		    struct svc_req *, xprt,
5067c478bdstevel@tonic-gate		    char *, rbp);
5077c478bdstevel@tonic-gate		error++;
5087c478bdstevel@tonic-gate	}
5097c478bdstevel@tonic-gate
5107c478bdstevel@tonic-gate	/*
5117c478bdstevel@tonic-gate	 * If this reply was just inserted into the duplicate cache
5127c478bdstevel@tonic-gate	 * or it was replayed from the dup cache; (re)mark it as
5137c478bdstevel@tonic-gate	 * available for replay
5147c478bdstevel@tonic-gate	 *
5157c478bdstevel@tonic-gate	 * At first glance, this 'if' statement seems a little strange;
5167c478bdstevel@tonic-gate	 * testing for NFS4_DUP_REPLAY, and then calling...
5177c478bdstevel@tonic-gate	 *
5187c478bdstevel@tonic-gate	 *	rfs4_dr_chatate(NFS4_DUP_REPLAY)
5197c478bdstevel@tonic-gate	 *
5207c478bdstevel@tonic-gate	 * ... but notice that we are checking dr_stat, and not the
5217c478bdstevel@tonic-gate	 * state of the entry itself, the entry will be NFS4_DUP_INUSE,
5227c478bdstevel@tonic-gate	 * we do that so that we know not to prematurely reap it whilst
5237c478bdstevel@tonic-gate	 * we resent it to the client.
5247c478bdstevel@tonic-gate	 *
5257c478bdstevel@tonic-gate	 */
5267c478bdstevel@tonic-gate	if (dr_stat == NFS4_DUP_NEW || dr_stat == NFS4_DUP_REPLAY) {
5277c478bdstevel@tonic-gate		mutex_enter(&drp->drc->lock);
5287c478bdstevel@tonic-gate		rfs4_dr_chstate(drp, NFS4_DUP_REPLAY);
5297c478bdstevel@tonic-gate		mutex_exit(&drp->drc->lock);
5307c478bdstevel@tonic-gate	} else if (dr_stat == NFS4_NOT_DUP) {
5317c478bdstevel@tonic-gate		rfs4_compound_free(rbp);
5327c478bdstevel@tonic-gate	}
5337c478bdstevel@tonic-gate
5347c478bdstevel@tonic-gate	return (error);
5357c478bdstevel@tonic-gate}
5367c478bdstevel@tonic-gate
5377c478bdstevel@tonic-gatebool_t
5387c478bdstevel@tonic-gaterfs4_minorvers_mismatch(struct svc_req *req, SVCXPRT *xprt, void *args)
5397c478bdstevel@tonic-gate{
5407c478bdstevel@tonic-gate	COMPOUND4args *argsp;
5417c478bdstevel@tonic-gate	COMPOUND4res res_buf, *resp;
5427c478bdstevel@tonic-gate
5437c478bdstevel@tonic-gate	if (req->rq_vers != 4)
5447c478bdstevel@tonic-gate		return (FALSE);
5457c478bdstevel@tonic-gate
5467c478bdstevel@tonic-gate	argsp = (COMPOUND4args *)args;
5477c478bdstevel@tonic-gate
5487c478bdstevel@tonic-gate	if (argsp->minorversion <= NFS4_MAX_MINOR_VERSION)
5497c478bdstevel@tonic-gate		return (FALSE);
5507c478bdstevel@tonic-gate
5517c478bdstevel@tonic-gate	resp = &res_buf;
5527c478bdstevel@tonic-gate
5537c478bdstevel@tonic-gate	/*
5547c478bdstevel@tonic-gate	 * Form a reply tag by copying over the reqeuest tag.
5557c478bdstevel@tonic-gate	 */
5567c478bdstevel@tonic-gate	resp->tag.utf8string_val =
5577c478bdstevel@tonic-gate	    kmem_alloc(argsp->tag.utf8string_len, KM_SLEEP);
558b08923dRobert Mustacchi	resp->tag.utf8string_len = argsp->tag.utf8string_len;
559b08923dRobert Mustacchi	bcopy(argsp->tag.utf8string_val, resp->tag.utf8string_val,
5607c478bdstevel@tonic-gate	    resp->tag.utf8string_len);
5617c478bdstevel@tonic-gate	resp->array_len = 0;
5627c478bdstevel@tonic-gate	resp->array = NULL;
5637c478bdstevel@tonic-gate	resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5647c478bdstevel@tonic-gate	if (!svc_sendreply(xprt,  xdr_COMPOUND4res_srv, (char *)resp)) {
5657c478bdstevel@tonic-gate		DTRACE_PROBE2(nfss__e__minorvers_mismatch,
5667c478bdstevel@tonic-gate		    SVCXPRT *, xprt, char *, resp);
5677c478bdstevel@tonic-gate	}
5687c478bdstevel@tonic-gate	rfs4_compound_free(resp);
5697c478bdstevel@tonic-gate	return (TRUE);
5707c478bdstevel@tonic-gate}
5717c478bdstevel@tonic-gate