xref: /illumos-gate/usr/src/uts/common/rpc/clnt_rdma.c (revision 8ca9c6bb)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
50a701b1eSRobert Gordon  * Common Development and Distribution License (the "License").
60a701b1eSRobert Gordon  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
2240d23e76SSiddheshwar Mahesh  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
267c478bd9Sstevel@tonic-gate /* All Rights Reserved */
277c478bd9Sstevel@tonic-gate /*
287c478bd9Sstevel@tonic-gate  * Portions of this source code were derived from Berkeley
297c478bd9Sstevel@tonic-gate  * 4.3 BSD under license from the Regents of the University of
307c478bd9Sstevel@tonic-gate  * California.
317c478bd9Sstevel@tonic-gate  */
327c478bd9Sstevel@tonic-gate 
337c478bd9Sstevel@tonic-gate #include <sys/param.h>
347c478bd9Sstevel@tonic-gate #include <sys/types.h>
357c478bd9Sstevel@tonic-gate #include <sys/user.h>
367c478bd9Sstevel@tonic-gate #include <sys/systm.h>
377c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
387c478bd9Sstevel@tonic-gate #include <sys/errno.h>
397c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
407c478bd9Sstevel@tonic-gate #include <sys/debug.h>
417c478bd9Sstevel@tonic-gate #include <sys/systm.h>
427c478bd9Sstevel@tonic-gate #include <sys/kstat.h>
437c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>
447c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
457c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
467c478bd9Sstevel@tonic-gate #include <sys/time.h>
477c478bd9Sstevel@tonic-gate #include <sys/isa_defs.h>
487c478bd9Sstevel@tonic-gate #include <sys/zone.h>
490a701b1eSRobert Gordon #include <sys/sdt.h>
507c478bd9Sstevel@tonic-gate 
517c478bd9Sstevel@tonic-gate #include <rpc/types.h>
527c478bd9Sstevel@tonic-gate #include <rpc/xdr.h>
537c478bd9Sstevel@tonic-gate #include <rpc/auth.h>
547c478bd9Sstevel@tonic-gate #include <rpc/clnt.h>
557c478bd9Sstevel@tonic-gate #include <rpc/rpc_msg.h>
567c478bd9Sstevel@tonic-gate #include <rpc/rpc_rdma.h>
570a701b1eSRobert Gordon #include <nfs/nfs.h>
580a701b1eSRobert Gordon #include <nfs/nfs4_kprot.h>
590a701b1eSRobert Gordon 
600a701b1eSRobert Gordon static uint32_t rdma_bufs_rqst = RDMA_BUFS_RQST;
610a701b1eSRobert Gordon 
620a701b1eSRobert Gordon static int clnt_compose_rpcmsg(CLIENT *, rpcproc_t, rdma_buf_t *,
630a701b1eSRobert Gordon 			    XDR *, xdrproc_t, caddr_t);
640a701b1eSRobert Gordon static int  clnt_compose_rdma_header(CONN *, CLIENT *, rdma_buf_t *,
650a701b1eSRobert Gordon 		    XDR **, uint_t *);
660a701b1eSRobert Gordon static int clnt_setup_rlist(CONN *, XDR *, XDR *);
67f837ee4aSSiddheshwar Mahesh static int clnt_setup_wlist(CONN *, XDR *, XDR *, rdma_buf_t *);
680a701b1eSRobert Gordon static int clnt_setup_long_reply(CONN *, struct clist **, uint_t);
690a701b1eSRobert Gordon static void clnt_check_credit(CONN *);
700a701b1eSRobert Gordon static void clnt_return_credit(CONN *);
710a701b1eSRobert Gordon static void clnt_decode_long_reply(CONN *, struct clist *,
720a701b1eSRobert Gordon 		struct clist *, XDR *, XDR **, struct clist *,
730a701b1eSRobert Gordon 		struct clist *, uint_t, uint_t);
740a701b1eSRobert Gordon 
750a701b1eSRobert Gordon static void clnt_update_credit(CONN *, uint32_t);
767c478bd9Sstevel@tonic-gate 
777c478bd9Sstevel@tonic-gate static enum clnt_stat clnt_rdma_kcallit(CLIENT *, rpcproc_t, xdrproc_t,
787c478bd9Sstevel@tonic-gate     caddr_t, xdrproc_t, caddr_t, struct timeval);
797c478bd9Sstevel@tonic-gate static void	clnt_rdma_kabort(CLIENT *);
807c478bd9Sstevel@tonic-gate static void	clnt_rdma_kerror(CLIENT *, struct rpc_err *);
817c478bd9Sstevel@tonic-gate static bool_t	clnt_rdma_kfreeres(CLIENT *, xdrproc_t, caddr_t);
827c478bd9Sstevel@tonic-gate static void	clnt_rdma_kdestroy(CLIENT *);
837c478bd9Sstevel@tonic-gate static bool_t	clnt_rdma_kcontrol(CLIENT *, int, char *);
847c478bd9Sstevel@tonic-gate static int	clnt_rdma_ksettimers(CLIENT *, struct rpc_timers *,
857c478bd9Sstevel@tonic-gate     struct rpc_timers *, int, void(*)(int, int, caddr_t), caddr_t, uint32_t);
867c478bd9Sstevel@tonic-gate 
877c478bd9Sstevel@tonic-gate /*
887c478bd9Sstevel@tonic-gate  * Operations vector for RDMA based RPC
897c478bd9Sstevel@tonic-gate  */
907c478bd9Sstevel@tonic-gate static struct clnt_ops rdma_clnt_ops = {
917c478bd9Sstevel@tonic-gate 	clnt_rdma_kcallit,	/* do rpc call */
927c478bd9Sstevel@tonic-gate 	clnt_rdma_kabort,	/* abort call */
937c478bd9Sstevel@tonic-gate 	clnt_rdma_kerror,	/* return error status */
947c478bd9Sstevel@tonic-gate 	clnt_rdma_kfreeres,	/* free results */
957c478bd9Sstevel@tonic-gate 	clnt_rdma_kdestroy,	/* destroy rpc handle */
967c478bd9Sstevel@tonic-gate 	clnt_rdma_kcontrol,	/* the ioctl() of rpc */
977c478bd9Sstevel@tonic-gate 	clnt_rdma_ksettimers,	/* set retry timers */
987c478bd9Sstevel@tonic-gate };
997c478bd9Sstevel@tonic-gate 
/*
 * The size of the preserialized RPC header information.  This is the
 * number of bytes clnt_rdma_kcreate() lets xdr_callhdr() encode into
 * cku_rpchdr and the number copied into each outgoing message.
 */
#define	CKU_HDRSIZE	20

/* Return values of the static clnt_* helper routines in this file. */
#define	CLNT_RDMA_SUCCESS 0
#define	CLNT_RDMA_FAIL (-1)

/* Initial value of the AUTH refresh attempt counter in clnt_rdma_kcallit(). */
#define	AUTH_REFRESH_COUNT 2

/*
 * True when the CLIENT handle's auth flavor is RPCSEC_GSS.  The argument
 * is parenthesized so that any pointer expression (e.g. "cp + 1") expands
 * correctly.
 */
#define	IS_RPCSEC_GSS(authh)			\
	((authh)->cl_auth->ah_cred.oa_flavor == RPCSEC_GSS)
1117c478bd9Sstevel@tonic-gate 
1127c478bd9Sstevel@tonic-gate /*
1137c478bd9Sstevel@tonic-gate  * Per RPC RDMA endpoint details
1147c478bd9Sstevel@tonic-gate  */
1157c478bd9Sstevel@tonic-gate typedef struct cku_private {
1167c478bd9Sstevel@tonic-gate 	CLIENT			cku_client;	/* client handle */
1177c478bd9Sstevel@tonic-gate 	rdma_mod_t		*cku_rd_mod;	/* underlying RDMA mod */
1187c478bd9Sstevel@tonic-gate 	void			*cku_rd_handle;	/* underlying RDMA device */
1197f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	struct netbuf		cku_srcaddr;	/* source address for retries */
1207c478bd9Sstevel@tonic-gate 	struct netbuf		cku_addr;	/* remote netbuf address */
1217c478bd9Sstevel@tonic-gate 	int			cku_addrfmly;	/* for finding addr_type */
1227c478bd9Sstevel@tonic-gate 	struct rpc_err		cku_err;	/* error status */
1237c478bd9Sstevel@tonic-gate 	struct cred		*cku_cred;	/* credentials */
1247c478bd9Sstevel@tonic-gate 	XDR			cku_outxdr;	/* xdr stream for output */
1257c478bd9Sstevel@tonic-gate 	uint32_t		cku_outsz;
1267c478bd9Sstevel@tonic-gate 	XDR			cku_inxdr;	/* xdr stream for input */
1277c478bd9Sstevel@tonic-gate 	char			cku_rpchdr[CKU_HDRSIZE+4]; /* rpc header */
1287c478bd9Sstevel@tonic-gate 	uint32_t		cku_xid;	/* current XID */
1297c478bd9Sstevel@tonic-gate } cku_private_t;
1307c478bd9Sstevel@tonic-gate 
/* Seconds to delay after a connection failure. */
#define	CLNT_RDMA_DELAY	10
static int clnt_rdma_min_delay = CLNT_RDMA_DELAY;
1337c478bd9Sstevel@tonic-gate 
1347c478bd9Sstevel@tonic-gate struct {
1357c478bd9Sstevel@tonic-gate 	kstat_named_t	rccalls;
1367c478bd9Sstevel@tonic-gate 	kstat_named_t	rcbadcalls;
1377c478bd9Sstevel@tonic-gate 	kstat_named_t	rcbadxids;
1387c478bd9Sstevel@tonic-gate 	kstat_named_t	rctimeouts;
1397c478bd9Sstevel@tonic-gate 	kstat_named_t	rcnewcreds;
1407c478bd9Sstevel@tonic-gate 	kstat_named_t	rcbadverfs;
1417c478bd9Sstevel@tonic-gate 	kstat_named_t	rctimers;
1427c478bd9Sstevel@tonic-gate 	kstat_named_t	rccantconn;
1437c478bd9Sstevel@tonic-gate 	kstat_named_t	rcnomem;
1447c478bd9Sstevel@tonic-gate 	kstat_named_t	rcintrs;
1457c478bd9Sstevel@tonic-gate 	kstat_named_t	rclongrpcs;
1467c478bd9Sstevel@tonic-gate } rdmarcstat = {
1477c478bd9Sstevel@tonic-gate 	{ "calls",	KSTAT_DATA_UINT64 },
1487c478bd9Sstevel@tonic-gate 	{ "badcalls",	KSTAT_DATA_UINT64 },
1497c478bd9Sstevel@tonic-gate 	{ "badxids",	KSTAT_DATA_UINT64 },
1507c478bd9Sstevel@tonic-gate 	{ "timeouts",	KSTAT_DATA_UINT64 },
1517c478bd9Sstevel@tonic-gate 	{ "newcreds",	KSTAT_DATA_UINT64 },
1527c478bd9Sstevel@tonic-gate 	{ "badverfs",	KSTAT_DATA_UINT64 },
1537c478bd9Sstevel@tonic-gate 	{ "timers",	KSTAT_DATA_UINT64 },
1547c478bd9Sstevel@tonic-gate 	{ "cantconn",	KSTAT_DATA_UINT64 },
1557c478bd9Sstevel@tonic-gate 	{ "nomem",	KSTAT_DATA_UINT64 },
1567c478bd9Sstevel@tonic-gate 	{ "interrupts", KSTAT_DATA_UINT64 },
1577c478bd9Sstevel@tonic-gate 	{ "longrpc", 	KSTAT_DATA_UINT64 }
1587c478bd9Sstevel@tonic-gate };
1597c478bd9Sstevel@tonic-gate 
1607c478bd9Sstevel@tonic-gate kstat_named_t *rdmarcstat_ptr = (kstat_named_t *)&rdmarcstat;
1617c478bd9Sstevel@tonic-gate uint_t rdmarcstat_ndata = sizeof (rdmarcstat) / sizeof (kstat_named_t);
1627c478bd9Sstevel@tonic-gate 
#ifdef DEBUG
/* Debug switch for this file; only present in DEBUG builds. */
int rdma_clnt_debug = 0;
#endif

/*
 * RCSTAT_INCR(x) bumps the 64-bit counter rdmarcstat.x.  When
 * accurate_stats is defined the update is done under rdmarcstat_lock;
 * otherwise it is a plain, unlocked increment.
 *
 * NOTE(review): both forms expand to bare statement(s) that already end
 * in ';', so the macro must not be used as the unbraced body of an
 * if/else.
 */
#ifdef accurate_stats
extern kmutex_t rdmarcstat_lock;	/* mutex for rcstat updates */

#define	RCSTAT_INCR(x)			\
	mutex_enter(&rdmarcstat_lock);	\
	rdmarcstat.x.value.ui64++;	\
	mutex_exit(&rdmarcstat_lock);
#else
#define	RCSTAT_INCR(x)			\
	rdmarcstat.x.value.ui64++;
#endif
1787c478bd9Sstevel@tonic-gate 
/* Private data -> the CLIENT handle embedded in it. */
#define	ptoh(p)		(&((p)->cku_client))
/* CLIENT handle -> the private data stored in its cl_private. */
#define	htop(h)		((cku_private_t *)((h)->cl_private))
1817c478bd9Sstevel@tonic-gate 
1820a701b1eSRobert Gordon uint_t
calc_length(uint_t len)1830a701b1eSRobert Gordon calc_length(uint_t len)
1840a701b1eSRobert Gordon {
1850a701b1eSRobert Gordon 	len = RNDUP(len);
1860a701b1eSRobert Gordon 
1870a701b1eSRobert Gordon 	if (len <= 64 * 1024) {
1880a701b1eSRobert Gordon 		if (len > 32 * 1024) {
1890a701b1eSRobert Gordon 			len = 64 * 1024;
1900a701b1eSRobert Gordon 		} else {
1910a701b1eSRobert Gordon 			if (len > 16 * 1024) {
1920a701b1eSRobert Gordon 				len = 32 * 1024;
1930a701b1eSRobert Gordon 			} else {
1940a701b1eSRobert Gordon 				if (len > 8 * 1024) {
1950a701b1eSRobert Gordon 					len = 16 * 1024;
1960a701b1eSRobert Gordon 				} else {
1970a701b1eSRobert Gordon 					len = 8 * 1024;
1980a701b1eSRobert Gordon 				}
1990a701b1eSRobert Gordon 			}
2000a701b1eSRobert Gordon 		}
2010a701b1eSRobert Gordon 	}
2020a701b1eSRobert Gordon 	return (len);
2030a701b1eSRobert Gordon }
2047c478bd9Sstevel@tonic-gate int
clnt_rdma_kcreate(char * proto,void * handle,struct netbuf * raddr,int family,rpcprog_t pgm,rpcvers_t vers,struct cred * cred,CLIENT ** cl)2057c478bd9Sstevel@tonic-gate clnt_rdma_kcreate(char *proto, void *handle, struct netbuf *raddr, int family,
2067c478bd9Sstevel@tonic-gate     rpcprog_t pgm, rpcvers_t vers, struct cred *cred, CLIENT **cl)
2077c478bd9Sstevel@tonic-gate {
2087c478bd9Sstevel@tonic-gate 	CLIENT *h;
2097c478bd9Sstevel@tonic-gate 	struct cku_private *p;
2107c478bd9Sstevel@tonic-gate 	struct rpc_msg call_msg;
2117c478bd9Sstevel@tonic-gate 	rdma_registry_t *rp;
2127c478bd9Sstevel@tonic-gate 
2137c478bd9Sstevel@tonic-gate 	ASSERT(INGLOBALZONE(curproc));
2147c478bd9Sstevel@tonic-gate 
2157c478bd9Sstevel@tonic-gate 	if (cl == NULL)
2167c478bd9Sstevel@tonic-gate 		return (EINVAL);
2177c478bd9Sstevel@tonic-gate 	*cl = NULL;
2187c478bd9Sstevel@tonic-gate 
2197c478bd9Sstevel@tonic-gate 	p = kmem_zalloc(sizeof (*p), KM_SLEEP);
2207c478bd9Sstevel@tonic-gate 
2217c478bd9Sstevel@tonic-gate 	/*
2227c478bd9Sstevel@tonic-gate 	 * Find underlying RDMATF plugin
2237c478bd9Sstevel@tonic-gate 	 */
2247c478bd9Sstevel@tonic-gate 	rw_enter(&rdma_lock, RW_READER);
2257c478bd9Sstevel@tonic-gate 	rp = rdma_mod_head;
2267c478bd9Sstevel@tonic-gate 	while (rp != NULL) {
2277c478bd9Sstevel@tonic-gate 		if (strcmp(rp->r_mod->rdma_api, proto))
2287c478bd9Sstevel@tonic-gate 			rp = rp->r_next;
2297c478bd9Sstevel@tonic-gate 		else {
2307c478bd9Sstevel@tonic-gate 			p->cku_rd_mod = rp->r_mod;
2317c478bd9Sstevel@tonic-gate 			p->cku_rd_handle = handle;
2327c478bd9Sstevel@tonic-gate 			break;
2337c478bd9Sstevel@tonic-gate 		}
2347c478bd9Sstevel@tonic-gate 	}
2357c478bd9Sstevel@tonic-gate 	rw_exit(&rdma_lock);
2367c478bd9Sstevel@tonic-gate 
2377c478bd9Sstevel@tonic-gate 	if (p->cku_rd_mod == NULL) {
2387c478bd9Sstevel@tonic-gate 		/*
2397c478bd9Sstevel@tonic-gate 		 * Should not happen.
2407c478bd9Sstevel@tonic-gate 		 * No matching RDMATF plugin.
2417c478bd9Sstevel@tonic-gate 		 */
2427c478bd9Sstevel@tonic-gate 		kmem_free(p, sizeof (struct cku_private));
2437c478bd9Sstevel@tonic-gate 		return (EINVAL);
2447c478bd9Sstevel@tonic-gate 	}
2457c478bd9Sstevel@tonic-gate 
2467c478bd9Sstevel@tonic-gate 	h = ptoh(p);
2477c478bd9Sstevel@tonic-gate 	h->cl_ops = &rdma_clnt_ops;
2487c478bd9Sstevel@tonic-gate 	h->cl_private = (caddr_t)p;
2497c478bd9Sstevel@tonic-gate 	h->cl_auth = authkern_create();
2507c478bd9Sstevel@tonic-gate 
2517c478bd9Sstevel@tonic-gate 	/* call message, just used to pre-serialize below */
2527c478bd9Sstevel@tonic-gate 	call_msg.rm_xid = 0;
2537c478bd9Sstevel@tonic-gate 	call_msg.rm_direction = CALL;
2547c478bd9Sstevel@tonic-gate 	call_msg.rm_call.cb_rpcvers = RPC_MSG_VERSION;
2557c478bd9Sstevel@tonic-gate 	call_msg.rm_call.cb_prog = pgm;
2567c478bd9Sstevel@tonic-gate 	call_msg.rm_call.cb_vers = vers;
2577c478bd9Sstevel@tonic-gate 
2587c478bd9Sstevel@tonic-gate 	xdrmem_create(&p->cku_outxdr, p->cku_rpchdr, CKU_HDRSIZE, XDR_ENCODE);
2597c478bd9Sstevel@tonic-gate 	/* pre-serialize call message header */
2607c478bd9Sstevel@tonic-gate 	if (!xdr_callhdr(&p->cku_outxdr, &call_msg)) {
2617c478bd9Sstevel@tonic-gate 		XDR_DESTROY(&p->cku_outxdr);
2627c478bd9Sstevel@tonic-gate 		auth_destroy(h->cl_auth);
2637c478bd9Sstevel@tonic-gate 		kmem_free(p, sizeof (struct cku_private));
2647c478bd9Sstevel@tonic-gate 		return (EINVAL);
2657c478bd9Sstevel@tonic-gate 	}
2667c478bd9Sstevel@tonic-gate 
2677c478bd9Sstevel@tonic-gate 	/*
2687c478bd9Sstevel@tonic-gate 	 * Set up the rpc information
2697c478bd9Sstevel@tonic-gate 	 */
2707c478bd9Sstevel@tonic-gate 	p->cku_cred = cred;
2717f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	p->cku_srcaddr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP);
2727f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	p->cku_srcaddr.maxlen = raddr->maxlen;
2737f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	p->cku_srcaddr.len = 0;
2747c478bd9Sstevel@tonic-gate 	p->cku_addr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP);
2757c478bd9Sstevel@tonic-gate 	p->cku_addr.maxlen = raddr->maxlen;
2767c478bd9Sstevel@tonic-gate 	p->cku_addr.len = raddr->len;
2777c478bd9Sstevel@tonic-gate 	bcopy(raddr->buf, p->cku_addr.buf, raddr->len);
2787c478bd9Sstevel@tonic-gate 	p->cku_addrfmly = family;
2797c478bd9Sstevel@tonic-gate 
2807c478bd9Sstevel@tonic-gate 	*cl = h;
2817c478bd9Sstevel@tonic-gate 	return (0);
2827c478bd9Sstevel@tonic-gate }
2837c478bd9Sstevel@tonic-gate 
2847c478bd9Sstevel@tonic-gate static void
clnt_rdma_kdestroy(CLIENT * h)2857c478bd9Sstevel@tonic-gate clnt_rdma_kdestroy(CLIENT *h)
2867c478bd9Sstevel@tonic-gate {
2877c478bd9Sstevel@tonic-gate 	struct cku_private *p = htop(h);
2887c478bd9Sstevel@tonic-gate 
2897f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	kmem_free(p->cku_srcaddr.buf, p->cku_srcaddr.maxlen);
2907c478bd9Sstevel@tonic-gate 	kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);
2917c478bd9Sstevel@tonic-gate 	kmem_free(p, sizeof (*p));
2927c478bd9Sstevel@tonic-gate }
2937c478bd9Sstevel@tonic-gate 
2947c478bd9Sstevel@tonic-gate void
clnt_rdma_kinit(CLIENT * h,char * proto,void * handle,struct netbuf * raddr,struct cred * cred)2957c478bd9Sstevel@tonic-gate clnt_rdma_kinit(CLIENT *h, char *proto, void *handle, struct netbuf *raddr,
2967c478bd9Sstevel@tonic-gate     struct cred *cred)
2977c478bd9Sstevel@tonic-gate {
2987c478bd9Sstevel@tonic-gate 	struct cku_private *p = htop(h);
2997c478bd9Sstevel@tonic-gate 	rdma_registry_t *rp;
3007c478bd9Sstevel@tonic-gate 
3017c478bd9Sstevel@tonic-gate 	ASSERT(INGLOBALZONE(curproc));
3027c478bd9Sstevel@tonic-gate 	/*
3037c478bd9Sstevel@tonic-gate 	 * Find underlying RDMATF plugin
3047c478bd9Sstevel@tonic-gate 	 */
3057c478bd9Sstevel@tonic-gate 	p->cku_rd_mod = NULL;
3067c478bd9Sstevel@tonic-gate 	rw_enter(&rdma_lock, RW_READER);
3077c478bd9Sstevel@tonic-gate 	rp = rdma_mod_head;
3087c478bd9Sstevel@tonic-gate 	while (rp != NULL) {
3097c478bd9Sstevel@tonic-gate 		if (strcmp(rp->r_mod->rdma_api, proto))
3107c478bd9Sstevel@tonic-gate 			rp = rp->r_next;
3117c478bd9Sstevel@tonic-gate 		else {
3127c478bd9Sstevel@tonic-gate 			p->cku_rd_mod = rp->r_mod;
3137c478bd9Sstevel@tonic-gate 			p->cku_rd_handle = handle;
3147c478bd9Sstevel@tonic-gate 			break;
3157c478bd9Sstevel@tonic-gate 		}
3167c478bd9Sstevel@tonic-gate 
3177c478bd9Sstevel@tonic-gate 	}
3187c478bd9Sstevel@tonic-gate 	rw_exit(&rdma_lock);
3197c478bd9Sstevel@tonic-gate 
3207c478bd9Sstevel@tonic-gate 	/*
3217c478bd9Sstevel@tonic-gate 	 * Set up the rpc information
3227c478bd9Sstevel@tonic-gate 	 */
3237c478bd9Sstevel@tonic-gate 	p->cku_cred = cred;
3247c478bd9Sstevel@tonic-gate 	p->cku_xid = 0;
3257c478bd9Sstevel@tonic-gate 
3267c478bd9Sstevel@tonic-gate 	if (p->cku_addr.maxlen < raddr->len) {
3277c478bd9Sstevel@tonic-gate 		if (p->cku_addr.maxlen != 0 && p->cku_addr.buf != NULL)
3287c478bd9Sstevel@tonic-gate 			kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);
3297c478bd9Sstevel@tonic-gate 		p->cku_addr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP);
3307c478bd9Sstevel@tonic-gate 		p->cku_addr.maxlen = raddr->maxlen;
3317c478bd9Sstevel@tonic-gate 	}
3327c478bd9Sstevel@tonic-gate 
3337f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	p->cku_srcaddr.len = 0;
3347f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
3357c478bd9Sstevel@tonic-gate 	p->cku_addr.len = raddr->len;
3367c478bd9Sstevel@tonic-gate 	bcopy(raddr->buf, p->cku_addr.buf, raddr->len);
3377c478bd9Sstevel@tonic-gate 	h->cl_ops = &rdma_clnt_ops;
3387c478bd9Sstevel@tonic-gate }
3397c478bd9Sstevel@tonic-gate 
3400a701b1eSRobert Gordon static int
clnt_compose_rpcmsg(CLIENT * h,rpcproc_t procnum,rdma_buf_t * rpcmsg,XDR * xdrs,xdrproc_t xdr_args,caddr_t argsp)3410a701b1eSRobert Gordon clnt_compose_rpcmsg(CLIENT *h, rpcproc_t procnum,
3420a701b1eSRobert Gordon     rdma_buf_t *rpcmsg, XDR *xdrs,
3430a701b1eSRobert Gordon     xdrproc_t xdr_args, caddr_t argsp)
3440a701b1eSRobert Gordon {
3450a701b1eSRobert Gordon 	cku_private_t *p = htop(h);
3460a701b1eSRobert Gordon 
3470a701b1eSRobert Gordon 	if (h->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
3480a701b1eSRobert Gordon 		/*
3490a701b1eSRobert Gordon 		 * Copy in the preserialized RPC header
3500a701b1eSRobert Gordon 		 * information.
3510a701b1eSRobert Gordon 		 */
3520a701b1eSRobert Gordon 		bcopy(p->cku_rpchdr, rpcmsg->addr, CKU_HDRSIZE);
3530a701b1eSRobert Gordon 
3540a701b1eSRobert Gordon 		/*
3550a701b1eSRobert Gordon 		 * transaction id is the 1st thing in the output
3560a701b1eSRobert Gordon 		 * buffer.
3570a701b1eSRobert Gordon 		 */
3580a701b1eSRobert Gordon 		/* LINTED pointer alignment */
3590a701b1eSRobert Gordon 		(*(uint32_t *)(rpcmsg->addr)) = p->cku_xid;
3600a701b1eSRobert Gordon 
3610a701b1eSRobert Gordon 		/* Skip the preserialized stuff. */
3620a701b1eSRobert Gordon 		XDR_SETPOS(xdrs, CKU_HDRSIZE);
3630a701b1eSRobert Gordon 
3640a701b1eSRobert Gordon 		/* Serialize dynamic stuff into the output buffer. */
3650a701b1eSRobert Gordon 		if ((!XDR_PUTINT32(xdrs, (int32_t *)&procnum)) ||
3660a701b1eSRobert Gordon 		    (!AUTH_MARSHALL(h->cl_auth, xdrs, p->cku_cred)) ||
3670a701b1eSRobert Gordon 		    (!(*xdr_args)(xdrs, argsp))) {
3680a701b1eSRobert Gordon 			DTRACE_PROBE(krpc__e__clntrdma__rpcmsg__dynargs);
3690a701b1eSRobert Gordon 			return (CLNT_RDMA_FAIL);
3700a701b1eSRobert Gordon 		}
3710a701b1eSRobert Gordon 		p->cku_outsz = XDR_GETPOS(xdrs);
3720a701b1eSRobert Gordon 	} else {
3730a701b1eSRobert Gordon 		uint32_t *uproc = (uint32_t *)&p->cku_rpchdr[CKU_HDRSIZE];
3740a701b1eSRobert Gordon 		IXDR_PUT_U_INT32(uproc, procnum);
3750a701b1eSRobert Gordon 		(*(uint32_t *)(&p->cku_rpchdr[0])) = p->cku_xid;
3760a701b1eSRobert Gordon 		XDR_SETPOS(xdrs, 0);
3770a701b1eSRobert Gordon 
3780a701b1eSRobert Gordon 		/* Serialize the procedure number and the arguments. */
3790a701b1eSRobert Gordon 		if (!AUTH_WRAP(h->cl_auth, (caddr_t)p->cku_rpchdr,
3800a701b1eSRobert Gordon 		    CKU_HDRSIZE+4, xdrs, xdr_args, argsp)) {
3810a701b1eSRobert Gordon 			if (rpcmsg->addr != xdrs->x_base) {
3820a701b1eSRobert Gordon 				rpcmsg->addr = xdrs->x_base;
3830a701b1eSRobert Gordon 				rpcmsg->len = xdr_getbufsize(xdrs);
3840a701b1eSRobert Gordon 			}
3850a701b1eSRobert Gordon 			DTRACE_PROBE(krpc__e__clntrdma__rpcmsg__procnum);
3860a701b1eSRobert Gordon 			return (CLNT_RDMA_FAIL);
3870a701b1eSRobert Gordon 		}
3880a701b1eSRobert Gordon 		/*
3890a701b1eSRobert Gordon 		 * If we had to allocate a new buffer while encoding
3900a701b1eSRobert Gordon 		 * then update the addr and len.
3910a701b1eSRobert Gordon 		 */
3920a701b1eSRobert Gordon 		if (rpcmsg->addr != xdrs->x_base) {
3930a701b1eSRobert Gordon 			rpcmsg->addr = xdrs->x_base;
3940a701b1eSRobert Gordon 			rpcmsg->len = xdr_getbufsize(xdrs);
3950a701b1eSRobert Gordon 		}
3960a701b1eSRobert Gordon 
3970a701b1eSRobert Gordon 		p->cku_outsz = XDR_GETPOS(xdrs);
3980a701b1eSRobert Gordon 		DTRACE_PROBE1(krpc__i__compose__size__sec, int, p->cku_outsz)
3990a701b1eSRobert Gordon 	}
4000a701b1eSRobert Gordon 
4010a701b1eSRobert Gordon 	return (CLNT_RDMA_SUCCESS);
4020a701b1eSRobert Gordon }
4030a701b1eSRobert Gordon 
4040a701b1eSRobert Gordon static int
clnt_compose_rdma_header(CONN * conn,CLIENT * h,rdma_buf_t * clmsg,XDR ** xdrs,uint_t * op)4050a701b1eSRobert Gordon clnt_compose_rdma_header(CONN *conn, CLIENT *h, rdma_buf_t *clmsg,
4060a701b1eSRobert Gordon     XDR **xdrs, uint_t *op)
4070a701b1eSRobert Gordon {
4080a701b1eSRobert Gordon 	cku_private_t *p = htop(h);
4090a701b1eSRobert Gordon 	uint_t vers;
4100a701b1eSRobert Gordon 	uint32_t rdma_credit = rdma_bufs_rqst;
4110a701b1eSRobert Gordon 
4120a701b1eSRobert Gordon 	vers = RPCRDMA_VERS;
4130a701b1eSRobert Gordon 	clmsg->type = SEND_BUFFER;
4140a701b1eSRobert Gordon 
4150a701b1eSRobert Gordon 	if (rdma_buf_alloc(conn, clmsg)) {
4160a701b1eSRobert Gordon 		return (CLNT_RDMA_FAIL);
4170a701b1eSRobert Gordon 	}
4180a701b1eSRobert Gordon 
4190a701b1eSRobert Gordon 	*xdrs = &p->cku_outxdr;
4200a701b1eSRobert Gordon 	xdrmem_create(*xdrs, clmsg->addr, clmsg->len, XDR_ENCODE);
4210a701b1eSRobert Gordon 
4220a701b1eSRobert Gordon 	(*(uint32_t *)clmsg->addr) = p->cku_xid;
4230a701b1eSRobert Gordon 	XDR_SETPOS(*xdrs, sizeof (uint32_t));
4240a701b1eSRobert Gordon 	(void) xdr_u_int(*xdrs, &vers);
4250a701b1eSRobert Gordon 	(void) xdr_u_int(*xdrs, &rdma_credit);
4260a701b1eSRobert Gordon 	(void) xdr_u_int(*xdrs, op);
4270a701b1eSRobert Gordon 
4280a701b1eSRobert Gordon 	return (CLNT_RDMA_SUCCESS);
4290a701b1eSRobert Gordon }
4300a701b1eSRobert Gordon 
4310a701b1eSRobert Gordon /*
4320a701b1eSRobert Gordon  * If xp_cl is NULL value, then the RPC payload will NOT carry
4330a701b1eSRobert Gordon  * an RDMA READ chunk list, in this case we insert FALSE into
4340a701b1eSRobert Gordon  * the XDR stream. Otherwise we use the clist and RDMA register
4350a701b1eSRobert Gordon  * the memory and encode the clist into the outbound XDR stream.
4360a701b1eSRobert Gordon  */
4370a701b1eSRobert Gordon static int
clnt_setup_rlist(CONN * conn,XDR * xdrs,XDR * call_xdrp)4380a701b1eSRobert Gordon clnt_setup_rlist(CONN *conn, XDR *xdrs, XDR *call_xdrp)
4390a701b1eSRobert Gordon {
4400a701b1eSRobert Gordon 	int status;
4410a701b1eSRobert Gordon 	struct clist *rclp;
4420a701b1eSRobert Gordon 	int32_t xdr_flag = XDR_RDMA_RLIST_REG;
4430a701b1eSRobert Gordon 
4440a701b1eSRobert Gordon 	XDR_CONTROL(call_xdrp, XDR_RDMA_GET_RLIST, &rclp);
4450a701b1eSRobert Gordon 
4460a701b1eSRobert Gordon 	if (rclp != NULL) {
4470a701b1eSRobert Gordon 		status = clist_register(conn, rclp, CLIST_REG_SOURCE);
4480a701b1eSRobert Gordon 		if (status != RDMA_SUCCESS) {
4490a701b1eSRobert Gordon 			return (CLNT_RDMA_FAIL);
4500a701b1eSRobert Gordon 		}
4510a701b1eSRobert Gordon 		XDR_CONTROL(call_xdrp, XDR_RDMA_SET_FLAGS, &xdr_flag);
4520a701b1eSRobert Gordon 	}
4530a701b1eSRobert Gordon 	(void) xdr_do_clist(xdrs, &rclp);
4540a701b1eSRobert Gordon 
4550a701b1eSRobert Gordon 	return (CLNT_RDMA_SUCCESS);
4560a701b1eSRobert Gordon }
4570a701b1eSRobert Gordon 
4580a701b1eSRobert Gordon /*
4590a701b1eSRobert Gordon  * If xp_wcl is NULL value, then the RPC payload will NOT carry
4600a701b1eSRobert Gordon  * an RDMA WRITE chunk list, in this case we insert FALSE into
4610a701b1eSRobert Gordon  * the XDR stream. Otherwise we use the clist and  RDMA register
4620a701b1eSRobert Gordon  * the memory and encode the clist into the outbound XDR stream.
4630a701b1eSRobert Gordon  */
4640a701b1eSRobert Gordon static int
clnt_setup_wlist(CONN * conn,XDR * xdrs,XDR * call_xdrp,rdma_buf_t * rndbuf)465f837ee4aSSiddheshwar Mahesh clnt_setup_wlist(CONN *conn, XDR *xdrs, XDR *call_xdrp, rdma_buf_t *rndbuf)
4660a701b1eSRobert Gordon {
4670a701b1eSRobert Gordon 	int status;
468f837ee4aSSiddheshwar Mahesh 	struct clist *wlist, *rndcl;
469f837ee4aSSiddheshwar Mahesh 	int wlen, rndlen;
4700a701b1eSRobert Gordon 	int32_t xdr_flag = XDR_RDMA_WLIST_REG;
4710a701b1eSRobert Gordon 
4720a701b1eSRobert Gordon 	XDR_CONTROL(call_xdrp, XDR_RDMA_GET_WLIST, &wlist);
4730a701b1eSRobert Gordon 
4740a701b1eSRobert Gordon 	if (wlist != NULL) {
475f837ee4aSSiddheshwar Mahesh 		/*
476f837ee4aSSiddheshwar Mahesh 		 * If we are sending a non 4-byte alligned length
477f837ee4aSSiddheshwar Mahesh 		 * the server will roundup the length to 4-byte
478f837ee4aSSiddheshwar Mahesh 		 * boundary. In such a case, a trailing chunk is
479f837ee4aSSiddheshwar Mahesh 		 * added to take any spill over roundup bytes.
480f837ee4aSSiddheshwar Mahesh 		 */
481f837ee4aSSiddheshwar Mahesh 		wlen = clist_len(wlist);
482f837ee4aSSiddheshwar Mahesh 		rndlen = (roundup(wlen, BYTES_PER_XDR_UNIT) - wlen);
483f837ee4aSSiddheshwar Mahesh 		if (rndlen) {
484f837ee4aSSiddheshwar Mahesh 			rndcl = clist_alloc();
485f837ee4aSSiddheshwar Mahesh 			/*
486f837ee4aSSiddheshwar Mahesh 			 * calc_length() will allocate a PAGESIZE
487f837ee4aSSiddheshwar Mahesh 			 * buffer below.
488f837ee4aSSiddheshwar Mahesh 			 */
489f837ee4aSSiddheshwar Mahesh 			rndcl->c_len = calc_length(rndlen);
490f837ee4aSSiddheshwar Mahesh 			rndcl->rb_longbuf.type = RDMA_LONG_BUFFER;
491f837ee4aSSiddheshwar Mahesh 			rndcl->rb_longbuf.len = rndcl->c_len;
492f837ee4aSSiddheshwar Mahesh 			if (rdma_buf_alloc(conn, &rndcl->rb_longbuf)) {
493f837ee4aSSiddheshwar Mahesh 				clist_free(rndcl);
494f837ee4aSSiddheshwar Mahesh 				return (CLNT_RDMA_FAIL);
495f837ee4aSSiddheshwar Mahesh 			}
496f837ee4aSSiddheshwar Mahesh 
497f837ee4aSSiddheshwar Mahesh 			/* Roundup buffer freed back in caller */
498f837ee4aSSiddheshwar Mahesh 			*rndbuf = rndcl->rb_longbuf;
499f837ee4aSSiddheshwar Mahesh 
500f837ee4aSSiddheshwar Mahesh 			rndcl->u.c_daddr3 = rndcl->rb_longbuf.addr;
501f837ee4aSSiddheshwar Mahesh 			rndcl->c_next = NULL;
502f837ee4aSSiddheshwar Mahesh 			rndcl->c_dmemhandle = rndcl->rb_longbuf.handle;
503f837ee4aSSiddheshwar Mahesh 			wlist->c_next = rndcl;
504f837ee4aSSiddheshwar Mahesh 		}
505f837ee4aSSiddheshwar Mahesh 
5060a701b1eSRobert Gordon 		status = clist_register(conn, wlist, CLIST_REG_DST);
5070a701b1eSRobert Gordon 		if (status != RDMA_SUCCESS) {
508f837ee4aSSiddheshwar Mahesh 			rdma_buf_free(conn, rndbuf);
509f837ee4aSSiddheshwar Mahesh 			bzero(rndbuf, sizeof (rdma_buf_t));
5100a701b1eSRobert Gordon 			return (CLNT_RDMA_FAIL);
5110a701b1eSRobert Gordon 		}
5120a701b1eSRobert Gordon 		XDR_CONTROL(call_xdrp, XDR_RDMA_SET_FLAGS, &xdr_flag);
5130a701b1eSRobert Gordon 	}
5140a701b1eSRobert Gordon 
515f837ee4aSSiddheshwar Mahesh 	if (!xdr_encode_wlist(xdrs, wlist)) {
516f837ee4aSSiddheshwar Mahesh 		if (rndlen) {
517f837ee4aSSiddheshwar Mahesh 			rdma_buf_free(conn, rndbuf);
518f837ee4aSSiddheshwar Mahesh 			bzero(rndbuf, sizeof (rdma_buf_t));
519f837ee4aSSiddheshwar Mahesh 		}
5200a701b1eSRobert Gordon 		return (CLNT_RDMA_FAIL);
521f837ee4aSSiddheshwar Mahesh 	}
5220a701b1eSRobert Gordon 
5230a701b1eSRobert Gordon 	return (CLNT_RDMA_SUCCESS);
5240a701b1eSRobert Gordon }
5250a701b1eSRobert Gordon 
5260a701b1eSRobert Gordon static int
clnt_setup_long_reply(CONN * conn,struct clist ** clpp,uint_t length)5270a701b1eSRobert Gordon clnt_setup_long_reply(CONN *conn, struct clist **clpp, uint_t length)
5280a701b1eSRobert Gordon {
5290a701b1eSRobert Gordon 	if (length == 0) {
5300a701b1eSRobert Gordon 		*clpp = NULL;
5310a701b1eSRobert Gordon 		return (CLNT_RDMA_SUCCESS);
5320a701b1eSRobert Gordon 	}
5330a701b1eSRobert Gordon 
5340a701b1eSRobert Gordon 	*clpp = clist_alloc();
5350a701b1eSRobert Gordon 
5360a701b1eSRobert Gordon 	(*clpp)->rb_longbuf.len = calc_length(length);
5370a701b1eSRobert Gordon 	(*clpp)->rb_longbuf.type = RDMA_LONG_BUFFER;
5380a701b1eSRobert Gordon 
5390a701b1eSRobert Gordon 	if (rdma_buf_alloc(conn, &((*clpp)->rb_longbuf))) {
5400a701b1eSRobert Gordon 		clist_free(*clpp);
5410a701b1eSRobert Gordon 		*clpp = NULL;
5420a701b1eSRobert Gordon 		return (CLNT_RDMA_FAIL);
5430a701b1eSRobert Gordon 	}
5440a701b1eSRobert Gordon 
5450a701b1eSRobert Gordon 	(*clpp)->u.c_daddr3 = (*clpp)->rb_longbuf.addr;
5460a701b1eSRobert Gordon 	(*clpp)->c_len = (*clpp)->rb_longbuf.len;
5470a701b1eSRobert Gordon 	(*clpp)->c_next = NULL;
5480a701b1eSRobert Gordon 	(*clpp)->c_dmemhandle = (*clpp)->rb_longbuf.handle;
5490a701b1eSRobert Gordon 
5500a701b1eSRobert Gordon 	if (clist_register(conn, *clpp, CLIST_REG_DST)) {
5510a701b1eSRobert Gordon 		DTRACE_PROBE(krpc__e__clntrdma__longrep_regbuf);
5520a701b1eSRobert Gordon 		rdma_buf_free(conn, &((*clpp)->rb_longbuf));
5530a701b1eSRobert Gordon 		clist_free(*clpp);
55440d23e76SSiddheshwar Mahesh 		*clpp = NULL;
5550a701b1eSRobert Gordon 		return (CLNT_RDMA_FAIL);
5560a701b1eSRobert Gordon 	}
5570a701b1eSRobert Gordon 
5580a701b1eSRobert Gordon 	return (CLNT_RDMA_SUCCESS);
5590a701b1eSRobert Gordon }
5600a701b1eSRobert Gordon 
5617c478bd9Sstevel@tonic-gate /* ARGSUSED */
5627c478bd9Sstevel@tonic-gate static enum clnt_stat
clnt_rdma_kcallit(CLIENT * h,rpcproc_t procnum,xdrproc_t xdr_args,caddr_t argsp,xdrproc_t xdr_results,caddr_t resultsp,struct timeval wait)5637c478bd9Sstevel@tonic-gate clnt_rdma_kcallit(CLIENT *h, rpcproc_t procnum, xdrproc_t xdr_args,
5640a701b1eSRobert Gordon     caddr_t argsp, xdrproc_t xdr_results, caddr_t resultsp,
5650a701b1eSRobert Gordon     struct timeval wait)
5667c478bd9Sstevel@tonic-gate {
5677c478bd9Sstevel@tonic-gate 	cku_private_t *p = htop(h);
5680a701b1eSRobert Gordon 
5690a701b1eSRobert Gordon 	int 	try_call_again;
5700a701b1eSRobert Gordon 	int	refresh_attempt = AUTH_REFRESH_COUNT;
5717c478bd9Sstevel@tonic-gate 	int 	status;
5720a701b1eSRobert Gordon 	int 	msglen;
5730a701b1eSRobert Gordon 
5740a701b1eSRobert Gordon 	XDR	*call_xdrp, callxdr; /* for xdrrdma encoding the RPC call */
5750a701b1eSRobert Gordon 	XDR	*reply_xdrp, replyxdr; /* for xdrrdma decoding the RPC reply */
5760a701b1eSRobert Gordon 	XDR 	*rdmahdr_o_xdrs, *rdmahdr_i_xdrs;
5770a701b1eSRobert Gordon 
5787c478bd9Sstevel@tonic-gate 	struct rpc_msg 	reply_msg;
57951f34d4bSRajkumar Sivaprakasam 	rdma_registry_t	*m;
5800a701b1eSRobert Gordon 
5810a701b1eSRobert Gordon 	struct clist *cl_sendlist;
5820a701b1eSRobert Gordon 	struct clist *cl_recvlist;
5830a701b1eSRobert Gordon 	struct clist *cl;
5840a701b1eSRobert Gordon 	struct clist *cl_rpcmsg;
5850a701b1eSRobert Gordon 	struct clist *cl_rdma_reply;
5860a701b1eSRobert Gordon 	struct clist *cl_rpcreply_wlist;
5870a701b1eSRobert Gordon 	struct clist *cl_long_reply;
588f837ee4aSSiddheshwar Mahesh 	rdma_buf_t  rndup;
5890a701b1eSRobert Gordon 
5900a701b1eSRobert Gordon 	uint_t vers;
5910a701b1eSRobert Gordon 	uint_t op;
5927c478bd9Sstevel@tonic-gate 	uint_t off;
5930a701b1eSRobert Gordon 	uint32_t seg_array_len;
5940a701b1eSRobert Gordon 	uint_t long_reply_len;
5950a701b1eSRobert Gordon 	uint_t rpcsec_gss;
5960a701b1eSRobert Gordon 	uint_t gss_i_or_p;
5970a701b1eSRobert Gordon 
5987c478bd9Sstevel@tonic-gate 	CONN *conn = NULL;
5990a701b1eSRobert Gordon 	rdma_buf_t clmsg;
6000a701b1eSRobert Gordon 	rdma_buf_t rpcmsg;
6010a701b1eSRobert Gordon 	rdma_chunkinfo_lengths_t rcil;
6020a701b1eSRobert Gordon 
6037c478bd9Sstevel@tonic-gate 	clock_t	ticks;
6040a701b1eSRobert Gordon 	bool_t wlist_exists_reply;
6050a701b1eSRobert Gordon 
6060a701b1eSRobert Gordon 	uint32_t rdma_credit = rdma_bufs_rqst;
6077c478bd9Sstevel@tonic-gate 
6087c478bd9Sstevel@tonic-gate 	RCSTAT_INCR(rccalls);
6090a701b1eSRobert Gordon 
6100a701b1eSRobert Gordon call_again:
6110a701b1eSRobert Gordon 
6120a701b1eSRobert Gordon 	bzero(&clmsg, sizeof (clmsg));
6130a701b1eSRobert Gordon 	bzero(&rpcmsg, sizeof (rpcmsg));
614f837ee4aSSiddheshwar Mahesh 	bzero(&rndup, sizeof (rndup));
6150a701b1eSRobert Gordon 	try_call_again = 0;
6160a701b1eSRobert Gordon 	cl_sendlist = NULL;
6170a701b1eSRobert Gordon 	cl_recvlist = NULL;
6180a701b1eSRobert Gordon 	cl = NULL;
6190a701b1eSRobert Gordon 	cl_rpcmsg = NULL;
6200a701b1eSRobert Gordon 	cl_rdma_reply = NULL;
6210a701b1eSRobert Gordon 	call_xdrp = NULL;
6220a701b1eSRobert Gordon 	reply_xdrp = NULL;
6230a701b1eSRobert Gordon 	wlist_exists_reply  = FALSE;
6240a701b1eSRobert Gordon 	cl_rpcreply_wlist = NULL;
6250a701b1eSRobert Gordon 	cl_long_reply = NULL;
6260a701b1eSRobert Gordon 	rcil.rcil_len = 0;
6270a701b1eSRobert Gordon 	rcil.rcil_len_alt = 0;
6280a701b1eSRobert Gordon 	long_reply_len = 0;
6290a701b1eSRobert Gordon 
63051f34d4bSRajkumar Sivaprakasam 	rw_enter(&rdma_lock, RW_READER);
63151f34d4bSRajkumar Sivaprakasam 	m = (rdma_registry_t *)p->cku_rd_handle;
63251f34d4bSRajkumar Sivaprakasam 	if (m->r_mod_state == RDMA_MOD_INACTIVE) {
63351f34d4bSRajkumar Sivaprakasam 		/*
63451f34d4bSRajkumar Sivaprakasam 		 * If we didn't find a matching RDMA module in the registry
63551f34d4bSRajkumar Sivaprakasam 		 * then there is no transport.
63651f34d4bSRajkumar Sivaprakasam 		 */
63751f34d4bSRajkumar Sivaprakasam 		rw_exit(&rdma_lock);
63851f34d4bSRajkumar Sivaprakasam 		p->cku_err.re_status = RPC_CANTSEND;
63951f34d4bSRajkumar Sivaprakasam 		p->cku_err.re_errno = EIO;
64051f34d4bSRajkumar Sivaprakasam 		ticks = clnt_rdma_min_delay * drv_usectohz(1000000);
64151f34d4bSRajkumar Sivaprakasam 		if (h->cl_nosignal == TRUE) {
64251f34d4bSRajkumar Sivaprakasam 			delay(ticks);
64351f34d4bSRajkumar Sivaprakasam 		} else {
64451f34d4bSRajkumar Sivaprakasam 			if (delay_sig(ticks) == EINTR) {
64551f34d4bSRajkumar Sivaprakasam 				p->cku_err.re_status = RPC_INTR;
64651f34d4bSRajkumar Sivaprakasam 				p->cku_err.re_errno = EINTR;
64751f34d4bSRajkumar Sivaprakasam 			}
64851f34d4bSRajkumar Sivaprakasam 		}
64951f34d4bSRajkumar Sivaprakasam 		return (RPC_CANTSEND);
65051f34d4bSRajkumar Sivaprakasam 	}
6517c478bd9Sstevel@tonic-gate 	/*
6527c478bd9Sstevel@tonic-gate 	 * Get unique xid
6537c478bd9Sstevel@tonic-gate 	 */
6547c478bd9Sstevel@tonic-gate 	if (p->cku_xid == 0)
6557c478bd9Sstevel@tonic-gate 		p->cku_xid = alloc_xid();
6567c478bd9Sstevel@tonic-gate 
6577f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	status = RDMA_GET_CONN(p->cku_rd_mod->rdma_ops, &p->cku_srcaddr,
6587f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	    &p->cku_addr, p->cku_addrfmly, p->cku_rd_handle, &conn);
65951f34d4bSRajkumar Sivaprakasam 	rw_exit(&rdma_lock);
6607c478bd9Sstevel@tonic-gate 
6610a701b1eSRobert Gordon 	/*
6620a701b1eSRobert Gordon 	 * If there is a problem with the connection, reflect the issue
6630a701b1eSRobert Gordon 	 * back to the higher level to address; we MAY delay for a short
6640a701b1eSRobert Gordon 	 * period so that we are kind to the transport.
6650a701b1eSRobert Gordon 	 */
6667c478bd9Sstevel@tonic-gate 	if (conn == NULL) {
6677c478bd9Sstevel@tonic-gate 		/*
6687c478bd9Sstevel@tonic-gate 		 * Connect failed to server. Could be because of one
6697c478bd9Sstevel@tonic-gate 		 * of several things. In some cases we don't want
6707c478bd9Sstevel@tonic-gate 		 * the caller to retry immediately - delay before
6717c478bd9Sstevel@tonic-gate 		 * returning to caller.
6727c478bd9Sstevel@tonic-gate 		 */
6737c478bd9Sstevel@tonic-gate 		switch (status) {
6747c478bd9Sstevel@tonic-gate 		case RDMA_TIMEDOUT:
6757c478bd9Sstevel@tonic-gate 			/*
6767c478bd9Sstevel@tonic-gate 			 * Already timed out. No need to delay
6777c478bd9Sstevel@tonic-gate 			 * some more.
6787c478bd9Sstevel@tonic-gate 			 */
6797c478bd9Sstevel@tonic-gate 			p->cku_err.re_status = RPC_TIMEDOUT;
6807c478bd9Sstevel@tonic-gate 			p->cku_err.re_errno = ETIMEDOUT;
6817c478bd9Sstevel@tonic-gate 			break;
6827c478bd9Sstevel@tonic-gate 		case RDMA_INTR:
6837c478bd9Sstevel@tonic-gate 			/*
6847c478bd9Sstevel@tonic-gate 			 * Failed because of a signal. Very likely
6857c478bd9Sstevel@tonic-gate 			 * the caller will not retry.
6867c478bd9Sstevel@tonic-gate 			 */
6877c478bd9Sstevel@tonic-gate 			p->cku_err.re_status = RPC_INTR;
6887c478bd9Sstevel@tonic-gate 			p->cku_err.re_errno = EINTR;
6897c478bd9Sstevel@tonic-gate 			break;
6907c478bd9Sstevel@tonic-gate 		default:
6917c478bd9Sstevel@tonic-gate 			/*
6927c478bd9Sstevel@tonic-gate 			 * All other failures - server down or service
6937c478bd9Sstevel@tonic-gate 			 * down or temporary resource failure. Delay before
6947c478bd9Sstevel@tonic-gate 			 * returning to caller.
6957c478bd9Sstevel@tonic-gate 			 */
6967c478bd9Sstevel@tonic-gate 			ticks = clnt_rdma_min_delay * drv_usectohz(1000000);
6977c478bd9Sstevel@tonic-gate 			p->cku_err.re_status = RPC_CANTCONNECT;
6987c478bd9Sstevel@tonic-gate 			p->cku_err.re_errno = EIO;
6997c478bd9Sstevel@tonic-gate 
7007c478bd9Sstevel@tonic-gate 			if (h->cl_nosignal == TRUE) {
7017c478bd9Sstevel@tonic-gate 				delay(ticks);
7027c478bd9Sstevel@tonic-gate 			} else {
7037c478bd9Sstevel@tonic-gate 				if (delay_sig(ticks) == EINTR) {
7047c478bd9Sstevel@tonic-gate 					p->cku_err.re_status = RPC_INTR;
7057c478bd9Sstevel@tonic-gate 					p->cku_err.re_errno = EINTR;
7067c478bd9Sstevel@tonic-gate 				}
7077c478bd9Sstevel@tonic-gate 			}
7087c478bd9Sstevel@tonic-gate 			break;
7097c478bd9Sstevel@tonic-gate 		}
7107c478bd9Sstevel@tonic-gate 
7117c478bd9Sstevel@tonic-gate 		return (p->cku_err.re_status);
7127c478bd9Sstevel@tonic-gate 	}
7137c478bd9Sstevel@tonic-gate 
7147f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	if (p->cku_srcaddr.maxlen < conn->c_laddr.len) {
7157f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		if ((p->cku_srcaddr.maxlen != 0) &&
7167f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		    (p->cku_srcaddr.buf != NULL))
7177f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			kmem_free(p->cku_srcaddr.buf, p->cku_srcaddr.maxlen);
7187f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		p->cku_srcaddr.buf = kmem_zalloc(conn->c_laddr.maxlen,
7197f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		    KM_SLEEP);
7207f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		p->cku_srcaddr.maxlen = conn->c_laddr.maxlen;
7217f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	}
7227f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
7237f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	p->cku_srcaddr.len = conn->c_laddr.len;
7247f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	bcopy(conn->c_laddr.buf, p->cku_srcaddr.buf, conn->c_laddr.len);
7257f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
7260a701b1eSRobert Gordon 	clnt_check_credit(conn);
7277c478bd9Sstevel@tonic-gate 
7280a701b1eSRobert Gordon 	status = CLNT_RDMA_FAIL;
7297c478bd9Sstevel@tonic-gate 
7300a701b1eSRobert Gordon 	rpcsec_gss = gss_i_or_p = FALSE;
7317c478bd9Sstevel@tonic-gate 
7320a701b1eSRobert Gordon 	if (IS_RPCSEC_GSS(h)) {
7330a701b1eSRobert Gordon 		rpcsec_gss = TRUE;
7340a701b1eSRobert Gordon 		if (rpc_gss_get_service_type(h->cl_auth) ==
7350a701b1eSRobert Gordon 		    rpc_gss_svc_integrity ||
7360a701b1eSRobert Gordon 		    rpc_gss_get_service_type(h->cl_auth) ==
7370a701b1eSRobert Gordon 		    rpc_gss_svc_privacy)
7380a701b1eSRobert Gordon 			gss_i_or_p = TRUE;
7397c478bd9Sstevel@tonic-gate 	}
7407c478bd9Sstevel@tonic-gate 
7410a701b1eSRobert Gordon 	/*
7420a701b1eSRobert Gordon 	 * Try a regular RDMA message if RPCSEC_GSS is not being used
7430a701b1eSRobert Gordon 	 * or if RPCSEC_GSS is being used for authentication only.
7440a701b1eSRobert Gordon 	 */
7450a701b1eSRobert Gordon 	if (rpcsec_gss == FALSE ||
7460a701b1eSRobert Gordon 	    (rpcsec_gss == TRUE && gss_i_or_p == FALSE)) {
7477c478bd9Sstevel@tonic-gate 		/*
7480a701b1eSRobert Gordon 		 * Grab a send buffer for the request.  Try to
7490a701b1eSRobert Gordon 		 * encode it to see if it fits. If not, then it
7500a701b1eSRobert Gordon 		 * needs to be sent in a chunk.
7517c478bd9Sstevel@tonic-gate 		 */
7520a701b1eSRobert Gordon 		rpcmsg.type = SEND_BUFFER;
7530a701b1eSRobert Gordon 		if (rdma_buf_alloc(conn, &rpcmsg)) {
7540a701b1eSRobert Gordon 			DTRACE_PROBE(krpc__e__clntrdma__callit_nobufs);
7550a701b1eSRobert Gordon 			goto done;
7560a701b1eSRobert Gordon 		}
7577c478bd9Sstevel@tonic-gate 
7580a701b1eSRobert Gordon 		/* First try to encode into regular send buffer */
7590a701b1eSRobert Gordon 		op = RDMA_MSG;
7607c478bd9Sstevel@tonic-gate 
7610a701b1eSRobert Gordon 		call_xdrp = &callxdr;
7627c478bd9Sstevel@tonic-gate 
7630a701b1eSRobert Gordon 		xdrrdma_create(call_xdrp, rpcmsg.addr, rpcmsg.len,
7640a701b1eSRobert Gordon 		    rdma_minchunk, NULL, XDR_ENCODE, conn);
7650a701b1eSRobert Gordon 
7660a701b1eSRobert Gordon 		status = clnt_compose_rpcmsg(h, procnum, &rpcmsg, call_xdrp,
7670a701b1eSRobert Gordon 		    xdr_args, argsp);
7680a701b1eSRobert Gordon 
7690a701b1eSRobert Gordon 		if (status != CLNT_RDMA_SUCCESS) {
7700a701b1eSRobert Gordon 			/* Clean up from previous encode attempt */
7717c478bd9Sstevel@tonic-gate 			rdma_buf_free(conn, &rpcmsg);
7720a701b1eSRobert Gordon 			XDR_DESTROY(call_xdrp);
7730a701b1eSRobert Gordon 		} else {
7740a701b1eSRobert Gordon 			XDR_CONTROL(call_xdrp, XDR_RDMA_GET_CHUNK_LEN, &rcil);
7757c478bd9Sstevel@tonic-gate 		}
7760a701b1eSRobert Gordon 	}
7770a701b1eSRobert Gordon 
7780a701b1eSRobert Gordon 	/* If the encode didn't work, then try a NOMSG */
7790a701b1eSRobert Gordon 	if (status != CLNT_RDMA_SUCCESS) {
7800a701b1eSRobert Gordon 
7810a701b1eSRobert Gordon 		msglen = CKU_HDRSIZE + BYTES_PER_XDR_UNIT + MAX_AUTH_BYTES +
7820a701b1eSRobert Gordon 		    xdr_sizeof(xdr_args, argsp);
7830a701b1eSRobert Gordon 
7840a701b1eSRobert Gordon 		msglen = calc_length(msglen);
7850a701b1eSRobert Gordon 
7860a701b1eSRobert Gordon 		/* pick up the lengths for the reply buffer needed */
7870a701b1eSRobert Gordon 		(void) xdrrdma_sizeof(xdr_args, argsp, 0,
7880a701b1eSRobert Gordon 		    &rcil.rcil_len, &rcil.rcil_len_alt);
7897c478bd9Sstevel@tonic-gate 
7907c478bd9Sstevel@tonic-gate 		/*
7910a701b1eSRobert Gordon 		 * Construct a clist to describe the CHUNK_BUFFER
7920a701b1eSRobert Gordon 		 * for the rpcmsg.
7937c478bd9Sstevel@tonic-gate 		 */
7940a701b1eSRobert Gordon 		cl_rpcmsg = clist_alloc();
7950a701b1eSRobert Gordon 		cl_rpcmsg->c_len = msglen;
7960a701b1eSRobert Gordon 		cl_rpcmsg->rb_longbuf.type = RDMA_LONG_BUFFER;
7970a701b1eSRobert Gordon 		cl_rpcmsg->rb_longbuf.len = msglen;
7980a701b1eSRobert Gordon 		if (rdma_buf_alloc(conn, &cl_rpcmsg->rb_longbuf)) {
7990a701b1eSRobert Gordon 			clist_free(cl_rpcmsg);
8000a701b1eSRobert Gordon 			goto done;
8017c478bd9Sstevel@tonic-gate 		}
8020a701b1eSRobert Gordon 		cl_rpcmsg->w.c_saddr3 = cl_rpcmsg->rb_longbuf.addr;
8037c478bd9Sstevel@tonic-gate 
8040a701b1eSRobert Gordon 		op = RDMA_NOMSG;
8050a701b1eSRobert Gordon 		call_xdrp = &callxdr;
8060a701b1eSRobert Gordon 
8070a701b1eSRobert Gordon 		xdrrdma_create(call_xdrp, cl_rpcmsg->rb_longbuf.addr,
8080a701b1eSRobert Gordon 		    cl_rpcmsg->rb_longbuf.len, 0,
8090a701b1eSRobert Gordon 		    cl_rpcmsg, XDR_ENCODE, conn);
8100a701b1eSRobert Gordon 
811*8ca9c6bbSFaramarz Jalalian - Sun Microsystems - Irvine United States 		status = clnt_compose_rpcmsg(h, procnum, &cl_rpcmsg->rb_longbuf,
812*8ca9c6bbSFaramarz Jalalian - Sun Microsystems - Irvine United States 		    call_xdrp, xdr_args, argsp);
8130a701b1eSRobert Gordon 
814*8ca9c6bbSFaramarz Jalalian - Sun Microsystems - Irvine United States 		DTRACE_PROBE2(krpc__i__clntrdma__callit__longbuf, int, status,
815*8ca9c6bbSFaramarz Jalalian - Sun Microsystems - Irvine United States 		    int, msglen);
8160a701b1eSRobert Gordon 		if (status != CLNT_RDMA_SUCCESS) {
8170a701b1eSRobert Gordon 			p->cku_err.re_status = RPC_CANTENCODEARGS;
8180a701b1eSRobert Gordon 			p->cku_err.re_errno = EIO;
8190a701b1eSRobert Gordon 			DTRACE_PROBE(krpc__e__clntrdma__callit__composemsg);
8200a701b1eSRobert Gordon 			goto done;
8217c478bd9Sstevel@tonic-gate 		}
8227c478bd9Sstevel@tonic-gate 	}
8237c478bd9Sstevel@tonic-gate 
8240a701b1eSRobert Gordon 	/*
8250a701b1eSRobert Gordon 	 * During the XDR_ENCODE we may have "allocated" an RDMA READ or
8260a701b1eSRobert Gordon 	 * RDMA WRITE clist.
8270a701b1eSRobert Gordon 	 *
8280a701b1eSRobert Gordon 	 * First pull the RDMA READ chunk list from the XDR private
8290a701b1eSRobert Gordon 	 * area to keep it handy.
8300a701b1eSRobert Gordon 	 */
8310a701b1eSRobert Gordon 	XDR_CONTROL(call_xdrp, XDR_RDMA_GET_RLIST, &cl);
8320a701b1eSRobert Gordon 
8330a701b1eSRobert Gordon 	if (gss_i_or_p) {
8340a701b1eSRobert Gordon 		long_reply_len = rcil.rcil_len + rcil.rcil_len_alt;
8350a701b1eSRobert Gordon 		long_reply_len += MAX_AUTH_BYTES;
8360a701b1eSRobert Gordon 	} else {
8370a701b1eSRobert Gordon 		long_reply_len = rcil.rcil_len;
8380a701b1eSRobert Gordon 	}
8397c478bd9Sstevel@tonic-gate 
8407c478bd9Sstevel@tonic-gate 	/*
8417c478bd9Sstevel@tonic-gate 	 * Update the chunk size information for the Long RPC msg.
8427c478bd9Sstevel@tonic-gate 	 */
8437c478bd9Sstevel@tonic-gate 	if (cl && op == RDMA_NOMSG)
8447c478bd9Sstevel@tonic-gate 		cl->c_len = p->cku_outsz;
8457c478bd9Sstevel@tonic-gate 
8467c478bd9Sstevel@tonic-gate 	/*
8470a701b1eSRobert Gordon 	 * Prepare the RDMA header. On success xdrs will hold the result
8480a701b1eSRobert Gordon 	 * of xdrmem_create() for a SEND_BUFFER.
8497c478bd9Sstevel@tonic-gate 	 */
8500a701b1eSRobert Gordon 	status = clnt_compose_rdma_header(conn, h, &clmsg,
8510a701b1eSRobert Gordon 	    &rdmahdr_o_xdrs, &op);
8520a701b1eSRobert Gordon 
8530a701b1eSRobert Gordon 	if (status != CLNT_RDMA_SUCCESS) {
8547c478bd9Sstevel@tonic-gate 		p->cku_err.re_status = RPC_CANTSEND;
8557c478bd9Sstevel@tonic-gate 		p->cku_err.re_errno = EIO;
8567c478bd9Sstevel@tonic-gate 		RCSTAT_INCR(rcnomem);
8570a701b1eSRobert Gordon 		DTRACE_PROBE(krpc__e__clntrdma__callit__nobufs2);
8587c478bd9Sstevel@tonic-gate 		goto done;
8597c478bd9Sstevel@tonic-gate 	}
8600a701b1eSRobert Gordon 
8617c478bd9Sstevel@tonic-gate 	/*
8620a701b1eSRobert Gordon 	 * Now insert the RDMA READ list iff present
8637c478bd9Sstevel@tonic-gate 	 */
8640a701b1eSRobert Gordon 	status = clnt_setup_rlist(conn, rdmahdr_o_xdrs, call_xdrp);
8650a701b1eSRobert Gordon 	if (status != CLNT_RDMA_SUCCESS) {
8660a701b1eSRobert Gordon 		DTRACE_PROBE(krpc__e__clntrdma__callit__clistreg);
8670a701b1eSRobert Gordon 		rdma_buf_free(conn, &clmsg);
8680a701b1eSRobert Gordon 		p->cku_err.re_status = RPC_CANTSEND;
8690a701b1eSRobert Gordon 		p->cku_err.re_errno = EIO;
8700a701b1eSRobert Gordon 		goto done;
8710a701b1eSRobert Gordon 	}
8727c478bd9Sstevel@tonic-gate 
8737c478bd9Sstevel@tonic-gate 	/*
8740a701b1eSRobert Gordon 	 * Set up the RDMA WRITE chunk list for an NFS read operation;
8750a701b1eSRobert Gordon 	 * other operations will have a NULL, which will result
8760a701b1eSRobert Gordon 	 * in a NULL list in the XDR stream.
8777c478bd9Sstevel@tonic-gate 	 */
878f837ee4aSSiddheshwar Mahesh 	status = clnt_setup_wlist(conn, rdmahdr_o_xdrs, call_xdrp, &rndup);
8790a701b1eSRobert Gordon 	if (status != CLNT_RDMA_SUCCESS) {
8800a701b1eSRobert Gordon 		rdma_buf_free(conn, &clmsg);
8810a701b1eSRobert Gordon 		p->cku_err.re_status = RPC_CANTSEND;
8820a701b1eSRobert Gordon 		p->cku_err.re_errno = EIO;
8830a701b1eSRobert Gordon 		goto done;
8840a701b1eSRobert Gordon 	}
8857c478bd9Sstevel@tonic-gate 
8860a701b1eSRobert Gordon 	/*
8870a701b1eSRobert Gordon 	 * If NULL call and RPCSEC_GSS, provide a chunk such that
8880a701b1eSRobert Gordon 	 * large responses can flow back to the client.
8890a701b1eSRobert Gordon 	 * If RPCSEC_GSS with integrity or privacy is in use, get chunk.
8900a701b1eSRobert Gordon 	 */
8910a701b1eSRobert Gordon 	if ((procnum == 0 && rpcsec_gss == TRUE) ||
8920a701b1eSRobert Gordon 	    (rpcsec_gss == TRUE && gss_i_or_p == TRUE))
8930a701b1eSRobert Gordon 		long_reply_len += 1024;
8940a701b1eSRobert Gordon 
8950a701b1eSRobert Gordon 	status = clnt_setup_long_reply(conn, &cl_long_reply, long_reply_len);
8967c478bd9Sstevel@tonic-gate 
897*8ca9c6bbSFaramarz Jalalian - Sun Microsystems - Irvine United States 	DTRACE_PROBE2(krpc__i__clntrdma__callit__longreply, int, status,
898*8ca9c6bbSFaramarz Jalalian - Sun Microsystems - Irvine United States 	    int, long_reply_len);
899*8ca9c6bbSFaramarz Jalalian - Sun Microsystems - Irvine United States 
9000a701b1eSRobert Gordon 	if (status != CLNT_RDMA_SUCCESS) {
9010a701b1eSRobert Gordon 		rdma_buf_free(conn, &clmsg);
9020a701b1eSRobert Gordon 		p->cku_err.re_status = RPC_CANTSEND;
9030a701b1eSRobert Gordon 		p->cku_err.re_errno = EIO;
9040a701b1eSRobert Gordon 		goto done;
9057c478bd9Sstevel@tonic-gate 	}
9067c478bd9Sstevel@tonic-gate 
9077c478bd9Sstevel@tonic-gate 	/*
9080a701b1eSRobert Gordon 	 * XDR encode the RDMA_REPLY write chunk
9090a701b1eSRobert Gordon 	 */
9100a701b1eSRobert Gordon 	seg_array_len = (cl_long_reply ? 1 : 0);
9110a701b1eSRobert Gordon 	(void) xdr_encode_reply_wchunk(rdmahdr_o_xdrs, cl_long_reply,
9120a701b1eSRobert Gordon 	    seg_array_len);
9130a701b1eSRobert Gordon 
9140a701b1eSRobert Gordon 	/*
9150a701b1eSRobert Gordon 	 * Construct a clist in "sendlist" that represents what we
9160a701b1eSRobert Gordon 	 * will push over the wire.
9170a701b1eSRobert Gordon 	 *
9187c478bd9Sstevel@tonic-gate 	 * Start with the RDMA header and clist (if any)
9197c478bd9Sstevel@tonic-gate 	 */
9200a701b1eSRobert Gordon 	clist_add(&cl_sendlist, 0, XDR_GETPOS(rdmahdr_o_xdrs), &clmsg.handle,
9210a701b1eSRobert Gordon 	    clmsg.addr, NULL, NULL);
9227c478bd9Sstevel@tonic-gate 
9237c478bd9Sstevel@tonic-gate 	/*
9240a701b1eSRobert Gordon 	 * Put the RPC call message in the sendlist if this is a small RPC
9257c478bd9Sstevel@tonic-gate 	 */
9267c478bd9Sstevel@tonic-gate 	if (op == RDMA_MSG) {
9270a701b1eSRobert Gordon 		clist_add(&cl_sendlist, 0, p->cku_outsz, &rpcmsg.handle,
9280a701b1eSRobert Gordon 		    rpcmsg.addr, NULL, NULL);
9297c478bd9Sstevel@tonic-gate 	} else {
9307c478bd9Sstevel@tonic-gate 		/* Long RPC already in chunk list */
9317c478bd9Sstevel@tonic-gate 		RCSTAT_INCR(rclongrpcs);
9327c478bd9Sstevel@tonic-gate 	}
9337c478bd9Sstevel@tonic-gate 
9347c478bd9Sstevel@tonic-gate 	/*
9357c478bd9Sstevel@tonic-gate 	 * Set up a reply buffer ready for the reply
9367c478bd9Sstevel@tonic-gate 	 */
9377c478bd9Sstevel@tonic-gate 	status = rdma_clnt_postrecv(conn, p->cku_xid);
9387c478bd9Sstevel@tonic-gate 	if (status != RDMA_SUCCESS) {
9397c478bd9Sstevel@tonic-gate 		rdma_buf_free(conn, &clmsg);
9407c478bd9Sstevel@tonic-gate 		p->cku_err.re_status = RPC_CANTSEND;
9417c478bd9Sstevel@tonic-gate 		p->cku_err.re_errno = EIO;
9427c478bd9Sstevel@tonic-gate 		goto done;
9437c478bd9Sstevel@tonic-gate 	}
9440a701b1eSRobert Gordon 
9457c478bd9Sstevel@tonic-gate 	/*
9467c478bd9Sstevel@tonic-gate 	 * sync the memory for dma
9477c478bd9Sstevel@tonic-gate 	 */
9487c478bd9Sstevel@tonic-gate 	if (cl != NULL) {
9490a701b1eSRobert Gordon 		status = clist_syncmem(conn, cl, CLIST_REG_SOURCE);
9507c478bd9Sstevel@tonic-gate 		if (status != RDMA_SUCCESS) {
9510a701b1eSRobert Gordon 			(void) rdma_clnt_postrecv_remove(conn, p->cku_xid);
9527c478bd9Sstevel@tonic-gate 			rdma_buf_free(conn, &clmsg);
9537c478bd9Sstevel@tonic-gate 			p->cku_err.re_status = RPC_CANTSEND;
9547c478bd9Sstevel@tonic-gate 			p->cku_err.re_errno = EIO;
9557c478bd9Sstevel@tonic-gate 			goto done;
9567c478bd9Sstevel@tonic-gate 		}
9577c478bd9Sstevel@tonic-gate 	}
9587c478bd9Sstevel@tonic-gate 
9597c478bd9Sstevel@tonic-gate 	/*
9600a701b1eSRobert Gordon 	 * Send the RDMA Header and RPC call message to the server
9617c478bd9Sstevel@tonic-gate 	 */
9620a701b1eSRobert Gordon 	status = RDMA_SEND(conn, cl_sendlist, p->cku_xid);
9637c478bd9Sstevel@tonic-gate 	if (status != RDMA_SUCCESS) {
9640a701b1eSRobert Gordon 		(void) rdma_clnt_postrecv_remove(conn, p->cku_xid);
9657c478bd9Sstevel@tonic-gate 		p->cku_err.re_status = RPC_CANTSEND;
9667c478bd9Sstevel@tonic-gate 		p->cku_err.re_errno = EIO;
9677c478bd9Sstevel@tonic-gate 		goto done;
9687c478bd9Sstevel@tonic-gate 	}
9697c478bd9Sstevel@tonic-gate 
9707c478bd9Sstevel@tonic-gate 	/*
9710a701b1eSRobert Gordon 	 * RDMA plugin now owns the send msg buffers.
9720a701b1eSRobert Gordon 	 * Clear them out and don't free them.
9737c478bd9Sstevel@tonic-gate 	 */
9740a701b1eSRobert Gordon 	clmsg.addr = NULL;
9750a701b1eSRobert Gordon 	if (rpcmsg.type == SEND_BUFFER)
9760a701b1eSRobert Gordon 		rpcmsg.addr = NULL;
9777c478bd9Sstevel@tonic-gate 
9787c478bd9Sstevel@tonic-gate 	/*
9790a701b1eSRobert Gordon 	 * Recv rpc reply
9807c478bd9Sstevel@tonic-gate 	 */
9810a701b1eSRobert Gordon 	status = RDMA_RECV(conn, &cl_recvlist, p->cku_xid);
9827c478bd9Sstevel@tonic-gate 
9837c478bd9Sstevel@tonic-gate 	/*
9847c478bd9Sstevel@tonic-gate 	 * Now check recv status
9857c478bd9Sstevel@tonic-gate 	 */
9867c478bd9Sstevel@tonic-gate 	if (status != 0) {
9877c478bd9Sstevel@tonic-gate 		if (status == RDMA_INTR) {
9887c478bd9Sstevel@tonic-gate 			p->cku_err.re_status = RPC_INTR;
9897c478bd9Sstevel@tonic-gate 			p->cku_err.re_errno = EINTR;
9907c478bd9Sstevel@tonic-gate 			RCSTAT_INCR(rcintrs);
9917c478bd9Sstevel@tonic-gate 		} else if (status == RPC_TIMEDOUT) {
9927c478bd9Sstevel@tonic-gate 			p->cku_err.re_status = RPC_TIMEDOUT;
9937c478bd9Sstevel@tonic-gate 			p->cku_err.re_errno = ETIMEDOUT;
9947c478bd9Sstevel@tonic-gate 			RCSTAT_INCR(rctimeouts);
9957c478bd9Sstevel@tonic-gate 		} else {
9967c478bd9Sstevel@tonic-gate 			p->cku_err.re_status = RPC_CANTRECV;
9977c478bd9Sstevel@tonic-gate 			p->cku_err.re_errno = EIO;
9987c478bd9Sstevel@tonic-gate 		}
9997c478bd9Sstevel@tonic-gate 		goto done;
10007c478bd9Sstevel@tonic-gate 	}
10010a701b1eSRobert Gordon 
10027c478bd9Sstevel@tonic-gate 	/*
10037c478bd9Sstevel@tonic-gate 	 * Process the reply message.
10047c478bd9Sstevel@tonic-gate 	 *
10057c478bd9Sstevel@tonic-gate 	 * First the chunk list (if any)
10067c478bd9Sstevel@tonic-gate 	 */
10070a701b1eSRobert Gordon 	rdmahdr_i_xdrs = &(p->cku_inxdr);
10080a701b1eSRobert Gordon 	xdrmem_create(rdmahdr_i_xdrs,
10090a701b1eSRobert Gordon 	    (caddr_t)(uintptr_t)cl_recvlist->w.c_saddr3,
10100a701b1eSRobert Gordon 	    cl_recvlist->c_len, XDR_DECODE);
10110a701b1eSRobert Gordon 
10127c478bd9Sstevel@tonic-gate 	/*
10137c478bd9Sstevel@tonic-gate 	 * Treat xid as opaque (xid is the first entity
10147c478bd9Sstevel@tonic-gate 	 * in the rpc rdma message).
10150a701b1eSRobert Gordon 	 * Skip xid and set the xdr position accordingly.
10167c478bd9Sstevel@tonic-gate 	 */
10170a701b1eSRobert Gordon 	XDR_SETPOS(rdmahdr_i_xdrs, sizeof (uint32_t));
10180a701b1eSRobert Gordon 	(void) xdr_u_int(rdmahdr_i_xdrs, &vers);
10190a701b1eSRobert Gordon 	(void) xdr_u_int(rdmahdr_i_xdrs, &rdma_credit);
10200a701b1eSRobert Gordon 	(void) xdr_u_int(rdmahdr_i_xdrs, &op);
10210a701b1eSRobert Gordon 	(void) xdr_do_clist(rdmahdr_i_xdrs, &cl);
10220a701b1eSRobert Gordon 
10230a701b1eSRobert Gordon 	clnt_update_credit(conn, rdma_credit);
10240a701b1eSRobert Gordon 
10250a701b1eSRobert Gordon 	wlist_exists_reply = FALSE;
10260a701b1eSRobert Gordon 	if (! xdr_decode_wlist(rdmahdr_i_xdrs, &cl_rpcreply_wlist,
10270a701b1eSRobert Gordon 	    &wlist_exists_reply)) {
10280a701b1eSRobert Gordon 		DTRACE_PROBE(krpc__e__clntrdma__callit__wlist_decode);
10290a701b1eSRobert Gordon 		p->cku_err.re_status = RPC_CANTDECODERES;
10300a701b1eSRobert Gordon 		p->cku_err.re_errno = EIO;
10310a701b1eSRobert Gordon 		goto done;
10320a701b1eSRobert Gordon 	}
10337c478bd9Sstevel@tonic-gate 
10347c478bd9Sstevel@tonic-gate 	/*
10350a701b1eSRobert Gordon 	 * The server shouldn't have sent an RDMA_SEND that
10360a701b1eSRobert Gordon 	 * requires the client to RDMA_WRITE a reply back to
10370a701b1eSRobert Gordon 	 * the server, so silently ignore what the
10380a701b1eSRobert Gordon 	 * server returns in the rdma_reply section of the
10390a701b1eSRobert Gordon 	 * header.
10407c478bd9Sstevel@tonic-gate 	 */
10410a701b1eSRobert Gordon 	(void) xdr_decode_reply_wchunk(rdmahdr_i_xdrs, &cl_rdma_reply);
10420a701b1eSRobert Gordon 	off = xdr_getpos(rdmahdr_i_xdrs);
10437c478bd9Sstevel@tonic-gate 
10440a701b1eSRobert Gordon 	clnt_decode_long_reply(conn, cl_long_reply,
10450a701b1eSRobert Gordon 	    cl_rdma_reply, &replyxdr, &reply_xdrp,
10460a701b1eSRobert Gordon 	    cl, cl_recvlist, op, off);
10477c478bd9Sstevel@tonic-gate 
10480a701b1eSRobert Gordon 	if (reply_xdrp == NULL)
10490a701b1eSRobert Gordon 		goto done;
10507c478bd9Sstevel@tonic-gate 
10510a701b1eSRobert Gordon 	if (wlist_exists_reply) {
10520a701b1eSRobert Gordon 		XDR_CONTROL(reply_xdrp, XDR_RDMA_SET_WLIST, cl_rpcreply_wlist);
10537c478bd9Sstevel@tonic-gate 	}
10547c478bd9Sstevel@tonic-gate 
10557c478bd9Sstevel@tonic-gate 	reply_msg.rm_direction = REPLY;
10567c478bd9Sstevel@tonic-gate 	reply_msg.rm_reply.rp_stat = MSG_ACCEPTED;
10577c478bd9Sstevel@tonic-gate 	reply_msg.acpted_rply.ar_stat = SUCCESS;
10587c478bd9Sstevel@tonic-gate 	reply_msg.acpted_rply.ar_verf = _null_auth;
10590a701b1eSRobert Gordon 
10607c478bd9Sstevel@tonic-gate 	/*
10617c478bd9Sstevel@tonic-gate 	 *  xdr_results will be done in AUTH_UNWRAP.
10627c478bd9Sstevel@tonic-gate 	 */
10637c478bd9Sstevel@tonic-gate 	reply_msg.acpted_rply.ar_results.where = NULL;
10647c478bd9Sstevel@tonic-gate 	reply_msg.acpted_rply.ar_results.proc = xdr_void;
10657c478bd9Sstevel@tonic-gate 
10667c478bd9Sstevel@tonic-gate 	/*
10677c478bd9Sstevel@tonic-gate 	 * Decode and validate the response.
10687c478bd9Sstevel@tonic-gate 	 */
10690a701b1eSRobert Gordon 	if (xdr_replymsg(reply_xdrp, &reply_msg)) {
10707c478bd9Sstevel@tonic-gate 		enum clnt_stat re_status;
10717c478bd9Sstevel@tonic-gate 
10727c478bd9Sstevel@tonic-gate 		_seterr_reply(&reply_msg, &(p->cku_err));
10737c478bd9Sstevel@tonic-gate 
10747c478bd9Sstevel@tonic-gate 		re_status = p->cku_err.re_status;
10757c478bd9Sstevel@tonic-gate 		if (re_status == RPC_SUCCESS) {
10767c478bd9Sstevel@tonic-gate 			/*
10777c478bd9Sstevel@tonic-gate 			 * Reply is good, check auth.
10787c478bd9Sstevel@tonic-gate 			 */
10797c478bd9Sstevel@tonic-gate 			if (!AUTH_VALIDATE(h->cl_auth,
10807c478bd9Sstevel@tonic-gate 			    &reply_msg.acpted_rply.ar_verf)) {
10817c478bd9Sstevel@tonic-gate 				p->cku_err.re_status = RPC_AUTHERROR;
10827c478bd9Sstevel@tonic-gate 				p->cku_err.re_why = AUTH_INVALIDRESP;
10837c478bd9Sstevel@tonic-gate 				RCSTAT_INCR(rcbadverfs);
10840a701b1eSRobert Gordon 				DTRACE_PROBE(
10850a701b1eSRobert Gordon 				    krpc__e__clntrdma__callit__authvalidate);
10860a701b1eSRobert Gordon 			} else if (!AUTH_UNWRAP(h->cl_auth, reply_xdrp,
10877c478bd9Sstevel@tonic-gate 			    xdr_results, resultsp)) {
10887c478bd9Sstevel@tonic-gate 				p->cku_err.re_status = RPC_CANTDECODERES;
10897c478bd9Sstevel@tonic-gate 				p->cku_err.re_errno = EIO;
10900a701b1eSRobert Gordon 				DTRACE_PROBE(
10910a701b1eSRobert Gordon 				    krpc__e__clntrdma__callit__authunwrap);
10927c478bd9Sstevel@tonic-gate 			}
10937c478bd9Sstevel@tonic-gate 		} else {
10947c478bd9Sstevel@tonic-gate 			/* set errno in case we can't recover */
10957c478bd9Sstevel@tonic-gate 			if (re_status != RPC_VERSMISMATCH &&
10967c478bd9Sstevel@tonic-gate 			    re_status != RPC_AUTHERROR &&
10977c478bd9Sstevel@tonic-gate 			    re_status != RPC_PROGVERSMISMATCH)
10987c478bd9Sstevel@tonic-gate 				p->cku_err.re_errno = EIO;
10997c478bd9Sstevel@tonic-gate 
11007c478bd9Sstevel@tonic-gate 			if (re_status == RPC_AUTHERROR) {
11010a701b1eSRobert Gordon 				if ((refresh_attempt > 0) &&
11020a701b1eSRobert Gordon 				    AUTH_REFRESH(h->cl_auth, &reply_msg,
11030a701b1eSRobert Gordon 				    p->cku_cred)) {
11040a701b1eSRobert Gordon 					refresh_attempt--;
11050a701b1eSRobert Gordon 					try_call_again = 1;
11060a701b1eSRobert Gordon 					goto done;
11070a701b1eSRobert Gordon 				}
11080a701b1eSRobert Gordon 
11090a701b1eSRobert Gordon 				try_call_again = 0;
11100a701b1eSRobert Gordon 
11110a701b1eSRobert Gordon 				/*
11120a701b1eSRobert Gordon 				 * We have used the client handle to
11130a701b1eSRobert Gordon 				 * do an AUTH_REFRESH and the RPC status may
11140a701b1eSRobert Gordon 				 * be set to RPC_SUCCESS; Let's make sure to
11150a701b1eSRobert Gordon 				 * set it to RPC_AUTHERROR.
11160a701b1eSRobert Gordon 				 */
11170a701b1eSRobert Gordon 				p->cku_err.re_status = RPC_AUTHERROR;
11180a701b1eSRobert Gordon 
11197c478bd9Sstevel@tonic-gate 				/*
11207c478bd9Sstevel@tonic-gate 				 * Map recoverable and unrecoverable
11217c478bd9Sstevel@tonic-gate 				 * authentication errors to appropriate
11227c478bd9Sstevel@tonic-gate 				 * errno
11237c478bd9Sstevel@tonic-gate 				 */
11247c478bd9Sstevel@tonic-gate 				switch (p->cku_err.re_why) {
11257c478bd9Sstevel@tonic-gate 				case AUTH_BADCRED:
11267c478bd9Sstevel@tonic-gate 				case AUTH_BADVERF:
11277c478bd9Sstevel@tonic-gate 				case AUTH_INVALIDRESP:
11287c478bd9Sstevel@tonic-gate 				case AUTH_TOOWEAK:
11297c478bd9Sstevel@tonic-gate 				case AUTH_FAILED:
11307c478bd9Sstevel@tonic-gate 				case RPCSEC_GSS_NOCRED:
11317c478bd9Sstevel@tonic-gate 				case RPCSEC_GSS_FAILED:
11327c478bd9Sstevel@tonic-gate 					p->cku_err.re_errno = EACCES;
11337c478bd9Sstevel@tonic-gate 					break;
11347c478bd9Sstevel@tonic-gate 				case AUTH_REJECTEDCRED:
11357c478bd9Sstevel@tonic-gate 				case AUTH_REJECTEDVERF:
11367c478bd9Sstevel@tonic-gate 				default:
11377c478bd9Sstevel@tonic-gate 					p->cku_err.re_errno = EIO;
11387c478bd9Sstevel@tonic-gate 					break;
11397c478bd9Sstevel@tonic-gate 				}
11407c478bd9Sstevel@tonic-gate 			}
11410a701b1eSRobert Gordon 			DTRACE_PROBE1(krpc__e__clntrdma__callit__rpcfailed,
11420a701b1eSRobert Gordon 			    int, p->cku_err.re_why);
11437c478bd9Sstevel@tonic-gate 		}
11447c478bd9Sstevel@tonic-gate 	} else {
11457c478bd9Sstevel@tonic-gate 		p->cku_err.re_status = RPC_CANTDECODERES;
11467c478bd9Sstevel@tonic-gate 		p->cku_err.re_errno = EIO;
11470a701b1eSRobert Gordon 		DTRACE_PROBE(krpc__e__clntrdma__callit__replymsg);
11487c478bd9Sstevel@tonic-gate 	}
11497c478bd9Sstevel@tonic-gate 
11500a701b1eSRobert Gordon done:
11510a701b1eSRobert Gordon 	clnt_return_credit(conn);
11520a701b1eSRobert Gordon 
11530a701b1eSRobert Gordon 	if (cl_sendlist != NULL)
11540a701b1eSRobert Gordon 		clist_free(cl_sendlist);
11550a701b1eSRobert Gordon 
11567c478bd9Sstevel@tonic-gate 	/*
11577c478bd9Sstevel@tonic-gate 	 * If rpc reply is in a chunk, free it now.
11587c478bd9Sstevel@tonic-gate 	 */
11590a701b1eSRobert Gordon 	if (cl_long_reply) {
1160f837ee4aSSiddheshwar Mahesh 		(void) clist_deregister(conn, cl_long_reply);
11610a701b1eSRobert Gordon 		rdma_buf_free(conn, &cl_long_reply->rb_longbuf);
11620a701b1eSRobert Gordon 		clist_free(cl_long_reply);
11630a701b1eSRobert Gordon 	}
11647c478bd9Sstevel@tonic-gate 
11650a701b1eSRobert Gordon 	if (call_xdrp)
11660a701b1eSRobert Gordon 		XDR_DESTROY(call_xdrp);
11677c478bd9Sstevel@tonic-gate 
1168f837ee4aSSiddheshwar Mahesh 	if (rndup.rb_private) {
1169f837ee4aSSiddheshwar Mahesh 		rdma_buf_free(conn, &rndup);
1170f837ee4aSSiddheshwar Mahesh 	}
1171f837ee4aSSiddheshwar Mahesh 
11720a701b1eSRobert Gordon 	if (reply_xdrp) {
11730a701b1eSRobert Gordon 		(void) xdr_rpc_free_verifier(reply_xdrp, &reply_msg);
11740a701b1eSRobert Gordon 		XDR_DESTROY(reply_xdrp);
11757c478bd9Sstevel@tonic-gate 	}
11767c478bd9Sstevel@tonic-gate 
11770a701b1eSRobert Gordon 	if (cl_rdma_reply) {
11780a701b1eSRobert Gordon 		clist_free(cl_rdma_reply);
11797c478bd9Sstevel@tonic-gate 	}
11807c478bd9Sstevel@tonic-gate 
11810a701b1eSRobert Gordon 	if (cl_recvlist) {
11820a701b1eSRobert Gordon 		rdma_buf_t	recvmsg = {0};
11830a701b1eSRobert Gordon 		recvmsg.addr = (caddr_t)(uintptr_t)cl_recvlist->w.c_saddr3;
11847c478bd9Sstevel@tonic-gate 		recvmsg.type = RECV_BUFFER;
11857c478bd9Sstevel@tonic-gate 		RDMA_BUF_FREE(conn, &recvmsg);
11860a701b1eSRobert Gordon 		clist_free(cl_recvlist);
11877c478bd9Sstevel@tonic-gate 	}
11880a701b1eSRobert Gordon 
11897c478bd9Sstevel@tonic-gate 	RDMA_REL_CONN(conn);
11900a701b1eSRobert Gordon 
11910a701b1eSRobert Gordon 	if (try_call_again)
11920a701b1eSRobert Gordon 		goto call_again;
11930a701b1eSRobert Gordon 
11947c478bd9Sstevel@tonic-gate 	if (p->cku_err.re_status != RPC_SUCCESS) {
11957c478bd9Sstevel@tonic-gate 		RCSTAT_INCR(rcbadcalls);
11967c478bd9Sstevel@tonic-gate 	}
11977c478bd9Sstevel@tonic-gate 	return (p->cku_err.re_status);
11987c478bd9Sstevel@tonic-gate }
11997c478bd9Sstevel@tonic-gate 
12000a701b1eSRobert Gordon 
12010a701b1eSRobert Gordon static void
clnt_decode_long_reply(CONN * conn,struct clist * cl_long_reply,struct clist * cl_rdma_reply,XDR * xdrs,XDR ** rxdrp,struct clist * cl,struct clist * cl_recvlist,uint_t op,uint_t off)12020a701b1eSRobert Gordon clnt_decode_long_reply(CONN *conn,
12030a701b1eSRobert Gordon     struct clist *cl_long_reply,
12040a701b1eSRobert Gordon     struct clist *cl_rdma_reply, XDR *xdrs,
12050a701b1eSRobert Gordon     XDR **rxdrp, struct clist *cl,
12060a701b1eSRobert Gordon     struct clist *cl_recvlist,
12070a701b1eSRobert Gordon     uint_t  op, uint_t off)
12080a701b1eSRobert Gordon {
12090a701b1eSRobert Gordon 	if (op != RDMA_NOMSG) {
12100a701b1eSRobert Gordon 		DTRACE_PROBE1(krpc__i__longrepl__rdmamsg__len,
12110a701b1eSRobert Gordon 		    int, cl_recvlist->c_len - off);
12120a701b1eSRobert Gordon 		xdrrdma_create(xdrs,
12130a701b1eSRobert Gordon 		    (caddr_t)(uintptr_t)(cl_recvlist->w.c_saddr3 + off),
12140a701b1eSRobert Gordon 		    cl_recvlist->c_len - off, 0, cl, XDR_DECODE, conn);
12150a701b1eSRobert Gordon 		*rxdrp = xdrs;
12160a701b1eSRobert Gordon 		return;
12170a701b1eSRobert Gordon 	}
12180a701b1eSRobert Gordon 
12190a701b1eSRobert Gordon 	/* op must be RDMA_NOMSG */
12200a701b1eSRobert Gordon 	if (cl) {
12210a701b1eSRobert Gordon 		DTRACE_PROBE(krpc__e__clntrdma__declongreply__serverreadlist);
12220a701b1eSRobert Gordon 		return;
12230a701b1eSRobert Gordon 	}
12240a701b1eSRobert Gordon 
12250a701b1eSRobert Gordon 	if (cl_long_reply->u.c_daddr) {
12260a701b1eSRobert Gordon 		DTRACE_PROBE1(krpc__i__longrepl__rdmanomsg__len,
12270a701b1eSRobert Gordon 		    int, cl_rdma_reply->c_len);
12280a701b1eSRobert Gordon 
12290a701b1eSRobert Gordon 		xdrrdma_create(xdrs, (caddr_t)cl_long_reply->u.c_daddr3,
12300a701b1eSRobert Gordon 		    cl_rdma_reply->c_len, 0, NULL, XDR_DECODE, conn);
12310a701b1eSRobert Gordon 
12320a701b1eSRobert Gordon 		*rxdrp = xdrs;
12330a701b1eSRobert Gordon 	}
12340a701b1eSRobert Gordon }
12350a701b1eSRobert Gordon 
12360a701b1eSRobert Gordon static void
clnt_return_credit(CONN * conn)12370a701b1eSRobert Gordon clnt_return_credit(CONN *conn)
12380a701b1eSRobert Gordon {
12390a701b1eSRobert Gordon 	rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc;
12400a701b1eSRobert Gordon 
12410a701b1eSRobert Gordon 	mutex_enter(&conn->c_lock);
12420a701b1eSRobert Gordon 	cc_info->clnt_cc_in_flight_ops--;
12430a701b1eSRobert Gordon 	cv_signal(&cc_info->clnt_cc_cv);
12440a701b1eSRobert Gordon 	mutex_exit(&conn->c_lock);
12450a701b1eSRobert Gordon }
12460a701b1eSRobert Gordon 
12470a701b1eSRobert Gordon static void
clnt_update_credit(CONN * conn,uint32_t rdma_credit)12480a701b1eSRobert Gordon clnt_update_credit(CONN *conn, uint32_t rdma_credit)
12490a701b1eSRobert Gordon {
12500a701b1eSRobert Gordon 	rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc;
12510a701b1eSRobert Gordon 
12520a701b1eSRobert Gordon 	/*
12530a701b1eSRobert Gordon 	 * If the granted has not altered, avoid taking the
12540a701b1eSRobert Gordon 	 * mutex, to essentially do nothing..
12550a701b1eSRobert Gordon 	 */
12560a701b1eSRobert Gordon 	if (cc_info->clnt_cc_granted_ops == rdma_credit)
12570a701b1eSRobert Gordon 		return;
12580a701b1eSRobert Gordon 	/*
12590a701b1eSRobert Gordon 	 * Get the granted number of buffers for credit control.
12600a701b1eSRobert Gordon 	 */
12610a701b1eSRobert Gordon 	mutex_enter(&conn->c_lock);
12620a701b1eSRobert Gordon 	cc_info->clnt_cc_granted_ops = rdma_credit;
12630a701b1eSRobert Gordon 	mutex_exit(&conn->c_lock);
12640a701b1eSRobert Gordon }
12650a701b1eSRobert Gordon 
12660a701b1eSRobert Gordon static void
clnt_check_credit(CONN * conn)12670a701b1eSRobert Gordon clnt_check_credit(CONN *conn)
12680a701b1eSRobert Gordon {
12690a701b1eSRobert Gordon 	rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc;
12700a701b1eSRobert Gordon 
12710a701b1eSRobert Gordon 	/*
12720a701b1eSRobert Gordon 	 * Make sure we are not going over our allowed buffer use
12730a701b1eSRobert Gordon 	 * (and make sure we have gotten a granted value before).
12740a701b1eSRobert Gordon 	 */
12750a701b1eSRobert Gordon 	mutex_enter(&conn->c_lock);
12760a701b1eSRobert Gordon 	while (cc_info->clnt_cc_in_flight_ops >= cc_info->clnt_cc_granted_ops &&
12770a701b1eSRobert Gordon 	    cc_info->clnt_cc_granted_ops != 0) {
12780a701b1eSRobert Gordon 		/*
12790a701b1eSRobert Gordon 		 * Client has maxed out its granted buffers due to
12800a701b1eSRobert Gordon 		 * credit control.  Current handling is to block and wait.
12810a701b1eSRobert Gordon 		 */
12820a701b1eSRobert Gordon 		cv_wait(&cc_info->clnt_cc_cv, &conn->c_lock);
12830a701b1eSRobert Gordon 	}
12840a701b1eSRobert Gordon 	cc_info->clnt_cc_in_flight_ops++;
12850a701b1eSRobert Gordon 	mutex_exit(&conn->c_lock);
12860a701b1eSRobert Gordon }
12870a701b1eSRobert Gordon 
12887c478bd9Sstevel@tonic-gate /* ARGSUSED */
12897c478bd9Sstevel@tonic-gate static void
clnt_rdma_kabort(CLIENT * h)12907c478bd9Sstevel@tonic-gate clnt_rdma_kabort(CLIENT *h)
12917c478bd9Sstevel@tonic-gate {
12927c478bd9Sstevel@tonic-gate }
12937c478bd9Sstevel@tonic-gate 
12947c478bd9Sstevel@tonic-gate static void
clnt_rdma_kerror(CLIENT * h,struct rpc_err * err)12957c478bd9Sstevel@tonic-gate clnt_rdma_kerror(CLIENT *h, struct rpc_err *err)
12967c478bd9Sstevel@tonic-gate {
12977c478bd9Sstevel@tonic-gate 	struct cku_private *p = htop(h);
12987c478bd9Sstevel@tonic-gate 	*err = p->cku_err;
12997c478bd9Sstevel@tonic-gate }
13007c478bd9Sstevel@tonic-gate 
13017c478bd9Sstevel@tonic-gate static bool_t
clnt_rdma_kfreeres(CLIENT * h,xdrproc_t xdr_res,caddr_t res_ptr)13027c478bd9Sstevel@tonic-gate clnt_rdma_kfreeres(CLIENT *h, xdrproc_t xdr_res, caddr_t res_ptr)
13037c478bd9Sstevel@tonic-gate {
13047c478bd9Sstevel@tonic-gate 	struct cku_private *p = htop(h);
13057c478bd9Sstevel@tonic-gate 	XDR *xdrs;
13067c478bd9Sstevel@tonic-gate 
13077c478bd9Sstevel@tonic-gate 	xdrs = &(p->cku_outxdr);
13087c478bd9Sstevel@tonic-gate 	xdrs->x_op = XDR_FREE;
13097c478bd9Sstevel@tonic-gate 	return ((*xdr_res)(xdrs, res_ptr));
13107c478bd9Sstevel@tonic-gate }
13117c478bd9Sstevel@tonic-gate 
13127c478bd9Sstevel@tonic-gate /* ARGSUSED */
13137c478bd9Sstevel@tonic-gate static bool_t
clnt_rdma_kcontrol(CLIENT * h,int cmd,char * arg)13147c478bd9Sstevel@tonic-gate clnt_rdma_kcontrol(CLIENT *h, int cmd, char *arg)
13157c478bd9Sstevel@tonic-gate {
13167c478bd9Sstevel@tonic-gate 	return (TRUE);
13177c478bd9Sstevel@tonic-gate }
13187c478bd9Sstevel@tonic-gate 
13197c478bd9Sstevel@tonic-gate /* ARGSUSED */
13207c478bd9Sstevel@tonic-gate static int
clnt_rdma_ksettimers(CLIENT * h,struct rpc_timers * t,struct rpc_timers * all,int minimum,void (* feedback)(int,int,caddr_t),caddr_t arg,uint32_t xid)13217c478bd9Sstevel@tonic-gate clnt_rdma_ksettimers(CLIENT *h, struct rpc_timers *t, struct rpc_timers *all,
13227c478bd9Sstevel@tonic-gate 	int minimum, void(*feedback)(int, int, caddr_t), caddr_t arg,
13237c478bd9Sstevel@tonic-gate 	uint32_t xid)
13247c478bd9Sstevel@tonic-gate {
13257c478bd9Sstevel@tonic-gate 	RCSTAT_INCR(rctimers);
13267c478bd9Sstevel@tonic-gate 	return (0);
13277c478bd9Sstevel@tonic-gate }
13287c478bd9Sstevel@tonic-gate 
13297c478bd9Sstevel@tonic-gate int
rdma_reachable(int addr_type,struct netbuf * addr,struct knetconfig ** knconf)13307c478bd9Sstevel@tonic-gate rdma_reachable(int addr_type, struct netbuf *addr, struct knetconfig **knconf)
13317c478bd9Sstevel@tonic-gate {
13327c478bd9Sstevel@tonic-gate 	rdma_registry_t	*rp;
13337c478bd9Sstevel@tonic-gate 	void *handle = NULL;
13347c478bd9Sstevel@tonic-gate 	struct knetconfig *knc;
13357c478bd9Sstevel@tonic-gate 	char *pf, *p;
13367c478bd9Sstevel@tonic-gate 	rdma_stat status;
13377c478bd9Sstevel@tonic-gate 	int error = 0;
13387c478bd9Sstevel@tonic-gate 
13397c478bd9Sstevel@tonic-gate 	if (!INGLOBALZONE(curproc))
13407c478bd9Sstevel@tonic-gate 		return (-1);
13410a701b1eSRobert Gordon 
13427c478bd9Sstevel@tonic-gate 	/*
13437c478bd9Sstevel@tonic-gate 	 * modload the RDMA plugins if not already done.
13447c478bd9Sstevel@tonic-gate 	 */
13457c478bd9Sstevel@tonic-gate 	if (!rdma_modloaded) {
13467c478bd9Sstevel@tonic-gate 		mutex_enter(&rdma_modload_lock);
13477c478bd9Sstevel@tonic-gate 		if (!rdma_modloaded) {
13487c478bd9Sstevel@tonic-gate 			error = rdma_modload();
13497c478bd9Sstevel@tonic-gate 		}
13507c478bd9Sstevel@tonic-gate 		mutex_exit(&rdma_modload_lock);
13517c478bd9Sstevel@tonic-gate 		if (error)
13527c478bd9Sstevel@tonic-gate 			return (-1);
13537c478bd9Sstevel@tonic-gate 	}
13547c478bd9Sstevel@tonic-gate 
13557c478bd9Sstevel@tonic-gate 	if (!rdma_dev_available)
13567c478bd9Sstevel@tonic-gate 		return (-1);
13577c478bd9Sstevel@tonic-gate 
13587c478bd9Sstevel@tonic-gate 	rw_enter(&rdma_lock, RW_READER);
13597c478bd9Sstevel@tonic-gate 	rp = rdma_mod_head;
13607c478bd9Sstevel@tonic-gate 	while (rp != NULL) {
136151f34d4bSRajkumar Sivaprakasam 		if (rp->r_mod_state == RDMA_MOD_INACTIVE) {
136251f34d4bSRajkumar Sivaprakasam 			rp = rp->r_next;
136351f34d4bSRajkumar Sivaprakasam 			continue;
136451f34d4bSRajkumar Sivaprakasam 		}
13657c478bd9Sstevel@tonic-gate 		status = RDMA_REACHABLE(rp->r_mod->rdma_ops, addr_type, addr,
13667c478bd9Sstevel@tonic-gate 		    &handle);
13677c478bd9Sstevel@tonic-gate 		if (status == RDMA_SUCCESS) {
13687c478bd9Sstevel@tonic-gate 			knc = kmem_zalloc(sizeof (struct knetconfig),
13690a701b1eSRobert Gordon 			    KM_SLEEP);
13707c478bd9Sstevel@tonic-gate 			knc->knc_semantics = NC_TPI_RDMA;
13717c478bd9Sstevel@tonic-gate 			pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
13727c478bd9Sstevel@tonic-gate 			p = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
13737c478bd9Sstevel@tonic-gate 			if (addr_type == AF_INET)
13747c478bd9Sstevel@tonic-gate 				(void) strncpy(pf, NC_INET, KNC_STRSIZE);
13757c478bd9Sstevel@tonic-gate 			else if (addr_type == AF_INET6)
13767c478bd9Sstevel@tonic-gate 				(void) strncpy(pf, NC_INET6, KNC_STRSIZE);
13777c478bd9Sstevel@tonic-gate 			pf[KNC_STRSIZE - 1] = '\0';
13787c478bd9Sstevel@tonic-gate 
13797c478bd9Sstevel@tonic-gate 			(void) strncpy(p, rp->r_mod->rdma_api, KNC_STRSIZE);
13807c478bd9Sstevel@tonic-gate 			p[KNC_STRSIZE - 1] = '\0';
13817c478bd9Sstevel@tonic-gate 
13827c478bd9Sstevel@tonic-gate 			knc->knc_protofmly = pf;
13837c478bd9Sstevel@tonic-gate 			knc->knc_proto = p;
138451f34d4bSRajkumar Sivaprakasam 			knc->knc_rdev = (dev_t)rp;
13857c478bd9Sstevel@tonic-gate 			*knconf = knc;
13867c478bd9Sstevel@tonic-gate 			rw_exit(&rdma_lock);
13877c478bd9Sstevel@tonic-gate 			return (0);
13887c478bd9Sstevel@tonic-gate 		}
13897c478bd9Sstevel@tonic-gate 		rp = rp->r_next;
13907c478bd9Sstevel@tonic-gate 	}
13917c478bd9Sstevel@tonic-gate 	rw_exit(&rdma_lock);
13927c478bd9Sstevel@tonic-gate 	return (-1);
13937c478bd9Sstevel@tonic-gate }
1394