xref: /illumos-gate/usr/src/uts/common/rpc/xdr_rdma.c (revision 6a634c9d)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
50a701b1eSRobert Gordon  * Common Development and Distribution License (the "License").
60a701b1eSRobert Gordon  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
220a4b0810SKaren Rochford  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
237c478bd9Sstevel@tonic-gate  */
247c478bd9Sstevel@tonic-gate 
250a701b1eSRobert Gordon /*
260a701b1eSRobert Gordon  * Copyright (c) 2007, The Ohio State University. All rights reserved.
270a701b1eSRobert Gordon  *
280a701b1eSRobert Gordon  * Portions of this source code are developed by the team members of
290a701b1eSRobert Gordon  * The Ohio State University's Network-Based Computing Laboratory (NBCL),
300a701b1eSRobert Gordon  * headed by Professor Dhabaleswar K. (DK) Panda.
310a701b1eSRobert Gordon  *
320a701b1eSRobert Gordon  * Acknowledgements for contributions from developers:
330a701b1eSRobert Gordon  *   Ranjit Noronha: noronha@cse.ohio-state.edu
340a701b1eSRobert Gordon  *   Lei Chai      : chail@cse.ohio-state.edu
350a701b1eSRobert Gordon  *   Weikuan Yu    : yuw@cse.ohio-state.edu
360a701b1eSRobert Gordon  *
370a701b1eSRobert Gordon  */
387c478bd9Sstevel@tonic-gate 
397c478bd9Sstevel@tonic-gate /*
407c478bd9Sstevel@tonic-gate  * xdr_rdma.c, XDR implementation using RDMA to move large chunks
417c478bd9Sstevel@tonic-gate  */
427c478bd9Sstevel@tonic-gate 
437c478bd9Sstevel@tonic-gate #include <sys/param.h>
447c478bd9Sstevel@tonic-gate #include <sys/types.h>
457c478bd9Sstevel@tonic-gate #include <sys/systm.h>
467c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
470a701b1eSRobert Gordon #include <sys/sdt.h>
480a701b1eSRobert Gordon #include <sys/debug.h>
497c478bd9Sstevel@tonic-gate 
507c478bd9Sstevel@tonic-gate #include <rpc/types.h>
517c478bd9Sstevel@tonic-gate #include <rpc/xdr.h>
527c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
537c478bd9Sstevel@tonic-gate #include <rpc/rpc_sztypes.h>
547c478bd9Sstevel@tonic-gate #include <rpc/rpc_rdma.h>
550a701b1eSRobert Gordon #include <sys/sysmacros.h>
560a701b1eSRobert Gordon 
570a4b0810SKaren Rochford /*
580a4b0810SKaren Rochford  * RPC header and XDR encoding overhead.  The number was determined by
590a4b0810SKaren Rochford  * tracing the msglen in svc_rdma_ksend for sec=sys,krb5,krb5i and krb5p.
600a4b0810SKaren Rochford  * If XDR_RDMA_BUF_OVERHEAD is not large enough, the server fires the dtrace
610a4b0810SKaren Rochford  * probe "krpc-e-svcrdma-ksend-noreplycl" in svc_rdma_ksend.
630a4b0810SKaren Rochford  */
640a4b0810SKaren Rochford #define	XDR_RDMA_BUF_OVERHEAD	300
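/*
 * This overhead is applied below in xdrrdma_control(): for
 * RCI_WRITE_UIO_CHUNK requests, (rci_len + XDR_RDMA_BUF_OVERHEAD) is
 * compared against xp_min_chunk to decide whether a write chunk list
 * is worth building.
 */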
650a4b0810SKaren Rochford 
660a701b1eSRobert Gordon static bool_t   xdrrdma_getint32(XDR *, int32_t *);
670a701b1eSRobert Gordon static bool_t   xdrrdma_putint32(XDR *, int32_t *);
680a701b1eSRobert Gordon static bool_t   xdrrdma_getbytes(XDR *, caddr_t, int);
690a701b1eSRobert Gordon static bool_t   xdrrdma_putbytes(XDR *, caddr_t, int);
700a701b1eSRobert Gordon uint_t		xdrrdma_getpos(XDR *);
710a701b1eSRobert Gordon bool_t		xdrrdma_setpos(XDR *, uint_t);
720a701b1eSRobert Gordon static rpc_inline_t *xdrrdma_inline(XDR *, int);
730a701b1eSRobert Gordon void		xdrrdma_destroy(XDR *);
740a701b1eSRobert Gordon static bool_t   xdrrdma_control(XDR *, int, void *);
75f837ee4aSSiddheshwar Mahesh static bool_t  xdrrdma_read_a_chunk(XDR *, CONN **);
76f837ee4aSSiddheshwar Mahesh static void xdrrdma_free_xdr_chunks(CONN *, struct clist *);
770a701b1eSRobert Gordon 
780a701b1eSRobert Gordon struct xdr_ops  xdrrdmablk_ops = {
790a701b1eSRobert Gordon 	xdrrdma_getbytes,
800a701b1eSRobert Gordon 	xdrrdma_putbytes,
810a701b1eSRobert Gordon 	xdrrdma_getpos,
820a701b1eSRobert Gordon 	xdrrdma_setpos,
830a701b1eSRobert Gordon 	xdrrdma_inline,
840a701b1eSRobert Gordon 	xdrrdma_destroy,
850a701b1eSRobert Gordon 	xdrrdma_control,
860a701b1eSRobert Gordon 	xdrrdma_getint32,
870a701b1eSRobert Gordon 	xdrrdma_putint32
880a701b1eSRobert Gordon };
897c478bd9Sstevel@tonic-gate 
900a701b1eSRobert Gordon struct xdr_ops  xdrrdma_ops = {
910a701b1eSRobert Gordon 	xdrrdma_getbytes,
920a701b1eSRobert Gordon 	xdrrdma_putbytes,
930a701b1eSRobert Gordon 	xdrrdma_getpos,
940a701b1eSRobert Gordon 	xdrrdma_setpos,
950a701b1eSRobert Gordon 	xdrrdma_inline,
960a701b1eSRobert Gordon 	xdrrdma_destroy,
970a701b1eSRobert Gordon 	xdrrdma_control,
980a701b1eSRobert Gordon 	xdrrdma_getint32,
990a701b1eSRobert Gordon 	xdrrdma_putint32
1000a701b1eSRobert Gordon };
1017c478bd9Sstevel@tonic-gate 
1027c478bd9Sstevel@tonic-gate /*
1030a701b1eSRobert Gordon  * A chunk list entry identifies a chunk of opaque data to be moved
1040a701b1eSRobert Gordon  * separately from the rest of the RPC message. xp_min_chunk = 0 is a
1050a701b1eSRobert Gordon  * special case for ENCODING, which means do not chunk the incoming stream of
1060a701b1eSRobert Gordon  * data.
107f837ee4aSSiddheshwar Mahesh  *
108f837ee4aSSiddheshwar Mahesh  * A read chunk can contain part of the RPC message in addition to the
109f837ee4aSSiddheshwar Mahesh  * inline message. In such a case, (xp_offp - x_base) will not provide
110f837ee4aSSiddheshwar Mahesh  * the correct xdr offset of the entire message. xp_off is used in such
111f837ee4aSSiddheshwar Mahesh  * a case to denote the offset or current position in the overall message
112f837ee4aSSiddheshwar Mahesh  * covering both the inline and the chunk. This is used only in the case
113f837ee4aSSiddheshwar Mahesh  * of decoding and useful to compare read chunk 'c_xdroff' offsets.
114f837ee4aSSiddheshwar Mahesh  *
115f837ee4aSSiddheshwar Mahesh  * An example of a read chunk containing part of an XDR message:
116f837ee4aSSiddheshwar Mahesh  * An NFSv4 compound as follows:
117f837ee4aSSiddheshwar Mahesh  *
118f837ee4aSSiddheshwar Mahesh  * PUTFH
119f837ee4aSSiddheshwar Mahesh  * WRITE [4109 bytes]
120f837ee4aSSiddheshwar Mahesh  * GETATTR
121f837ee4aSSiddheshwar Mahesh  *
122f837ee4aSSiddheshwar Mahesh  * Solaris Encoding is:
123f837ee4aSSiddheshwar Mahesh  * -------------------
124f837ee4aSSiddheshwar Mahesh  *
125f837ee4aSSiddheshwar Mahesh  * <Inline message>: [PUTFH WRITE4args GETATTR]
126f837ee4aSSiddheshwar Mahesh  *                                   |
127f837ee4aSSiddheshwar Mahesh  *                                   v
128f837ee4aSSiddheshwar Mahesh  * [RDMA_READ chunks]:               [write data]
129f837ee4aSSiddheshwar Mahesh  *
130f837ee4aSSiddheshwar Mahesh  *
131f837ee4aSSiddheshwar Mahesh  * Linux encoding is:
132f837ee4aSSiddheshwar Mahesh  * -----------------
133f837ee4aSSiddheshwar Mahesh  *
134f837ee4aSSiddheshwar Mahesh  * <Inline message>: [PUTFH WRITE4args]
135f837ee4aSSiddheshwar Mahesh  *                                    |
136f837ee4aSSiddheshwar Mahesh  *                                    v
137f837ee4aSSiddheshwar Mahesh  * [RDMA_READ chunks]:                [Write data] [Write data2] [Getattr chunk]
138f837ee4aSSiddheshwar Mahesh  *                                     chunk1       chunk2         chunk3
139f837ee4aSSiddheshwar Mahesh  *
140f837ee4aSSiddheshwar Mahesh  * where the READ chunks are as:
141f837ee4aSSiddheshwar Mahesh  *
142f837ee4aSSiddheshwar Mahesh  *             - chunk1 - 4k
143f837ee4aSSiddheshwar Mahesh  * write data |
144f837ee4aSSiddheshwar Mahesh  *             - chunk2 - 13 bytes(4109 - 4k)
145f837ee4aSSiddheshwar Mahesh  * getattr op  - chunk3 - 19 bytes
146f837ee4aSSiddheshwar Mahesh  * (getattr op starts at byte 4 after 3 bytes of roundup)
147f837ee4aSSiddheshwar Mahesh  *
1487c478bd9Sstevel@tonic-gate  */
1497c478bd9Sstevel@tonic-gate 
1500a701b1eSRobert Gordon typedef struct {
1517c478bd9Sstevel@tonic-gate 	caddr_t		xp_offp;
1527c478bd9Sstevel@tonic-gate 	int		xp_min_chunk;
1537c478bd9Sstevel@tonic-gate 	uint_t		xp_flags;	/* Controls setting for rdma xdr */
1540a701b1eSRobert Gordon 	int		xp_buf_size;	/* size of xdr buffer */
155f837ee4aSSiddheshwar Mahesh 	int		xp_off;		/* overall offset */
156f837ee4aSSiddheshwar Mahesh 	struct clist	*xp_rcl;	/* head of chunk list */
1570a701b1eSRobert Gordon 	struct clist	**xp_rcl_next;	/* location to place/find next chunk */
158f837ee4aSSiddheshwar Mahesh 	struct clist	*xp_rcl_xdr;	/* copy of rcl containing RPC message */
1590a701b1eSRobert Gordon 	struct clist	*xp_wcl;	/* head of write chunk list */
1607c478bd9Sstevel@tonic-gate 	CONN		*xp_conn;	/* connection for chunk data xfer */
1610a701b1eSRobert Gordon 	uint_t		xp_reply_chunk_len;
1620a701b1eSRobert Gordon 	/* used to track length for security modes: integrity/privacy */
1630a701b1eSRobert Gordon 	uint_t		xp_reply_chunk_len_alt;
1640a701b1eSRobert Gordon } xrdma_private_t;
1650a701b1eSRobert Gordon 
1660a701b1eSRobert Gordon extern kmem_cache_t *clist_cache;
1670a701b1eSRobert Gordon 
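/*
 * Decode the length of an opaque block and, when a read chunk is queued
 * at the current XDR offset, build a chunk list (*rlist) that records the
 * destination offsets and lengths to be used by a later RDMA READ of the
 * block's data.
 */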
1680a701b1eSRobert Gordon bool_t
1690a701b1eSRobert Gordon xdrrdma_getrdmablk(XDR *xdrs, struct clist **rlist, uint_t *sizep,
1700a701b1eSRobert Gordon     CONN **conn, const uint_t maxsize)
1710a701b1eSRobert Gordon {
1720a701b1eSRobert Gordon 	xrdma_private_t	*xdrp = (xrdma_private_t *)(xdrs->x_private);
1730a701b1eSRobert Gordon 	struct clist	*cle = *(xdrp->xp_rcl_next);
1740a701b1eSRobert Gordon 	struct clist	*rdclist = NULL, *prev = NULL;
1750a701b1eSRobert Gordon 	bool_t		retval = TRUE;
1760a701b1eSRobert Gordon 	uint32_t	cur_offset = 0;
1770a701b1eSRobert Gordon 	uint32_t	total_segments = 0;
1780a701b1eSRobert Gordon 	uint32_t	actual_segments = 0;
1790a701b1eSRobert Gordon 	uint32_t	alen;
1800a701b1eSRobert Gordon 	uint_t		total_len;
1810a701b1eSRobert Gordon 
1820a701b1eSRobert Gordon 	ASSERT(xdrs->x_op != XDR_FREE);
1830a701b1eSRobert Gordon 
1840a701b1eSRobert Gordon 	/*
1850a701b1eSRobert Gordon 	 * first deal with the length since xdr bytes are counted
1860a701b1eSRobert Gordon 	 */
1870a701b1eSRobert Gordon 	if (!xdr_u_int(xdrs, sizep)) {
1880a701b1eSRobert Gordon 		DTRACE_PROBE(xdr__e__getrdmablk_sizep_fail);
1890a701b1eSRobert Gordon 		return (FALSE);
1900a701b1eSRobert Gordon 	}
1910a701b1eSRobert Gordon 	total_len = *sizep;
1920a701b1eSRobert Gordon 	if (total_len > maxsize) {
1930a701b1eSRobert Gordon 		DTRACE_PROBE2(xdr__e__getrdmablk_bad_size,
1940a701b1eSRobert Gordon 		    int, total_len, int, maxsize);
1950a701b1eSRobert Gordon 		return (FALSE);
1960a701b1eSRobert Gordon 	}
1970a701b1eSRobert Gordon 	(*conn) = xdrp->xp_conn;
1980a701b1eSRobert Gordon 
1990a701b1eSRobert Gordon 	/*
2000a701b1eSRobert Gordon 	 * if no data we are done
2010a701b1eSRobert Gordon 	 */
2020a701b1eSRobert Gordon 	if (total_len == 0)
2030a701b1eSRobert Gordon 		return (TRUE);
2040a701b1eSRobert Gordon 
2050a701b1eSRobert Gordon 	while (cle) {
2060a701b1eSRobert Gordon 		total_segments++;
2070a701b1eSRobert Gordon 		cle = cle->c_next;
2080a701b1eSRobert Gordon 	}
2090a701b1eSRobert Gordon 
2100a701b1eSRobert Gordon 	cle = *(xdrp->xp_rcl_next);
2110a701b1eSRobert Gordon 
2120a701b1eSRobert Gordon 	/*
2130a701b1eSRobert Gordon 	 * If there was a chunk at the current offset, then setup a read
2140a701b1eSRobert Gordon 	 * chunk list which records the destination address and length
2150a701b1eSRobert Gordon 	 * and will RDMA READ the data in later.
2160a701b1eSRobert Gordon 	 */
2170a701b1eSRobert Gordon 	if (cle == NULL)
2180a701b1eSRobert Gordon 		return (FALSE);
2190a701b1eSRobert Gordon 
2200a701b1eSRobert Gordon 	if (cle->c_xdroff != (xdrp->xp_offp - xdrs->x_base))
2210a701b1eSRobert Gordon 		return (FALSE);
2220a701b1eSRobert Gordon 
2230a701b1eSRobert Gordon 	/*
2240a701b1eSRobert Gordon 	 * Setup the chunk list with appropriate
2250a701b1eSRobert Gordon 	 * address (offset) and length
2260a701b1eSRobert Gordon 	 */
2270a701b1eSRobert Gordon 	for (actual_segments = 0;
2280a701b1eSRobert Gordon 	    actual_segments < total_segments; actual_segments++) {
229f837ee4aSSiddheshwar Mahesh 
230f837ee4aSSiddheshwar Mahesh 		DTRACE_PROBE3(krpc__i__xdrrdma_getrdmablk, uint32_t, cle->c_len,
231f837ee4aSSiddheshwar Mahesh 		    uint32_t, total_len, uint32_t, cle->c_xdroff);
232f837ee4aSSiddheshwar Mahesh 
2330a701b1eSRobert Gordon 		if (total_len <= 0)
2340a701b1eSRobert Gordon 			break;
235f837ee4aSSiddheshwar Mahesh 
236f837ee4aSSiddheshwar Mahesh 		/*
237f837ee4aSSiddheshwar Mahesh 		 * not the first time in the loop
238f837ee4aSSiddheshwar Mahesh 		 */
239f837ee4aSSiddheshwar Mahesh 		if (actual_segments > 0)
240f837ee4aSSiddheshwar Mahesh 			cle = cle->c_next;
241f837ee4aSSiddheshwar Mahesh 
2420a701b1eSRobert Gordon 		cle->u.c_daddr = (uint64) cur_offset;
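		/*
		 * If this chunk is longer than the data that remains,
		 * remember its original length in alen and clamp c_len;
		 * the unread remainder is re-attached to the chunk after
		 * the loop.
		 */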
2430a701b1eSRobert Gordon 		alen = 0;
2440a701b1eSRobert Gordon 		if (cle->c_len > total_len) {
2450a701b1eSRobert Gordon 			alen = cle->c_len;
2460a701b1eSRobert Gordon 			cle->c_len = total_len;
2470a701b1eSRobert Gordon 		}
2480a701b1eSRobert Gordon 		if (!alen)
2490a701b1eSRobert Gordon 			xdrp->xp_rcl_next = &cle->c_next;
2500a701b1eSRobert Gordon 
2510a701b1eSRobert Gordon 		cur_offset += cle->c_len;
2520a701b1eSRobert Gordon 		total_len -= cle->c_len;
2530a701b1eSRobert Gordon 
2540a701b1eSRobert Gordon 		if ((total_segments - actual_segments - 1) == 0 &&
2550a701b1eSRobert Gordon 		    total_len > 0) {
2560a701b1eSRobert Gordon 			DTRACE_PROBE(krpc__e__xdrrdma_getblk_chunktooshort);
2570a701b1eSRobert Gordon 			retval = FALSE;
2580a701b1eSRobert Gordon 		}
2590a701b1eSRobert Gordon 
2600a701b1eSRobert Gordon 		if ((total_segments - actual_segments - 1) > 0 &&
2610a701b1eSRobert Gordon 		    total_len == 0) {
2620a701b1eSRobert Gordon 			DTRACE_PROBE2(krpc__e__xdrrdma_getblk_toobig,
2630a701b1eSRobert Gordon 			    int, total_segments, int, actual_segments);
2640a701b1eSRobert Gordon 		}
2650a701b1eSRobert Gordon 
2660a701b1eSRobert Gordon 		rdclist = clist_alloc();
2670a701b1eSRobert Gordon 		(*rdclist) = (*cle);
2680a701b1eSRobert Gordon 		if ((*rlist) == NULL)
2690a701b1eSRobert Gordon 			(*rlist) = rdclist;
2700a701b1eSRobert Gordon 		if (prev == NULL)
2710a701b1eSRobert Gordon 			prev = rdclist;
2720a701b1eSRobert Gordon 		else {
2730a701b1eSRobert Gordon 			prev->c_next = rdclist;
2740a701b1eSRobert Gordon 			prev = rdclist;
2750a701b1eSRobert Gordon 		}
2760a701b1eSRobert Gordon 
2770a701b1eSRobert Gordon 	}
2787c478bd9Sstevel@tonic-gate 
2790a701b1eSRobert Gordon out:
2800a701b1eSRobert Gordon 	if (prev != NULL)
2810a701b1eSRobert Gordon 		prev->c_next = NULL;
2820a701b1eSRobert Gordon 
283f837ee4aSSiddheshwar Mahesh 	/*
284f837ee4aSSiddheshwar Mahesh 	 * Adjust the chunk length, if we read only a part of
285f837ee4aSSiddheshwar Mahesh 	 * a chunk.
286f837ee4aSSiddheshwar Mahesh 	 */
287f837ee4aSSiddheshwar Mahesh 
2880a701b1eSRobert Gordon 	if (alen) {
2890a701b1eSRobert Gordon 		cle->w.c_saddr =
2900a701b1eSRobert Gordon 		    (uint64)(uintptr_t)cle->w.c_saddr + cle->c_len;
2910a701b1eSRobert Gordon 		cle->c_len = alen - cle->c_len;
2920a701b1eSRobert Gordon 	}
2930a701b1eSRobert Gordon 
2940a701b1eSRobert Gordon 	return (retval);
2950a701b1eSRobert Gordon }
2967c478bd9Sstevel@tonic-gate 
2977c478bd9Sstevel@tonic-gate /*
2980a701b1eSRobert Gordon  * The procedure xdrrdma_create initializes a stream descriptor for a memory
2990a701b1eSRobert Gordon  * buffer.
3007c478bd9Sstevel@tonic-gate  */
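/*
 * Illustrative only (not taken from a real caller): a decode stream over
 * a received buffer might be set up roughly as
 *
 *	xdrrdma_create(&xdrs, rbuf, rbuf_len, 0, rcl, XDR_DECODE, conn);
 *
 * where rbuf, rbuf_len, rcl and conn are placeholder names and a
 * min_chunk of 0 disables chunking of the encoded stream.
 */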
3017c478bd9Sstevel@tonic-gate void
3027c478bd9Sstevel@tonic-gate xdrrdma_create(XDR *xdrs, caddr_t addr, uint_t size,
3030a701b1eSRobert Gordon     int min_chunk, struct clist *cl, enum xdr_op op, CONN *conn)
3047c478bd9Sstevel@tonic-gate {
3050a701b1eSRobert Gordon 	xrdma_private_t *xdrp;
3060a701b1eSRobert Gordon 	struct clist   *cle;
3077c478bd9Sstevel@tonic-gate 
3087c478bd9Sstevel@tonic-gate 	xdrs->x_op = op;
3090a701b1eSRobert Gordon 	xdrs->x_ops = &xdrrdma_ops;
3107c478bd9Sstevel@tonic-gate 	xdrs->x_base = addr;
3117c478bd9Sstevel@tonic-gate 	xdrs->x_handy = size;
3127c478bd9Sstevel@tonic-gate 	xdrs->x_public = NULL;
3137c478bd9Sstevel@tonic-gate 
3140a701b1eSRobert Gordon 	xdrp = (xrdma_private_t *)kmem_zalloc(sizeof (xrdma_private_t),
3150a701b1eSRobert Gordon 	    KM_SLEEP);
3167c478bd9Sstevel@tonic-gate 	xdrs->x_private = (caddr_t)xdrp;
3177c478bd9Sstevel@tonic-gate 	xdrp->xp_offp = addr;
3187c478bd9Sstevel@tonic-gate 	xdrp->xp_min_chunk = min_chunk;
3197c478bd9Sstevel@tonic-gate 	xdrp->xp_flags = 0;
3207c478bd9Sstevel@tonic-gate 	xdrp->xp_buf_size = size;
3210a701b1eSRobert Gordon 	xdrp->xp_rcl = cl;
3220a701b1eSRobert Gordon 	xdrp->xp_reply_chunk_len = 0;
3230a701b1eSRobert Gordon 	xdrp->xp_reply_chunk_len_alt = 0;
3240a701b1eSRobert Gordon 
3257c478bd9Sstevel@tonic-gate 	if (op == XDR_ENCODE && cl != NULL) {
3260a701b1eSRobert Gordon 		/* Find last element in chunk list and set xp_rcl_next */
3270a701b1eSRobert Gordon 		for (cle = cl; cle->c_next != NULL; cle = cle->c_next)
3280a701b1eSRobert Gordon 			continue;
3290a701b1eSRobert Gordon 
3300a701b1eSRobert Gordon 		xdrp->xp_rcl_next = &(cle->c_next);
3310a701b1eSRobert Gordon 	} else {
3320a701b1eSRobert Gordon 		xdrp->xp_rcl_next = &(xdrp->xp_rcl);
3330a701b1eSRobert Gordon 	}
3340a701b1eSRobert Gordon 
3350a701b1eSRobert Gordon 	xdrp->xp_wcl = NULL;
3360a701b1eSRobert Gordon 
3377c478bd9Sstevel@tonic-gate 	xdrp->xp_conn = conn;
3380a701b1eSRobert Gordon 	if (xdrp->xp_min_chunk != 0)
3390a701b1eSRobert Gordon 		xdrp->xp_flags |= XDR_RDMA_CHUNK;
3407c478bd9Sstevel@tonic-gate }
3417c478bd9Sstevel@tonic-gate 
3427c478bd9Sstevel@tonic-gate /* ARGSUSED */
3437c478bd9Sstevel@tonic-gate void
3440a701b1eSRobert Gordon xdrrdma_destroy(XDR * xdrs)
3457c478bd9Sstevel@tonic-gate {
3460a701b1eSRobert Gordon 	xrdma_private_t	*xdrp = (xrdma_private_t *)(xdrs->x_private);
3477c478bd9Sstevel@tonic-gate 
3480a701b1eSRobert Gordon 	if (xdrp == NULL)
3490a701b1eSRobert Gordon 		return;
3500a701b1eSRobert Gordon 
3510a701b1eSRobert Gordon 	if (xdrp->xp_wcl) {
3520a701b1eSRobert Gordon 		if (xdrp->xp_flags & XDR_RDMA_WLIST_REG) {
353f837ee4aSSiddheshwar Mahesh 			(void) clist_deregister(xdrp->xp_conn, xdrp->xp_wcl);
3540a701b1eSRobert Gordon 			rdma_buf_free(xdrp->xp_conn,
3550a701b1eSRobert Gordon 			    &xdrp->xp_wcl->rb_longbuf);
3560a701b1eSRobert Gordon 		}
3570a701b1eSRobert Gordon 		clist_free(xdrp->xp_wcl);
3580a701b1eSRobert Gordon 	}
3590a701b1eSRobert Gordon 
3600a701b1eSRobert Gordon 	if (xdrp->xp_rcl) {
3610a701b1eSRobert Gordon 		if (xdrp->xp_flags & XDR_RDMA_RLIST_REG) {
362f837ee4aSSiddheshwar Mahesh 			(void) clist_deregister(xdrp->xp_conn, xdrp->xp_rcl);
3630a701b1eSRobert Gordon 			rdma_buf_free(xdrp->xp_conn,
3640a701b1eSRobert Gordon 			    &xdrp->xp_rcl->rb_longbuf);
3650a701b1eSRobert Gordon 		}
3660a701b1eSRobert Gordon 		clist_free(xdrp->xp_rcl);
3670a701b1eSRobert Gordon 	}
3680a701b1eSRobert Gordon 
369f837ee4aSSiddheshwar Mahesh 	if (xdrp->xp_rcl_xdr)
370f837ee4aSSiddheshwar Mahesh 		xdrrdma_free_xdr_chunks(xdrp->xp_conn, xdrp->xp_rcl_xdr);
371f837ee4aSSiddheshwar Mahesh 
3720a701b1eSRobert Gordon 	(void) kmem_free(xdrs->x_private, sizeof (xrdma_private_t));
3730a701b1eSRobert Gordon 	xdrs->x_private = NULL;
3747c478bd9Sstevel@tonic-gate }
3757c478bd9Sstevel@tonic-gate 
3760a701b1eSRobert Gordon static	bool_t
3777c478bd9Sstevel@tonic-gate xdrrdma_getint32(XDR *xdrs, int32_t *int32p)
3787c478bd9Sstevel@tonic-gate {
3790a701b1eSRobert Gordon 	xrdma_private_t	*xdrp = (xrdma_private_t *)(xdrs->x_private);
380f837ee4aSSiddheshwar Mahesh 	int chunked = 0;
3817c478bd9Sstevel@tonic-gate 
382f837ee4aSSiddheshwar Mahesh 	if ((xdrs->x_handy -= (int)sizeof (int32_t)) < 0) {
383f837ee4aSSiddheshwar Mahesh 		/*
384f837ee4aSSiddheshwar Mahesh 		 * check if rest of the rpc message is in a chunk
385f837ee4aSSiddheshwar Mahesh 		 */
386f837ee4aSSiddheshwar Mahesh 		if (!xdrrdma_read_a_chunk(xdrs, &xdrp->xp_conn)) {
387f837ee4aSSiddheshwar Mahesh 			return (FALSE);
388f837ee4aSSiddheshwar Mahesh 		}
389f837ee4aSSiddheshwar Mahesh 		chunked = 1;
390f837ee4aSSiddheshwar Mahesh 	}
3917c478bd9Sstevel@tonic-gate 
3927c478bd9Sstevel@tonic-gate 	/* LINTED pointer alignment */
3937c478bd9Sstevel@tonic-gate 	*int32p = (int32_t)ntohl((uint32_t)(*((int32_t *)(xdrp->xp_offp))));
394f837ee4aSSiddheshwar Mahesh 
395f837ee4aSSiddheshwar Mahesh 	DTRACE_PROBE1(krpc__i__xdrrdma_getint32, int32_t, *int32p);
396f837ee4aSSiddheshwar Mahesh 
3977c478bd9Sstevel@tonic-gate 	xdrp->xp_offp += sizeof (int32_t);
3987c478bd9Sstevel@tonic-gate 
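	/*
	 * If the value came out of a freshly read chunk, charge it against
	 * the chunk's remaining byte count as well, and keep the overall
	 * message offset (xp_off) in step when it is being tracked.
	 */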
399f837ee4aSSiddheshwar Mahesh 	if (chunked)
400f837ee4aSSiddheshwar Mahesh 		xdrs->x_handy -= (int)sizeof (int32_t);
401f837ee4aSSiddheshwar Mahesh 
402f837ee4aSSiddheshwar Mahesh 	if (xdrp->xp_off != 0) {
403f837ee4aSSiddheshwar Mahesh 		xdrp->xp_off += sizeof (int32_t);
404f837ee4aSSiddheshwar Mahesh 	}
405f837ee4aSSiddheshwar Mahesh 
4067c478bd9Sstevel@tonic-gate 	return (TRUE);
4077c478bd9Sstevel@tonic-gate }
4087c478bd9Sstevel@tonic-gate 
4090a701b1eSRobert Gordon static	bool_t
4107c478bd9Sstevel@tonic-gate xdrrdma_putint32(XDR *xdrs, int32_t *int32p)
4117c478bd9Sstevel@tonic-gate {
4120a701b1eSRobert Gordon 	xrdma_private_t	*xdrp = (xrdma_private_t *)(xdrs->x_private);
4137c478bd9Sstevel@tonic-gate 
4147c478bd9Sstevel@tonic-gate 	if ((xdrs->x_handy -= (int)sizeof (int32_t)) < 0)
4157c478bd9Sstevel@tonic-gate 		return (FALSE);
4167c478bd9Sstevel@tonic-gate 
4177c478bd9Sstevel@tonic-gate 	/* LINTED pointer alignment */
4187c478bd9Sstevel@tonic-gate 	*(int32_t *)xdrp->xp_offp = (int32_t)htonl((uint32_t)(*int32p));
4197c478bd9Sstevel@tonic-gate 	xdrp->xp_offp += sizeof (int32_t);
4207c478bd9Sstevel@tonic-gate 
4217c478bd9Sstevel@tonic-gate 	return (TRUE);
4227c478bd9Sstevel@tonic-gate }
4237c478bd9Sstevel@tonic-gate 
4247c478bd9Sstevel@tonic-gate /*
4250a701b1eSRobert Gordon  * DECODE bytes from XDR stream for rdma.
4260a701b1eSRobert Gordon  * If the XDR stream contains a read chunk list,
4270a701b1eSRobert Gordon  * it will go through xdrrdma_getrdmablk instead.
4287c478bd9Sstevel@tonic-gate  */
4290a701b1eSRobert Gordon static	bool_t
4307c478bd9Sstevel@tonic-gate xdrrdma_getbytes(XDR *xdrs, caddr_t addr, int len)
4317c478bd9Sstevel@tonic-gate {
4320a701b1eSRobert Gordon 	xrdma_private_t	*xdrp = (xrdma_private_t *)(xdrs->x_private);
4330a701b1eSRobert Gordon 	struct clist	*cle = *(xdrp->xp_rcl_next);
4340a701b1eSRobert Gordon 	struct clist	*cls = *(xdrp->xp_rcl_next);
4350a701b1eSRobert Gordon 	struct clist	cl;
4360a701b1eSRobert Gordon 	bool_t		retval = TRUE;
4370a701b1eSRobert Gordon 	uint32_t	total_len = len;
4380a701b1eSRobert Gordon 	uint32_t	cur_offset = 0;
4390a701b1eSRobert Gordon 	uint32_t	total_segments = 0;
4400a701b1eSRobert Gordon 	uint32_t	actual_segments = 0;
441ed629aefSSiddheshwar Mahesh 	uint32_t	status = RDMA_SUCCESS;
442ed629aefSSiddheshwar Mahesh 	uint32_t	alen = 0;
443f837ee4aSSiddheshwar Mahesh 	uint32_t	xpoff;
4440a701b1eSRobert Gordon 
4450a701b1eSRobert Gordon 	while (cle) {
4460a701b1eSRobert Gordon 		total_segments++;
4470a701b1eSRobert Gordon 		cle = cle->c_next;
4480a701b1eSRobert Gordon 	}
4497c478bd9Sstevel@tonic-gate 
4500a701b1eSRobert Gordon 	cle = *(xdrp->xp_rcl_next);
451f837ee4aSSiddheshwar Mahesh 
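	/*
	 * xp_off is non-zero when part of the message arrived in a read
	 * chunk; in that case it, rather than (xp_offp - x_base), gives the
	 * current position within the overall message.
	 */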
452f837ee4aSSiddheshwar Mahesh 	if (xdrp->xp_off) {
453f837ee4aSSiddheshwar Mahesh 		xpoff = xdrp->xp_off;
454f837ee4aSSiddheshwar Mahesh 	} else {
455f837ee4aSSiddheshwar Mahesh 		xpoff = (xdrp->xp_offp - xdrs->x_base);
456f837ee4aSSiddheshwar Mahesh 	}
457f837ee4aSSiddheshwar Mahesh 
4587c478bd9Sstevel@tonic-gate 	/*
4590a701b1eSRobert Gordon 	 * If there was a chunk at the current offset, then setup a read
4600a701b1eSRobert Gordon 	 * chunk list which records the destination address and length
4610a701b1eSRobert Gordon 	 * and will RDMA READ the data in later.
4627c478bd9Sstevel@tonic-gate 	 */
4630a701b1eSRobert Gordon 
464f837ee4aSSiddheshwar Mahesh 	if (cle != NULL && cle->c_xdroff == xpoff) {
4650a701b1eSRobert Gordon 		for (actual_segments = 0;
4660a701b1eSRobert Gordon 		    actual_segments < total_segments; actual_segments++) {
467ed629aefSSiddheshwar Mahesh 
4680a701b1eSRobert Gordon 			if (total_len <= 0)
4690a701b1eSRobert Gordon 				break;
470ed629aefSSiddheshwar Mahesh 
471ed629aefSSiddheshwar Mahesh 			if (status != RDMA_SUCCESS)
472ed629aefSSiddheshwar Mahesh 				goto out;
473ed629aefSSiddheshwar Mahesh 
4740a701b1eSRobert Gordon 			cle->u.c_daddr = (uint64)(uintptr_t)addr + cur_offset;
4750a701b1eSRobert Gordon 			alen = 0;
4760a701b1eSRobert Gordon 			if (cle->c_len > total_len) {
4770a701b1eSRobert Gordon 				alen = cle->c_len;
4780a701b1eSRobert Gordon 				cle->c_len = total_len;
4790a701b1eSRobert Gordon 			}
4800a701b1eSRobert Gordon 			if (!alen)
4810a701b1eSRobert Gordon 				xdrp->xp_rcl_next = &cle->c_next;
4827c478bd9Sstevel@tonic-gate 
4830a701b1eSRobert Gordon 			cur_offset += cle->c_len;
4840a701b1eSRobert Gordon 			total_len -= cle->c_len;
4857c478bd9Sstevel@tonic-gate 
4860a701b1eSRobert Gordon 			if ((total_segments - actual_segments - 1) == 0 &&
4870a701b1eSRobert Gordon 			    total_len > 0) {
4880a701b1eSRobert Gordon 				DTRACE_PROBE(
4890a701b1eSRobert Gordon 				    krpc__e__xdrrdma_getbytes_chunktooshort);
4900a701b1eSRobert Gordon 				retval = FALSE;
4910a701b1eSRobert Gordon 			}
4927c478bd9Sstevel@tonic-gate 
4930a701b1eSRobert Gordon 			if ((total_segments - actual_segments - 1) > 0 &&
4940a701b1eSRobert Gordon 			    total_len == 0) {
4950a701b1eSRobert Gordon 				DTRACE_PROBE2(krpc__e__xdrrdma_getbytes_toobig,
4960a701b1eSRobert Gordon 				    int, total_segments, int, actual_segments);
4970a701b1eSRobert Gordon 			}
4980a701b1eSRobert Gordon 
4990a701b1eSRobert Gordon 			/*
5000a701b1eSRobert Gordon 			 * RDMA READ the chunk data from the remote end.
5010a701b1eSRobert Gordon 			 * First prep the destination buffer by registering
5020a701b1eSRobert Gordon 			 * it, then RDMA READ the chunk data. Since we are
5030a701b1eSRobert Gordon 			 * doing streaming memory, sync the destination
5040a701b1eSRobert Gordon 			 * buffer to CPU and deregister the buffer.
5050a701b1eSRobert Gordon 			 */
5060a701b1eSRobert Gordon 			if (xdrp->xp_conn == NULL) {
5070a701b1eSRobert Gordon 				return (FALSE);
5080a701b1eSRobert Gordon 			}
5090a701b1eSRobert Gordon 			cl = *cle;
5100a701b1eSRobert Gordon 			cl.c_next = NULL;
511ed629aefSSiddheshwar Mahesh 			status = clist_register(xdrp->xp_conn, &cl,
512ed629aefSSiddheshwar Mahesh 			    CLIST_REG_DST);
513ed629aefSSiddheshwar Mahesh 			if (status != RDMA_SUCCESS) {
514ed629aefSSiddheshwar Mahesh 				retval = FALSE;
515ed629aefSSiddheshwar Mahesh 				/*
516ed629aefSSiddheshwar Mahesh 				 * Deregister the previous chunks
517ed629aefSSiddheshwar Mahesh 				 * before return
518ed629aefSSiddheshwar Mahesh 				 */
519ed629aefSSiddheshwar Mahesh 				goto out;
5200a701b1eSRobert Gordon 			}
521ed629aefSSiddheshwar Mahesh 
5220a701b1eSRobert Gordon 			cle->c_dmemhandle = cl.c_dmemhandle;
5230a701b1eSRobert Gordon 			cle->c_dsynchandle = cl.c_dsynchandle;
5240a701b1eSRobert Gordon 
5250a701b1eSRobert Gordon 			/*
5260a701b1eSRobert Gordon 			 * Now read the chunk in
5270a701b1eSRobert Gordon 			 */
5280a701b1eSRobert Gordon 			if ((total_segments - actual_segments - 1) == 0 ||
5290a701b1eSRobert Gordon 			    total_len == 0) {
5300a701b1eSRobert Gordon 				status = RDMA_READ(xdrp->xp_conn, &cl, WAIT);
5310a701b1eSRobert Gordon 			} else {
5320a701b1eSRobert Gordon 				status = RDMA_READ(xdrp->xp_conn, &cl, NOWAIT);
5330a701b1eSRobert Gordon 			}
5340a701b1eSRobert Gordon 			if (status != RDMA_SUCCESS) {
5350a701b1eSRobert Gordon 				DTRACE_PROBE1(
5360a701b1eSRobert Gordon 				    krpc__i__xdrrdma_getblk_readfailed,
5370a701b1eSRobert Gordon 				    int, status);
5380a701b1eSRobert Gordon 				retval = FALSE;
5390a701b1eSRobert Gordon 			}
540ed629aefSSiddheshwar Mahesh 
5410a701b1eSRobert Gordon 			cle = cle->c_next;
542ed629aefSSiddheshwar Mahesh 
5437c478bd9Sstevel@tonic-gate 		}
5440a701b1eSRobert Gordon 
5457c478bd9Sstevel@tonic-gate 		/*
5467c478bd9Sstevel@tonic-gate 		 * sync the memory for cpu
5477c478bd9Sstevel@tonic-gate 		 */
5480a701b1eSRobert Gordon 		cl = *cls;
5490a701b1eSRobert Gordon 		cl.c_next = NULL;
5500a701b1eSRobert Gordon 		cl.c_len = cur_offset;
551f837ee4aSSiddheshwar Mahesh 		if (clist_syncmem(
552f837ee4aSSiddheshwar Mahesh 		    xdrp->xp_conn, &cl, CLIST_REG_DST) != RDMA_SUCCESS) {
5537c478bd9Sstevel@tonic-gate 			retval = FALSE;
5547c478bd9Sstevel@tonic-gate 		}
5557c478bd9Sstevel@tonic-gate out:
556ed629aefSSiddheshwar Mahesh 
5577c478bd9Sstevel@tonic-gate 		/*
5587c478bd9Sstevel@tonic-gate 		 * Deregister the chunks
5597c478bd9Sstevel@tonic-gate 		 */
5600a701b1eSRobert Gordon 		cle = cls;
561ed629aefSSiddheshwar Mahesh 		while (actual_segments != 0) {
562ed629aefSSiddheshwar Mahesh 			cl = *cle;
563ed629aefSSiddheshwar Mahesh 			cl.c_next = NULL;
564ed629aefSSiddheshwar Mahesh 
565ed629aefSSiddheshwar Mahesh 			cl.c_regtype = CLIST_REG_DST;
566ed629aefSSiddheshwar Mahesh 			(void) clist_deregister(xdrp->xp_conn, &cl);
567ed629aefSSiddheshwar Mahesh 
568ed629aefSSiddheshwar Mahesh 			cle = cle->c_next;
569ed629aefSSiddheshwar Mahesh 			actual_segments--;
570ed629aefSSiddheshwar Mahesh 		}
571ed629aefSSiddheshwar Mahesh 
5720a701b1eSRobert Gordon 		if (alen) {
573ed629aefSSiddheshwar Mahesh 			cle = *(xdrp->xp_rcl_next);
5740a701b1eSRobert Gordon 			cle->w.c_saddr =
5750a701b1eSRobert Gordon 			    (uint64)(uintptr_t)cle->w.c_saddr + cle->c_len;
5760a701b1eSRobert Gordon 			cle->c_len = alen - cle->c_len;
5770a701b1eSRobert Gordon 		}
578ed629aefSSiddheshwar Mahesh 
5797c478bd9Sstevel@tonic-gate 		return (retval);
5807c478bd9Sstevel@tonic-gate 	}
581f837ee4aSSiddheshwar Mahesh 
5827c478bd9Sstevel@tonic-gate 	if ((xdrs->x_handy -= len) < 0)
5837c478bd9Sstevel@tonic-gate 		return (FALSE);
5847c478bd9Sstevel@tonic-gate 
5857c478bd9Sstevel@tonic-gate 	bcopy(xdrp->xp_offp, addr, len);
586f837ee4aSSiddheshwar Mahesh 
5877c478bd9Sstevel@tonic-gate 	xdrp->xp_offp += len;
5887c478bd9Sstevel@tonic-gate 
589f837ee4aSSiddheshwar Mahesh 	if (xdrp->xp_off != 0)
590f837ee4aSSiddheshwar Mahesh 		xdrp->xp_off += len;
591f837ee4aSSiddheshwar Mahesh 
5927c478bd9Sstevel@tonic-gate 	return (TRUE);
5937c478bd9Sstevel@tonic-gate }
5947c478bd9Sstevel@tonic-gate 
5957c478bd9Sstevel@tonic-gate /*
5960a701b1eSRobert Gordon  * ENCODE some bytes into an XDR stream.  xp_min_chunk = 0 means the stream
5970a701b1eSRobert Gordon  * of bytes contains no chunks to separate out, and if the bytes do not fit in
5980a701b1eSRobert Gordon  * the supplied buffer, grow the buffer and free the old buffer.
5997c478bd9Sstevel@tonic-gate  */
6000a701b1eSRobert Gordon static	bool_t
6017c478bd9Sstevel@tonic-gate xdrrdma_putbytes(XDR *xdrs, caddr_t addr, int len)
6027c478bd9Sstevel@tonic-gate {
6030a701b1eSRobert Gordon 	xrdma_private_t	*xdrp = (xrdma_private_t *)(xdrs->x_private);
6047c478bd9Sstevel@tonic-gate 	/*
6050a701b1eSRobert Gordon 	 * Is this stream accepting chunks?
6060a701b1eSRobert Gordon 	 * If so, does either of the following two conditions exist?
6070a701b1eSRobert Gordon 	 * - length of bytes to encode is greater than the min chunk size?
6080a701b1eSRobert Gordon 	 * - remaining space in this stream is shorter than length of
6090a701b1eSRobert Gordon 	 *   bytes to encode?
6100a701b1eSRobert Gordon 	 *
6110a701b1eSRobert Gordon 	 * If the above exists, then create a chunk for this encoding
6120a701b1eSRobert Gordon 	 * and save the addresses, etc.
6137c478bd9Sstevel@tonic-gate 	 */
6140a701b1eSRobert Gordon 	if (xdrp->xp_flags & XDR_RDMA_CHUNK &&
6150a701b1eSRobert Gordon 	    ((xdrp->xp_min_chunk != 0 &&
6160a701b1eSRobert Gordon 	    len >= xdrp->xp_min_chunk) ||
6170a701b1eSRobert Gordon 	    (xdrs->x_handy - len  < 0))) {
6180a701b1eSRobert Gordon 		struct clist	*cle;
6190a701b1eSRobert Gordon 		int		offset = xdrp->xp_offp - xdrs->x_base;
6200a701b1eSRobert Gordon 
6210a701b1eSRobert Gordon 		cle = clist_alloc();
6227c478bd9Sstevel@tonic-gate 		cle->c_xdroff = offset;
6230a701b1eSRobert Gordon 		cle->c_len = len;
6240a701b1eSRobert Gordon 		cle->w.c_saddr = (uint64)(uintptr_t)addr;
6257c478bd9Sstevel@tonic-gate 		cle->c_next = NULL;
6267c478bd9Sstevel@tonic-gate 
6270a701b1eSRobert Gordon 		*(xdrp->xp_rcl_next) = cle;
6280a701b1eSRobert Gordon 		xdrp->xp_rcl_next = &(cle->c_next);
6297c478bd9Sstevel@tonic-gate 
6307c478bd9Sstevel@tonic-gate 		return (TRUE);
6317c478bd9Sstevel@tonic-gate 	}
6320a701b1eSRobert Gordon 	/* Is there enough space to encode what is left? */
6337c478bd9Sstevel@tonic-gate 	if ((xdrs->x_handy -= len) < 0) {
6340a701b1eSRobert Gordon 		return (FALSE);
6357c478bd9Sstevel@tonic-gate 	}
6367c478bd9Sstevel@tonic-gate 	bcopy(addr, xdrp->xp_offp, len);
6377c478bd9Sstevel@tonic-gate 	xdrp->xp_offp += len;
6387c478bd9Sstevel@tonic-gate 
6397c478bd9Sstevel@tonic-gate 	return (TRUE);
6407c478bd9Sstevel@tonic-gate }
6417c478bd9Sstevel@tonic-gate 
6427c478bd9Sstevel@tonic-gate uint_t
6437c478bd9Sstevel@tonic-gate xdrrdma_getpos(XDR *xdrs)
6447c478bd9Sstevel@tonic-gate {
6450a701b1eSRobert Gordon 	xrdma_private_t *xdrp = (xrdma_private_t *)(xdrs->x_private);
6467c478bd9Sstevel@tonic-gate 
6477c478bd9Sstevel@tonic-gate 	return ((uint_t)((uintptr_t)xdrp->xp_offp - (uintptr_t)xdrs->x_base));
6487c478bd9Sstevel@tonic-gate }
6497c478bd9Sstevel@tonic-gate 
6507c478bd9Sstevel@tonic-gate bool_t
6517c478bd9Sstevel@tonic-gate xdrrdma_setpos(XDR *xdrs, uint_t pos)
6527c478bd9Sstevel@tonic-gate {
6530a701b1eSRobert Gordon 	xrdma_private_t	*xdrp = (xrdma_private_t *)(xdrs->x_private);
6547c478bd9Sstevel@tonic-gate 
6550a701b1eSRobert Gordon 	caddr_t		newaddr = xdrs->x_base + pos;
6560a701b1eSRobert Gordon 	caddr_t		lastaddr = xdrp->xp_offp + xdrs->x_handy;
6570a701b1eSRobert Gordon 	ptrdiff_t	diff;
6587c478bd9Sstevel@tonic-gate 
6597c478bd9Sstevel@tonic-gate 	if (newaddr > lastaddr)
6607c478bd9Sstevel@tonic-gate 		return (FALSE);
6617c478bd9Sstevel@tonic-gate 
6627c478bd9Sstevel@tonic-gate 	xdrp->xp_offp = newaddr;
6637c478bd9Sstevel@tonic-gate 	diff = lastaddr - newaddr;
6647c478bd9Sstevel@tonic-gate 	xdrs->x_handy = (int)diff;
6657c478bd9Sstevel@tonic-gate 
6667c478bd9Sstevel@tonic-gate 	return (TRUE);
6677c478bd9Sstevel@tonic-gate }
6687c478bd9Sstevel@tonic-gate 
6697c478bd9Sstevel@tonic-gate /* ARGSUSED */
6707c478bd9Sstevel@tonic-gate static rpc_inline_t *
6717c478bd9Sstevel@tonic-gate xdrrdma_inline(XDR *xdrs, int len)
6727c478bd9Sstevel@tonic-gate {
6730a701b1eSRobert Gordon 	rpc_inline_t	*buf = NULL;
6740a701b1eSRobert Gordon 	xrdma_private_t	*xdrp = (xrdma_private_t *)(xdrs->x_private);
6750a701b1eSRobert Gordon 	struct clist	*cle = *(xdrp->xp_rcl_next);
6767c478bd9Sstevel@tonic-gate 
6777c478bd9Sstevel@tonic-gate 	if (xdrs->x_op == XDR_DECODE) {
6787c478bd9Sstevel@tonic-gate 		/*
6790a701b1eSRobert Gordon 		 * Since chunks aren't in-line, check to see whether there is
6800a701b1eSRobert Gordon 		 * a chunk in the inline range.
6817c478bd9Sstevel@tonic-gate 		 */
6827c478bd9Sstevel@tonic-gate 		if (cle != NULL &&
6830a701b1eSRobert Gordon 		    cle->c_xdroff <= (xdrp->xp_offp - xdrs->x_base + len))
6840a701b1eSRobert Gordon 			return (NULL);
6857c478bd9Sstevel@tonic-gate 	}
6867c478bd9Sstevel@tonic-gate 
6870a701b1eSRobert Gordon 	/* LINTED pointer alignment */
6880a701b1eSRobert Gordon 	buf = (rpc_inline_t *)xdrp->xp_offp;
6890a701b1eSRobert Gordon 	if (!IS_P2ALIGNED(buf, sizeof (int32_t)))
6900a701b1eSRobert Gordon 		return (NULL);
6910a701b1eSRobert Gordon 
6927c478bd9Sstevel@tonic-gate 	if ((xdrs->x_handy < len) || (xdrp->xp_min_chunk != 0 &&
6937c478bd9Sstevel@tonic-gate 	    len >= xdrp->xp_min_chunk)) {
6947c478bd9Sstevel@tonic-gate 		return (NULL);
6957c478bd9Sstevel@tonic-gate 	} else {
6967c478bd9Sstevel@tonic-gate 		xdrs->x_handy -= len;
6977c478bd9Sstevel@tonic-gate 		xdrp->xp_offp += len;
6987c478bd9Sstevel@tonic-gate 		return (buf);
6997c478bd9Sstevel@tonic-gate 	}
7007c478bd9Sstevel@tonic-gate }
7017c478bd9Sstevel@tonic-gate 
7020a701b1eSRobert Gordon static	bool_t
7037c478bd9Sstevel@tonic-gate xdrrdma_control(XDR *xdrs, int request, void *info)
7047c478bd9Sstevel@tonic-gate {
7050a701b1eSRobert Gordon 	int32_t		*int32p;
7060a701b1eSRobert Gordon 	int		len, i;
7070a701b1eSRobert Gordon 	uint_t		in_flags;
7080a701b1eSRobert Gordon 	xrdma_private_t	*xdrp = (xrdma_private_t *)(xdrs->x_private);
7090a701b1eSRobert Gordon 	rdma_chunkinfo_t *rcip = NULL;
7100a701b1eSRobert Gordon 	rdma_wlist_conn_info_t *rwcip = NULL;
7110a701b1eSRobert Gordon 	rdma_chunkinfo_lengths_t *rcilp = NULL;
7120a701b1eSRobert Gordon 	struct uio *uiop;
713*35bbd688SKaren Rochford 	struct clist	*rwl = NULL, *first = NULL;
7140a701b1eSRobert Gordon 	struct clist	*prev = NULL;
7157c478bd9Sstevel@tonic-gate 
7167c478bd9Sstevel@tonic-gate 	switch (request) {
7177c478bd9Sstevel@tonic-gate 	case XDR_PEEK:
7187c478bd9Sstevel@tonic-gate 		/*
7197c478bd9Sstevel@tonic-gate 		 * Return the next 4 byte unit in the XDR stream.
7207c478bd9Sstevel@tonic-gate 		 */
7217c478bd9Sstevel@tonic-gate 		if (xdrs->x_handy < sizeof (int32_t))
7227c478bd9Sstevel@tonic-gate 			return (FALSE);
7237c478bd9Sstevel@tonic-gate 
7247c478bd9Sstevel@tonic-gate 		int32p = (int32_t *)info;
7257c478bd9Sstevel@tonic-gate 		*int32p = (int32_t)ntohl((uint32_t)
7267c478bd9Sstevel@tonic-gate 		    (*((int32_t *)(xdrp->xp_offp))));
7277c478bd9Sstevel@tonic-gate 
7287c478bd9Sstevel@tonic-gate 		return (TRUE);
7297c478bd9Sstevel@tonic-gate 
7307c478bd9Sstevel@tonic-gate 	case XDR_SKIPBYTES:
7317c478bd9Sstevel@tonic-gate 		/*
7327c478bd9Sstevel@tonic-gate 		 * Skip the next N bytes in the XDR stream.
7337c478bd9Sstevel@tonic-gate 		 */
7347c478bd9Sstevel@tonic-gate 		int32p = (int32_t *)info;
7357c478bd9Sstevel@tonic-gate 		len = RNDUP((int)(*int32p));
7367c478bd9Sstevel@tonic-gate 		if ((xdrs->x_handy -= len) < 0)
7377c478bd9Sstevel@tonic-gate 			return (FALSE);
7387c478bd9Sstevel@tonic-gate 		xdrp->xp_offp += len;
7397c478bd9Sstevel@tonic-gate 
7407c478bd9Sstevel@tonic-gate 		return (TRUE);
7417c478bd9Sstevel@tonic-gate 
7420a701b1eSRobert Gordon 	case XDR_RDMA_SET_FLAGS:
7437c478bd9Sstevel@tonic-gate 		/*
7440a701b1eSRobert Gordon 		 * Set the flags provided in the *info in xp_flags for rdma
7450a701b1eSRobert Gordon 		 * xdr stream control.
7467c478bd9Sstevel@tonic-gate 		 */
7477c478bd9Sstevel@tonic-gate 		int32p = (int32_t *)info;
7487c478bd9Sstevel@tonic-gate 		in_flags = (uint_t)(*int32p);
7497c478bd9Sstevel@tonic-gate 
7507c478bd9Sstevel@tonic-gate 		xdrp->xp_flags |= in_flags;
7517c478bd9Sstevel@tonic-gate 		return (TRUE);
7527c478bd9Sstevel@tonic-gate 
7530a701b1eSRobert Gordon 	case XDR_RDMA_GET_FLAGS:
7547c478bd9Sstevel@tonic-gate 		/*
7557c478bd9Sstevel@tonic-gate 		 * Get the flags provided in xp_flags return through *info
7567c478bd9Sstevel@tonic-gate 		 */
7577c478bd9Sstevel@tonic-gate 		int32p = (int32_t *)info;
7587c478bd9Sstevel@tonic-gate 
7597c478bd9Sstevel@tonic-gate 		*int32p = (int32_t)xdrp->xp_flags;
7607c478bd9Sstevel@tonic-gate 		return (TRUE);
7617c478bd9Sstevel@tonic-gate 
7620a701b1eSRobert Gordon 	case XDR_RDMA_GET_CHUNK_LEN:
7630a701b1eSRobert Gordon 		rcilp = (rdma_chunkinfo_lengths_t *)info;
7640a701b1eSRobert Gordon 		rcilp->rcil_len = xdrp->xp_reply_chunk_len;
7650a701b1eSRobert Gordon 		rcilp->rcil_len_alt = xdrp->xp_reply_chunk_len_alt;
7660a701b1eSRobert Gordon 
7670a701b1eSRobert Gordon 		return (TRUE);
7680a701b1eSRobert Gordon 
7690a701b1eSRobert Gordon 	case XDR_RDMA_ADD_CHUNK:
7700a701b1eSRobert Gordon 		/*
7710a701b1eSRobert Gordon 		 * Store wlist information
7720a701b1eSRobert Gordon 		 */
7730a701b1eSRobert Gordon 
7740a701b1eSRobert Gordon 		rcip = (rdma_chunkinfo_t *)info;
7750a701b1eSRobert Gordon 
7768ca9c6bbSFaramarz Jalalian - Sun Microsystems - Irvine United States 		DTRACE_PROBE2(krpc__i__xdrrdma__control__add__chunk,
7778ca9c6bbSFaramarz Jalalian - Sun Microsystems - Irvine United States 		    rci_type_t, rcip->rci_type, uint32, rcip->rci_len);
7780a701b1eSRobert Gordon 		switch (rcip->rci_type) {
7790a701b1eSRobert Gordon 		case RCI_WRITE_UIO_CHUNK:
7800a701b1eSRobert Gordon 			xdrp->xp_reply_chunk_len_alt += rcip->rci_len;
7810a701b1eSRobert Gordon 
7820a4b0810SKaren Rochford 			if ((rcip->rci_len + XDR_RDMA_BUF_OVERHEAD) <
7830a4b0810SKaren Rochford 			    xdrp->xp_min_chunk) {
7840a701b1eSRobert Gordon 				xdrp->xp_wcl = NULL;
7850a701b1eSRobert Gordon 				*(rcip->rci_clpp) = NULL;
7860a701b1eSRobert Gordon 				return (TRUE);
7870a701b1eSRobert Gordon 			}
7880a701b1eSRobert Gordon 			uiop = rcip->rci_a.rci_uiop;
7890a701b1eSRobert Gordon 
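			/*
			 * Build one write chunk per iovec segment and chain
			 * them together; 'first' remembers the head of the
			 * list so it can be handed back to the caller.
			 */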
7900a701b1eSRobert Gordon 			for (i = 0; i < uiop->uio_iovcnt; i++) {
7910a701b1eSRobert Gordon 				rwl = clist_alloc();
792*35bbd688SKaren Rochford 				if (first == NULL)
793*35bbd688SKaren Rochford 					first = rwl;
7940a701b1eSRobert Gordon 				rwl->c_len = uiop->uio_iov[i].iov_len;
7950a701b1eSRobert Gordon 				rwl->u.c_daddr =
7960a701b1eSRobert Gordon 				    (uint64)(uintptr_t)
7970a701b1eSRobert Gordon 				    (uiop->uio_iov[i].iov_base);
7980a701b1eSRobert Gordon 				/*
7990a701b1eSRobert Gordon 				 * if userspace address, put adspace ptr in
8000a701b1eSRobert Gordon 				 * clist. If not, then do nothing since it's
8010a701b1eSRobert Gordon 				 * already set to NULL (from kmem_zalloc)
8020a701b1eSRobert Gordon 				 */
8030a701b1eSRobert Gordon 				if (uiop->uio_segflg == UIO_USERSPACE) {
8040a701b1eSRobert Gordon 					rwl->c_adspc = ttoproc(curthread)->p_as;
8050a701b1eSRobert Gordon 				}
8060a701b1eSRobert Gordon 
8070a701b1eSRobert Gordon 				if (prev == NULL)
8080a701b1eSRobert Gordon 					prev = rwl;
8090a701b1eSRobert Gordon 				else {
8100a701b1eSRobert Gordon 					prev->c_next = rwl;
8110a701b1eSRobert Gordon 					prev = rwl;
8120a701b1eSRobert Gordon 				}
8130a701b1eSRobert Gordon 			}
8140a701b1eSRobert Gordon 
8150a701b1eSRobert Gordon 			rwl->c_next = NULL;
816*35bbd688SKaren Rochford 			xdrp->xp_wcl = first;
817*35bbd688SKaren Rochford 			*(rcip->rci_clpp) = first;
8180a701b1eSRobert Gordon 
8190a701b1eSRobert Gordon 			break;
8200a701b1eSRobert Gordon 
8210a701b1eSRobert Gordon 		case RCI_WRITE_ADDR_CHUNK:
8220a701b1eSRobert Gordon 			rwl = clist_alloc();
8230a701b1eSRobert Gordon 
8240a701b1eSRobert Gordon 			rwl->c_len = rcip->rci_len;
8250a701b1eSRobert Gordon 			rwl->u.c_daddr3 = rcip->rci_a.rci_addr;
8260a701b1eSRobert Gordon 			rwl->c_next = NULL;
8270a701b1eSRobert Gordon 			xdrp->xp_reply_chunk_len_alt += rcip->rci_len;
8280a701b1eSRobert Gordon 
8290a701b1eSRobert Gordon 			xdrp->xp_wcl = rwl;
8300a701b1eSRobert Gordon 			*(rcip->rci_clpp) = rwl;
8310a701b1eSRobert Gordon 
8320a701b1eSRobert Gordon 			break;
8330a701b1eSRobert Gordon 
8340a701b1eSRobert Gordon 		case RCI_REPLY_CHUNK:
8350a701b1eSRobert Gordon 			xdrp->xp_reply_chunk_len += rcip->rci_len;
8360a701b1eSRobert Gordon 			break;
8370a701b1eSRobert Gordon 		}
8380a701b1eSRobert Gordon 		return (TRUE);
8390a701b1eSRobert Gordon 
8400a701b1eSRobert Gordon 	case XDR_RDMA_GET_WLIST:
8410a701b1eSRobert Gordon 		*((struct clist **)info) = xdrp->xp_wcl;
8420a701b1eSRobert Gordon 		return (TRUE);
8430a701b1eSRobert Gordon 
8440a701b1eSRobert Gordon 	case XDR_RDMA_SET_WLIST:
8450a701b1eSRobert Gordon 		xdrp->xp_wcl = (struct clist *)info;
8460a701b1eSRobert Gordon 		return (TRUE);
8470a701b1eSRobert Gordon 
8480a701b1eSRobert Gordon 	case XDR_RDMA_GET_RLIST:
8490a701b1eSRobert Gordon 		*((struct clist **)info) = xdrp->xp_rcl;
8500a701b1eSRobert Gordon 		return (TRUE);
8510a701b1eSRobert Gordon 
8520a701b1eSRobert Gordon 	case XDR_RDMA_GET_WCINFO:
8530a701b1eSRobert Gordon 		rwcip = (rdma_wlist_conn_info_t *)info;
8540a701b1eSRobert Gordon 
8550a701b1eSRobert Gordon 		rwcip->rwci_wlist = xdrp->xp_wcl;
8560a701b1eSRobert Gordon 		rwcip->rwci_conn = xdrp->xp_conn;
8570a701b1eSRobert Gordon 
8580a701b1eSRobert Gordon 		return (TRUE);
8590a701b1eSRobert Gordon 
8607c478bd9Sstevel@tonic-gate 	default:
8617c478bd9Sstevel@tonic-gate 		return (FALSE);
8627c478bd9Sstevel@tonic-gate 	}
8637c478bd9Sstevel@tonic-gate }
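
/*
 * Illustrative only (not from this file): callers typically reach the
 * requests above through the generic XDR_CONTROL() macro, e.g.
 *
 *	struct clist *wcl;
 *	(void) XDR_CONTROL(xdrs, XDR_RDMA_GET_WLIST, &wcl);
 */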
8647c478bd9Sstevel@tonic-gate 
8650a701b1eSRobert Gordon bool_t xdr_do_clist(XDR *, clist **);
8667c478bd9Sstevel@tonic-gate 
8677c478bd9Sstevel@tonic-gate /*
8680a701b1eSRobert Gordon  * Not all fields in struct clist are interesting to the RPC over RDMA
8690a701b1eSRobert Gordon  * protocol. Only XDR the interesting fields.
8707c478bd9Sstevel@tonic-gate  */
8717c478bd9Sstevel@tonic-gate bool_t
8727c478bd9Sstevel@tonic-gate xdr_clist(XDR *xdrs, clist *objp)
8737c478bd9Sstevel@tonic-gate {
8747c478bd9Sstevel@tonic-gate 	if (!xdr_uint32(xdrs, &objp->c_xdroff))
8757c478bd9Sstevel@tonic-gate 		return (FALSE);
8767c478bd9Sstevel@tonic-gate 	if (!xdr_uint32(xdrs, &objp->c_smemhandle.mrc_rmr))
8777c478bd9Sstevel@tonic-gate 		return (FALSE);
8780a701b1eSRobert Gordon 	if (!xdr_uint32(xdrs, &objp->c_len))
8790a701b1eSRobert Gordon 		return (FALSE);
8800a701b1eSRobert Gordon 	if (!xdr_uint64(xdrs, &objp->w.c_saddr))
8817c478bd9Sstevel@tonic-gate 		return (FALSE);
8820a701b1eSRobert Gordon 	if (!xdr_do_clist(xdrs, &objp->c_next))
8837c478bd9Sstevel@tonic-gate 		return (FALSE);
8847c478bd9Sstevel@tonic-gate 	return (TRUE);
8857c478bd9Sstevel@tonic-gate }
8867c478bd9Sstevel@tonic-gate 
8870a701b1eSRobert Gordon /*
8880a701b1eSRobert Gordon  * The following two functions are forms of xdr_pointer()
8890a701b1eSRobert Gordon  * and xdr_reference(). Since the generic versions just
8900a701b1eSRobert Gordon  * kmem_alloc() a new clist, we actually want to use the
8910a701b1eSRobert Gordon  * rdma_clist kmem_cache.
8920a701b1eSRobert Gordon  */
8930a701b1eSRobert Gordon 
8940a701b1eSRobert Gordon /*
8950a701b1eSRobert Gordon  * Generate or free a clist structure from the
8960a701b1eSRobert Gordon  * kmem_cache "rdma_clist"
8970a701b1eSRobert Gordon  */
8980a701b1eSRobert Gordon bool_t
8990a701b1eSRobert Gordon xdr_ref_clist(XDR *xdrs, caddr_t *pp)
9000a701b1eSRobert Gordon {
9010a701b1eSRobert Gordon 	caddr_t loc = *pp;
9020a701b1eSRobert Gordon 	bool_t stat;
9030a701b1eSRobert Gordon 
9040a701b1eSRobert Gordon 	if (loc == NULL) {
9050a701b1eSRobert Gordon 		switch (xdrs->x_op) {
9060a701b1eSRobert Gordon 		case XDR_FREE:
9070a701b1eSRobert Gordon 			return (TRUE);
9080a701b1eSRobert Gordon 
9090a701b1eSRobert Gordon 		case XDR_DECODE:
9100a701b1eSRobert Gordon 			*pp = loc = (caddr_t)clist_alloc();
9110a701b1eSRobert Gordon 			break;
9120a701b1eSRobert Gordon 
9130a701b1eSRobert Gordon 		case XDR_ENCODE:
9140a701b1eSRobert Gordon 			ASSERT(loc);
9150a701b1eSRobert Gordon 			break;
9160a701b1eSRobert Gordon 		}
9170a701b1eSRobert Gordon 	}
9180a701b1eSRobert Gordon 
9190a701b1eSRobert Gordon 	stat = xdr_clist(xdrs, (struct clist *)loc);
9200a701b1eSRobert Gordon 
9210a701b1eSRobert Gordon 	if (xdrs->x_op == XDR_FREE) {
9220a701b1eSRobert Gordon 		kmem_cache_free(clist_cache, loc);
9230a701b1eSRobert Gordon 		*pp = NULL;
9240a701b1eSRobert Gordon 	}
9250a701b1eSRobert Gordon 	return (stat);
9260a701b1eSRobert Gordon }
9270a701b1eSRobert Gordon 
9280a701b1eSRobert Gordon /*
9290a701b1eSRobert Gordon  * XDR a pointer to a possibly recursive clist. This differs
9300a701b1eSRobert Gordon  * from xdr_reference in that it can serialize/deserialize
9310a701b1eSRobert Gordon  * trees correctly.
9320a701b1eSRobert Gordon  *
9330a701b1eSRobert Gordon  *  What is sent is actually a union:
9340a701b1eSRobert Gordon  *
9350a701b1eSRobert Gordon  *  union object_pointer switch (boolean b) {
9360a701b1eSRobert Gordon  *  case TRUE: object_data data;
9370a701b1eSRobert Gordon  *  case FALSE: void nothing;
9380a701b1eSRobert Gordon  *  }
9390a701b1eSRobert Gordon  *
9400a701b1eSRobert Gordon  * > objpp: Pointer to the pointer to the object.
9410a701b1eSRobert Gordon  *
9420a701b1eSRobert Gordon  */
9430a701b1eSRobert Gordon 
9447c478bd9Sstevel@tonic-gate bool_t
9450a701b1eSRobert Gordon xdr_do_clist(XDR *xdrs, clist **objpp)
9467c478bd9Sstevel@tonic-gate {
9470a701b1eSRobert Gordon 	bool_t more_data;
9480a701b1eSRobert Gordon 
9490a701b1eSRobert Gordon 	more_data = (*objpp != NULL);
9500a701b1eSRobert Gordon 	if (!xdr_bool(xdrs, &more_data))
9510a701b1eSRobert Gordon 		return (FALSE);
9520a701b1eSRobert Gordon 	if (!more_data) {
9530a701b1eSRobert Gordon 		*objpp = NULL;
9540a701b1eSRobert Gordon 		return (TRUE);
9550a701b1eSRobert Gordon 	}
9560a701b1eSRobert Gordon 	return (xdr_ref_clist(xdrs, (caddr_t *)objpp));
9577c478bd9Sstevel@tonic-gate }
9587c478bd9Sstevel@tonic-gate 
9597c478bd9Sstevel@tonic-gate uint_t
9607c478bd9Sstevel@tonic-gate xdr_getbufsize(XDR *xdrs)
9617c478bd9Sstevel@tonic-gate {
9620a701b1eSRobert Gordon 	xrdma_private_t *xdrp = (xrdma_private_t *)(xdrs->x_private);
9637c478bd9Sstevel@tonic-gate 
9647c478bd9Sstevel@tonic-gate 	return ((uint_t)xdrp->xp_buf_size);
9657c478bd9Sstevel@tonic-gate }
9660a701b1eSRobert Gordon 
9670a701b1eSRobert Gordon /* ARGSUSED */
9680a701b1eSRobert Gordon bool_t
9690a701b1eSRobert Gordon xdr_encode_rlist_svc(XDR *xdrs, clist *rlist)
9700a701b1eSRobert Gordon {
9710a701b1eSRobert Gordon 	bool_t	vfalse = FALSE;
9720a701b1eSRobert Gordon 
9730a701b1eSRobert Gordon 	ASSERT(rlist == NULL);
9740a701b1eSRobert Gordon 	return (xdr_bool(xdrs, &vfalse));
9750a701b1eSRobert Gordon }
9760a701b1eSRobert Gordon 
9770a701b1eSRobert Gordon bool_t
9780a701b1eSRobert Gordon xdr_encode_wlist(XDR *xdrs, clist *w)
9790a701b1eSRobert Gordon {
9800a701b1eSRobert Gordon 	bool_t		vfalse = FALSE, vtrue = TRUE;
9810a701b1eSRobert Gordon 	int		i;
9820a701b1eSRobert Gordon 	uint_t		num_segment = 0;
9830a701b1eSRobert Gordon 	struct clist	*cl;
9840a701b1eSRobert Gordon 
9850a701b1eSRobert Gordon 	/* does a wlist exist? */
9860a701b1eSRobert Gordon 	if (w == NULL) {
9870a701b1eSRobert Gordon 		return (xdr_bool(xdrs, &vfalse));
9880a701b1eSRobert Gordon 	}
9890a701b1eSRobert Gordon 	/* Encode N consecutive segments, 1, N, HLOO, ..., HLOO, 0 */
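	/* Each HLOO is a segment's handle (rmr), length and 64-bit offset. */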
9900a701b1eSRobert Gordon 	if (!xdr_bool(xdrs, &vtrue))
9910a701b1eSRobert Gordon 		return (FALSE);
9920a701b1eSRobert Gordon 
9930a701b1eSRobert Gordon 	for (cl = w; cl != NULL; cl = cl->c_next) {
9940a701b1eSRobert Gordon 		num_segment++;
9950a701b1eSRobert Gordon 	}
9960a701b1eSRobert Gordon 
9970a701b1eSRobert Gordon 	if (!xdr_uint32(xdrs, &num_segment))
9980a701b1eSRobert Gordon 		return (FALSE);
9990a701b1eSRobert Gordon 	for (i = 0; i < num_segment; i++) {
1000f837ee4aSSiddheshwar Mahesh 
1001f837ee4aSSiddheshwar Mahesh 		DTRACE_PROBE1(krpc__i__xdr_encode_wlist_len, uint_t, w->c_len);
1002f837ee4aSSiddheshwar Mahesh 
10030a701b1eSRobert Gordon 		if (!xdr_uint32(xdrs, &w->c_dmemhandle.mrc_rmr))
10040a701b1eSRobert Gordon 			return (FALSE);
10050a701b1eSRobert Gordon 
10060a701b1eSRobert Gordon 		if (!xdr_uint32(xdrs, &w->c_len))
10070a701b1eSRobert Gordon 			return (FALSE);
10080a701b1eSRobert Gordon 
10090a701b1eSRobert Gordon 		if (!xdr_uint64(xdrs, &w->u.c_daddr))
10100a701b1eSRobert Gordon 			return (FALSE);
10110a701b1eSRobert Gordon 
10120a701b1eSRobert Gordon 		w = w->c_next;
10130a701b1eSRobert Gordon 	}
10140a701b1eSRobert Gordon 
10150a701b1eSRobert Gordon 	if (!xdr_bool(xdrs, &vfalse))
10160a701b1eSRobert Gordon 		return (FALSE);
10170a701b1eSRobert Gordon 
10180a701b1eSRobert Gordon 	return (TRUE);
10190a701b1eSRobert Gordon }
10200a701b1eSRobert Gordon 
10210a701b1eSRobert Gordon 
10220a701b1eSRobert Gordon /*
10230a701b1eSRobert Gordon  * Conditionally decode an RDMA WRITE chunk list from the XDR stream.
10240a701b1eSRobert Gordon  *
10250a701b1eSRobert Gordon  * If the next boolean in the XDR stream is false there is no
10260a701b1eSRobert Gordon  * RDMA WRITE chunk list present. Otherwise iterate over the
10270a701b1eSRobert Gordon  * array and for each entry: allocate a struct clist and decode.
10280a701b1eSRobert Gordon  * Pass back an indication via wlist_exists if we have seen a
10290a701b1eSRobert Gordon  * RDMA WRITE chunk list.
10300a701b1eSRobert Gordon  */
10310a701b1eSRobert Gordon bool_t
10320a701b1eSRobert Gordon xdr_decode_wlist(XDR *xdrs, struct clist **w, bool_t *wlist_exists)
10330a701b1eSRobert Gordon {
10340a701b1eSRobert Gordon 	struct clist	*tmp;
10350a701b1eSRobert Gordon 	bool_t		more = FALSE;
10360a701b1eSRobert Gordon 	uint32_t	seg_array_len;
10370a701b1eSRobert Gordon 	uint32_t	i;
10380a701b1eSRobert Gordon 
10390a701b1eSRobert Gordon 	if (!xdr_bool(xdrs, &more))
10400a701b1eSRobert Gordon 		return (FALSE);
10410a701b1eSRobert Gordon 
10420a701b1eSRobert Gordon 	/* is there a wlist? */
10430a701b1eSRobert Gordon 	if (more == FALSE) {
10440a701b1eSRobert Gordon 		*wlist_exists = FALSE;
10450a701b1eSRobert Gordon 		return (TRUE);
10460a701b1eSRobert Gordon 	}
10470a701b1eSRobert Gordon 	*wlist_exists = TRUE;
10480a701b1eSRobert Gordon 
10490a701b1eSRobert Gordon 	if (!xdr_uint32(xdrs, &seg_array_len))
10500a701b1eSRobert Gordon 		return (FALSE);
10510a701b1eSRobert Gordon 
10520a701b1eSRobert Gordon 	tmp = *w = clist_alloc();
10530a701b1eSRobert Gordon 	for (i = 0; i < seg_array_len; i++) {
1054f837ee4aSSiddheshwar Mahesh 
10550a701b1eSRobert Gordon 		if (!xdr_uint32(xdrs, &tmp->c_dmemhandle.mrc_rmr))
10560a701b1eSRobert Gordon 			return (FALSE);
10570a701b1eSRobert Gordon 		if (!xdr_uint32(xdrs, &tmp->c_len))
10580a701b1eSRobert Gordon 			return (FALSE);
1059f837ee4aSSiddheshwar Mahesh 
1060f837ee4aSSiddheshwar Mahesh 		DTRACE_PROBE1(krpc__i__xdr_decode_wlist_len,
1061f837ee4aSSiddheshwar Mahesh 		    uint_t, tmp->c_len);
1062f837ee4aSSiddheshwar Mahesh 
10630a701b1eSRobert Gordon 		if (!xdr_uint64(xdrs, &tmp->u.c_daddr))
10640a701b1eSRobert Gordon 			return (FALSE);
10650a701b1eSRobert Gordon 		if (i < seg_array_len - 1) {
10660a701b1eSRobert Gordon 			tmp->c_next = clist_alloc();
10670a701b1eSRobert Gordon 			tmp = tmp->c_next;
10680a701b1eSRobert Gordon 		} else {
10690a701b1eSRobert Gordon 			tmp->c_next = NULL;
10700a701b1eSRobert Gordon 		}
10710a701b1eSRobert Gordon 	}
10720a701b1eSRobert Gordon 
10730a701b1eSRobert Gordon 	more = FALSE;
10740a701b1eSRobert Gordon 	if (!xdr_bool(xdrs, &more))
10750a701b1eSRobert Gordon 		return (FALSE);
10760a701b1eSRobert Gordon 
10770a701b1eSRobert Gordon 	return (TRUE);
10780a701b1eSRobert Gordon }
10790a701b1eSRobert Gordon 
10800a701b1eSRobert Gordon /*
10810a701b1eSRobert Gordon  * Server side RDMA WRITE list decode.
10820a701b1eSRobert Gordon  * XDR context is memory ops
10830a701b1eSRobert Gordon  */
10840a701b1eSRobert Gordon bool_t
10850a701b1eSRobert Gordon xdr_decode_wlist_svc(XDR *xdrs, struct clist **wclp, bool_t *wwl,
10860a701b1eSRobert Gordon     uint32_t *total_length, CONN *conn)
10870a701b1eSRobert Gordon {
10880a701b1eSRobert Gordon 	struct clist	*first, *ncl;
10890a701b1eSRobert Gordon 	char		*memp;
10900a701b1eSRobert Gordon 	uint32_t	num_wclist;
10910a701b1eSRobert Gordon 	uint32_t	wcl_length = 0;
10920a701b1eSRobert Gordon 	uint32_t	i;
10930a701b1eSRobert Gordon 	bool_t		more = FALSE;
10940a701b1eSRobert Gordon 
10950a701b1eSRobert Gordon 	*wclp = NULL;
10960a701b1eSRobert Gordon 	*wwl = FALSE;
10970a701b1eSRobert Gordon 	*total_length = 0;
10980a701b1eSRobert Gordon 
10990a701b1eSRobert Gordon 	if (!xdr_bool(xdrs, &more)) {
11000a701b1eSRobert Gordon 		return (FALSE);
11010a701b1eSRobert Gordon 	}
11020a701b1eSRobert Gordon 
11030a701b1eSRobert Gordon 	if (more == FALSE) {
11040a701b1eSRobert Gordon 		return (TRUE);
11050a701b1eSRobert Gordon 	}
11060a701b1eSRobert Gordon 
11070a701b1eSRobert Gordon 	*wwl = TRUE;
11080a701b1eSRobert Gordon 
11090a701b1eSRobert Gordon 	if (!xdr_uint32(xdrs, &num_wclist)) {
11100a701b1eSRobert Gordon 		DTRACE_PROBE(krpc__e__xdrrdma__wlistsvc__listlength);
11110a701b1eSRobert Gordon 		return (FALSE);
11120a701b1eSRobert Gordon 	}
11130a701b1eSRobert Gordon 
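	/* Decode each write-list segment, capping its length at MAX_SVC_XFER_SIZE. */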
11140a701b1eSRobert Gordon 	first = ncl = clist_alloc();
11150a701b1eSRobert Gordon 
11160a701b1eSRobert Gordon 	for (i = 0; i < num_wclist; i++) {
1117f837ee4aSSiddheshwar Mahesh 
11180a701b1eSRobert Gordon 		if (!xdr_uint32(xdrs, &ncl->c_dmemhandle.mrc_rmr))
11190a701b1eSRobert Gordon 			goto err_out;
11200a701b1eSRobert Gordon 		if (!xdr_uint32(xdrs, &ncl->c_len))
11210a701b1eSRobert Gordon 			goto err_out;
11220a701b1eSRobert Gordon 		if (!xdr_uint64(xdrs, &ncl->u.c_daddr))
11230a701b1eSRobert Gordon 			goto err_out;
11240a701b1eSRobert Gordon 
11250a701b1eSRobert Gordon 		if (ncl->c_len > MAX_SVC_XFER_SIZE) {
11260a701b1eSRobert Gordon 			DTRACE_PROBE(
11270a701b1eSRobert Gordon 			    krpc__e__xdrrdma__wlistsvc__chunklist_toobig);
11280a701b1eSRobert Gordon 			ncl->c_len = MAX_SVC_XFER_SIZE;
11290a701b1eSRobert Gordon 		}
11300a701b1eSRobert Gordon 
1131f837ee4aSSiddheshwar Mahesh 		DTRACE_PROBE1(krpc__i__xdr_decode_wlist_svc_len,
1132f837ee4aSSiddheshwar Mahesh 		    uint_t, ncl->c_len);
1133f837ee4aSSiddheshwar Mahesh 
11340a701b1eSRobert Gordon 		wcl_length += ncl->c_len;
11350a701b1eSRobert Gordon 
11360a701b1eSRobert Gordon 		if (i < num_wclist - 1) {
11370a701b1eSRobert Gordon 			ncl->c_next = clist_alloc();
11380a701b1eSRobert Gordon 			ncl = ncl->c_next;
11390a701b1eSRobert Gordon 		}
11400a701b1eSRobert Gordon 	}
11410a701b1eSRobert Gordon 
11420a701b1eSRobert Gordon 	if (!xdr_bool(xdrs, &more))
11430a701b1eSRobert Gordon 		goto err_out;
11440a701b1eSRobert Gordon 
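	/* Allocate one long buffer big enough to back the whole write list. */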
11450a701b1eSRobert Gordon 	first->rb_longbuf.type = RDMA_LONG_BUFFER;
11460a701b1eSRobert Gordon 	first->rb_longbuf.len =
11470a701b1eSRobert Gordon 	    wcl_length > WCL_BUF_LEN ? wcl_length : WCL_BUF_LEN;
11480a701b1eSRobert Gordon 
11490a701b1eSRobert Gordon 	if (rdma_buf_alloc(conn, &first->rb_longbuf)) {
11500a701b1eSRobert Gordon 		clist_free(first);
11510a701b1eSRobert Gordon 		return (FALSE);
11520a701b1eSRobert Gordon 	}
11530a701b1eSRobert Gordon 
11540a701b1eSRobert Gordon 	memp = first->rb_longbuf.addr;
11550a701b1eSRobert Gordon 
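	/* Carve the long buffer into per-segment source addresses. */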
11560a701b1eSRobert Gordon 	ncl = first;
11570a701b1eSRobert Gordon 	for (i = 0; i < num_wclist; i++) {
11580a701b1eSRobert Gordon 		ncl->w.c_saddr3 = (caddr_t)memp;
11590a701b1eSRobert Gordon 		memp += ncl->c_len;
11600a701b1eSRobert Gordon 		ncl = ncl->c_next;
11610a701b1eSRobert Gordon 	}
11620a701b1eSRobert Gordon 
11630a701b1eSRobert Gordon 	*wclp = first;
11640a701b1eSRobert Gordon 	*total_length = wcl_length;
11650a701b1eSRobert Gordon 	return (TRUE);
11660a701b1eSRobert Gordon 
11670a701b1eSRobert Gordon err_out:
11680a701b1eSRobert Gordon 	clist_free(first);
11690a701b1eSRobert Gordon 	return (FALSE);
11700a701b1eSRobert Gordon }
11710a701b1eSRobert Gordon 
11720a701b1eSRobert Gordon /*
11730a701b1eSRobert Gordon  * XDR decode the long reply write chunk.
11740a701b1eSRobert Gordon  */
11750a701b1eSRobert Gordon bool_t
11760a701b1eSRobert Gordon xdr_decode_reply_wchunk(XDR *xdrs, struct clist **clist)
11770a701b1eSRobert Gordon {
11780a701b1eSRobert Gordon 	bool_t		have_rchunk = FALSE;
11790a701b1eSRobert Gordon 	struct clist	*first = NULL, *ncl = NULL;
11800a701b1eSRobert Gordon 	uint32_t	num_wclist;
11810a701b1eSRobert Gordon 	uint32_t	i;
11820a701b1eSRobert Gordon 
11830a701b1eSRobert Gordon 	if (!xdr_bool(xdrs, &have_rchunk))
11840a701b1eSRobert Gordon 		return (FALSE);
11850a701b1eSRobert Gordon 
11860a701b1eSRobert Gordon 	if (have_rchunk == FALSE)
11870a701b1eSRobert Gordon 		return (TRUE);
11880a701b1eSRobert Gordon 
11890a701b1eSRobert Gordon 	if (!xdr_uint32(xdrs, &num_wclist)) {
11900a701b1eSRobert Gordon 		DTRACE_PROBE(krpc__e__xdrrdma__replywchunk__listlength);
11910a701b1eSRobert Gordon 		return (FALSE);
11920a701b1eSRobert Gordon 	}
11930a701b1eSRobert Gordon 
11940a701b1eSRobert Gordon 	if (num_wclist == 0) {
11950a701b1eSRobert Gordon 		return (FALSE);
11960a701b1eSRobert Gordon 	}
11970a701b1eSRobert Gordon 
11980a701b1eSRobert Gordon 	first = ncl = clist_alloc();
11990a701b1eSRobert Gordon 
12000a701b1eSRobert Gordon 	for (i = 0; i < num_wclist; i++) {
1201f837ee4aSSiddheshwar Mahesh 
1202f837ee4aSSiddheshwar Mahesh 		if (i > 0) {
1203f837ee4aSSiddheshwar Mahesh 			ncl->c_next = clist_alloc();
1204f837ee4aSSiddheshwar Mahesh 			ncl = ncl->c_next;
1205f837ee4aSSiddheshwar Mahesh 		}
1206f837ee4aSSiddheshwar Mahesh 
12070a701b1eSRobert Gordon 		if (!xdr_uint32(xdrs, &ncl->c_dmemhandle.mrc_rmr))
12080a701b1eSRobert Gordon 			goto err_out;
12090a701b1eSRobert Gordon 		if (!xdr_uint32(xdrs, &ncl->c_len))
12100a701b1eSRobert Gordon 			goto err_out;
12110a701b1eSRobert Gordon 		if (!xdr_uint64(xdrs, &ncl->u.c_daddr))
12120a701b1eSRobert Gordon 			goto err_out;
12130a701b1eSRobert Gordon 
12140a701b1eSRobert Gordon 		if (ncl->c_len > MAX_SVC_XFER_SIZE) {
12150a701b1eSRobert Gordon 			DTRACE_PROBE(
12160a701b1eSRobert Gordon 			    krpc__e__xdrrdma__replywchunk__chunklist_toobig);
12170a701b1eSRobert Gordon 			ncl->c_len = MAX_SVC_XFER_SIZE;
12180a701b1eSRobert Gordon 		}
12190a701b1eSRobert Gordon 		if (!(ncl->c_dmemhandle.mrc_rmr &&
12200a701b1eSRobert Gordon 		    (ncl->c_len > 0) && ncl->u.c_daddr))
12210a701b1eSRobert Gordon 			DTRACE_PROBE(
12220a701b1eSRobert Gordon 			    krpc__e__xdrrdma__replywchunk__invalid_segaddr);
12230a701b1eSRobert Gordon 
1224f837ee4aSSiddheshwar Mahesh 		DTRACE_PROBE1(krpc__i__xdr_decode_reply_wchunk_c_len,
1225f837ee4aSSiddheshwar Mahesh 		    uint32_t, ncl->c_len);
1226f837ee4aSSiddheshwar Mahesh 
12270a701b1eSRobert Gordon 	}
12280a701b1eSRobert Gordon 	*clist = first;
12290a701b1eSRobert Gordon 	return (TRUE);
12300a701b1eSRobert Gordon 
12310a701b1eSRobert Gordon err_out:
12320a701b1eSRobert Gordon 	clist_free(first);
12330a701b1eSRobert Gordon 	return (FALSE);
12340a701b1eSRobert Gordon }
12350a701b1eSRobert Gordon 
12360a701b1eSRobert Gordon 
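/*
 * XDR encode the long reply write chunk.  Encodes the presence flag,
 * the segment count, and for each segment its RDMA handle, length
 * and address; an empty list encodes only a FALSE presence flag.
 */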
12370a701b1eSRobert Gordon bool_t
12380a701b1eSRobert Gordon xdr_encode_reply_wchunk(XDR *xdrs,
12390a701b1eSRobert Gordon     struct clist *cl_longreply, uint32_t seg_array_len)
12400a701b1eSRobert Gordon {
12410a701b1eSRobert Gordon 	int		i;
12420a701b1eSRobert Gordon 	bool_t		long_reply_exists = TRUE;
12430a701b1eSRobert Gordon 	uint32_t	length;
12440a701b1eSRobert Gordon 	uint64		offset;
12450a701b1eSRobert Gordon 
12460a701b1eSRobert Gordon 	if (seg_array_len > 0) {
12470a701b1eSRobert Gordon 		if (!xdr_bool(xdrs, &long_reply_exists))
12480a701b1eSRobert Gordon 			return (FALSE);
12490a701b1eSRobert Gordon 		if (!xdr_uint32(xdrs, &seg_array_len))
12500a701b1eSRobert Gordon 			return (FALSE);
12510a701b1eSRobert Gordon 
12520a701b1eSRobert Gordon 		for (i = 0; i < seg_array_len; i++) {
12530a701b1eSRobert Gordon 			if (!cl_longreply)
12540a701b1eSRobert Gordon 				return (FALSE);
12550a701b1eSRobert Gordon 			length = cl_longreply->c_len;
12560a701b1eSRobert Gordon 			offset = (uint64) cl_longreply->u.c_daddr;
12570a701b1eSRobert Gordon 
1258f837ee4aSSiddheshwar Mahesh 			DTRACE_PROBE1(
1259f837ee4aSSiddheshwar Mahesh 			    krpc__i__xdr_encode_reply_wchunk_c_len,
1260f837ee4aSSiddheshwar Mahesh 			    uint32_t, length);
1261f837ee4aSSiddheshwar Mahesh 
12620a701b1eSRobert Gordon 			if (!xdr_uint32(xdrs,
12630a701b1eSRobert Gordon 			    &cl_longreply->c_dmemhandle.mrc_rmr))
12640a701b1eSRobert Gordon 				return (FALSE);
12650a701b1eSRobert Gordon 			if (!xdr_uint32(xdrs, &length))
12660a701b1eSRobert Gordon 				return (FALSE);
12670a701b1eSRobert Gordon 			if (!xdr_uint64(xdrs, &offset))
12680a701b1eSRobert Gordon 				return (FALSE);
12690a701b1eSRobert Gordon 			cl_longreply = cl_longreply->c_next;
12700a701b1eSRobert Gordon 		}
12710a701b1eSRobert Gordon 	} else {
12720a701b1eSRobert Gordon 		long_reply_exists = FALSE;
12730a701b1eSRobert Gordon 		if (!xdr_bool(xdrs, &long_reply_exists))
12740a701b1eSRobert Gordon 			return (FALSE);
12750a701b1eSRobert Gordon 	}
12760a701b1eSRobert Gordon 	return (TRUE);
12770a701b1eSRobert Gordon }
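/*
 * RDMA READ the chunks described by rlist from the client into a
 * single long buffer.  The buffer is registered once and the same
 * memory handle is reused for every chunk; the memory is synced
 * after the last read completes.
 */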
12780a701b1eSRobert Gordon bool_t
1279f837ee4aSSiddheshwar Mahesh xdrrdma_read_from_client(struct clist *rlist, CONN **conn, uint_t count)
12800a701b1eSRobert Gordon {
12810a701b1eSRobert Gordon 	struct clist	*rdclist;
12820a701b1eSRobert Gordon 	struct clist	cl;
12830a701b1eSRobert Gordon 	uint_t		total_len = 0;
12840a701b1eSRobert Gordon 	uint32_t	status;
12850a701b1eSRobert Gordon 	bool_t		retval = TRUE;
12860a701b1eSRobert Gordon 
1287f837ee4aSSiddheshwar Mahesh 	rlist->rb_longbuf.type = RDMA_LONG_BUFFER;
1288f837ee4aSSiddheshwar Mahesh 	rlist->rb_longbuf.len =
12890a701b1eSRobert Gordon 	    count > RCL_BUF_LEN ? count : RCL_BUF_LEN;
12900a701b1eSRobert Gordon 
1291f837ee4aSSiddheshwar Mahesh 	if (rdma_buf_alloc(*conn, &rlist->rb_longbuf)) {
1292f837ee4aSSiddheshwar Mahesh 		return (FALSE);
1293f837ee4aSSiddheshwar Mahesh 	}
1294f837ee4aSSiddheshwar Mahesh 
1295f837ee4aSSiddheshwar Mahesh 	/*
1296f837ee4aSSiddheshwar Mahesh 	 * The entire buffer is registered with the first chunk.
1297f837ee4aSSiddheshwar Mahesh 	 * Later chunks will use the same registered memory handle.
1298f837ee4aSSiddheshwar Mahesh 	 */
1299f837ee4aSSiddheshwar Mahesh 
1300f837ee4aSSiddheshwar Mahesh 	cl = *rlist;
1301f837ee4aSSiddheshwar Mahesh 	cl.c_next = NULL;
1302f837ee4aSSiddheshwar Mahesh 	if (clist_register(*conn, &cl, CLIST_REG_DST) != RDMA_SUCCESS) {
1303f837ee4aSSiddheshwar Mahesh 		rdma_buf_free(*conn, &rlist->rb_longbuf);
1304f837ee4aSSiddheshwar Mahesh 		DTRACE_PROBE(
1305f837ee4aSSiddheshwar Mahesh 		    krpc__e__xdrrdma__readfromclient__clist__reg);
13060a701b1eSRobert Gordon 		return (FALSE);
13070a701b1eSRobert Gordon 	}
13080a701b1eSRobert Gordon 
1309f837ee4aSSiddheshwar Mahesh 	rlist->c_regtype = CLIST_REG_DST;
1310f837ee4aSSiddheshwar Mahesh 	rlist->c_dmemhandle = cl.c_dmemhandle;
1311f837ee4aSSiddheshwar Mahesh 	rlist->c_dsynchandle = cl.c_dsynchandle;
1312f837ee4aSSiddheshwar Mahesh 
1313f837ee4aSSiddheshwar Mahesh 	for (rdclist = rlist;
13140a701b1eSRobert Gordon 	    rdclist != NULL; rdclist = rdclist->c_next) {
13150a701b1eSRobert Gordon 		total_len += rdclist->c_len;
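		/* Turn the chunk's destination offset into an absolute address in the long buffer. */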
13160a701b1eSRobert Gordon #if (defined(OBJ32)||defined(DEBUG32))
13170a701b1eSRobert Gordon 		rdclist->u.c_daddr3 =
1318f837ee4aSSiddheshwar Mahesh 		    (caddr_t)((char *)rlist->rb_longbuf.addr +
13190a701b1eSRobert Gordon 		    (uint32) rdclist->u.c_daddr3);
13200a701b1eSRobert Gordon #else
13210a701b1eSRobert Gordon 		rdclist->u.c_daddr3 =
1322f837ee4aSSiddheshwar Mahesh 		    (caddr_t)((char *)rlist->rb_longbuf.addr +
13230a701b1eSRobert Gordon 		    (uint64) rdclist->u.c_daddr);
13240a701b1eSRobert Gordon 
13250a701b1eSRobert Gordon #endif
13260a701b1eSRobert Gordon 		cl = (*rdclist);
13270a701b1eSRobert Gordon 		cl.c_next = NULL;
13280a701b1eSRobert Gordon 
1329f837ee4aSSiddheshwar Mahesh 		/*
1330f837ee4aSSiddheshwar Mahesh 		 * Use the same memory handle for all the chunks
1331f837ee4aSSiddheshwar Mahesh 		 */
1332f837ee4aSSiddheshwar Mahesh 		cl.c_dmemhandle = rlist->c_dmemhandle;
1333f837ee4aSSiddheshwar Mahesh 		cl.c_dsynchandle = rlist->c_dsynchandle;
1334f837ee4aSSiddheshwar Mahesh 
13350a701b1eSRobert Gordon 
13360a701b1eSRobert Gordon 		DTRACE_PROBE1(krpc__i__xdrrdma__readfromclient__buflen,
13370a701b1eSRobert Gordon 		    int, rdclist->c_len);
13380a701b1eSRobert Gordon 
13390a701b1eSRobert Gordon 		/*
13400a701b1eSRobert Gordon 		 * Now read the chunk in
13410a701b1eSRobert Gordon 		 */
13420a701b1eSRobert Gordon 		if (rdclist->c_next == NULL) {
13430a701b1eSRobert Gordon 			status = RDMA_READ(*conn, &cl, WAIT);
13440a701b1eSRobert Gordon 		} else {
13450a701b1eSRobert Gordon 			status = RDMA_READ(*conn, &cl, NOWAIT);
13460a701b1eSRobert Gordon 		}
13470a701b1eSRobert Gordon 		if (status != RDMA_SUCCESS) {
13480a701b1eSRobert Gordon 			DTRACE_PROBE(
13490a701b1eSRobert Gordon 			    krpc__e__xdrrdma__readfromclient__readfailed);
1350f837ee4aSSiddheshwar Mahesh 			rdma_buf_free(*conn, &rlist->rb_longbuf);
13510a701b1eSRobert Gordon 			return (FALSE);
13520a701b1eSRobert Gordon 		}
13530a701b1eSRobert Gordon 	}
13540a701b1eSRobert Gordon 
1355f837ee4aSSiddheshwar Mahesh 	cl = (*rlist);
13560a701b1eSRobert Gordon 	cl.c_next = NULL;
13570a701b1eSRobert Gordon 	cl.c_len = total_len;
1358f837ee4aSSiddheshwar Mahesh 	if (clist_syncmem(*conn, &cl, CLIST_REG_DST) != RDMA_SUCCESS) {
13590a701b1eSRobert Gordon 		retval = FALSE;
13600a701b1eSRobert Gordon 	}
13610a701b1eSRobert Gordon 	return (retval);
13620a701b1eSRobert Gordon }
13630a701b1eSRobert Gordon 
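/*
 * Free the long buffer backing the chunk list and the list itself.
 */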
13640a701b1eSRobert Gordon bool_t
13650a701b1eSRobert Gordon xdrrdma_free_clist(CONN *conn, struct clist *clp)
13660a701b1eSRobert Gordon {
13670a701b1eSRobert Gordon 	rdma_buf_free(conn, &clp->rb_longbuf);
13680a701b1eSRobert Gordon 	clist_free(clp);
13690a701b1eSRobert Gordon 	return (TRUE);
13700a701b1eSRobert Gordon }
13710a701b1eSRobert Gordon 
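/*
 * RDMA WRITE the read results described by wcl into the client's
 * write list.  The source buffer is registered once, any trailing
 * roundup-only chunk is dropped from a temporary copy of the list,
 * the memory is synced, and a single RDMA_WRITE pushes the data.
 */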
13720a701b1eSRobert Gordon bool_t
1373f837ee4aSSiddheshwar Mahesh xdrrdma_send_read_data(XDR *xdrs, uint_t data_len, struct clist *wcl)
13740a701b1eSRobert Gordon {
13750a701b1eSRobert Gordon 	int status;
13760a701b1eSRobert Gordon 	xrdma_private_t	*xdrp = (xrdma_private_t *)(xdrs->x_private);
13770a701b1eSRobert Gordon 	struct xdr_ops *xops = xdrrdma_xops();
1378f837ee4aSSiddheshwar Mahesh 	struct clist *tcl, *wrcl, *cl;
1379f837ee4aSSiddheshwar Mahesh 	struct clist fcl;
1380f837ee4aSSiddheshwar Mahesh 	int rndup_present, rnduplen;
1381f837ee4aSSiddheshwar Mahesh 
1382f837ee4aSSiddheshwar Mahesh 	rndup_present = 0;
1383f837ee4aSSiddheshwar Mahesh 	wrcl = NULL;
13840a701b1eSRobert Gordon 
13850a701b1eSRobert Gordon 	/* caller is doing a sizeof */
13860a701b1eSRobert Gordon 	if (xdrs->x_ops != &xdrrdma_ops || xdrs->x_ops == xops)
13870a701b1eSRobert Gordon 		return (TRUE);
13880a701b1eSRobert Gordon 
1389f837ee4aSSiddheshwar Mahesh 	/* copy of the first chunk */
1390f837ee4aSSiddheshwar Mahesh 	fcl = *wcl;
1391f837ee4aSSiddheshwar Mahesh 	fcl.c_next = NULL;
1392f837ee4aSSiddheshwar Mahesh 
1393f837ee4aSSiddheshwar Mahesh 	/*
1394f837ee4aSSiddheshwar Mahesh 	 * The entire buffer is registered with the first chunk.
1395f837ee4aSSiddheshwar Mahesh 	 * Later chunks will use the same registered memory handle.
1396f837ee4aSSiddheshwar Mahesh 	 */
1397f837ee4aSSiddheshwar Mahesh 
1398f837ee4aSSiddheshwar Mahesh 	status = clist_register(xdrp->xp_conn, &fcl, CLIST_REG_SOURCE);
13990a701b1eSRobert Gordon 	if (status != RDMA_SUCCESS) {
14000a701b1eSRobert Gordon 		return (FALSE);
14010a701b1eSRobert Gordon 	}
14020a701b1eSRobert Gordon 
1403f837ee4aSSiddheshwar Mahesh 	wcl->c_regtype = CLIST_REG_SOURCE;
1404f837ee4aSSiddheshwar Mahesh 	wcl->c_smemhandle = fcl.c_smemhandle;
1405f837ee4aSSiddheshwar Mahesh 	wcl->c_ssynchandle = fcl.c_ssynchandle;
1406f837ee4aSSiddheshwar Mahesh 
1407f837ee4aSSiddheshwar Mahesh 	/*
1408f837ee4aSSiddheshwar Mahesh 	 * Only transfer the read data, ignoring any trailing
1409f837ee4aSSiddheshwar Mahesh 	 * roundup chunks. A bit of work, but it saves an
1410f837ee4aSSiddheshwar Mahesh 	 * unnecessary extra RDMA_WRITE containing only
1411f837ee4aSSiddheshwar Mahesh 	 * roundup bytes.
1412f837ee4aSSiddheshwar Mahesh 	 */
1413f837ee4aSSiddheshwar Mahesh 
1414f837ee4aSSiddheshwar Mahesh 	rnduplen = clist_len(wcl) - data_len;
1415f837ee4aSSiddheshwar Mahesh 
1416f837ee4aSSiddheshwar Mahesh 	if (rnduplen) {
1417f837ee4aSSiddheshwar Mahesh 
1418f837ee4aSSiddheshwar Mahesh 		tcl = wcl->c_next;
1419f837ee4aSSiddheshwar Mahesh 
1420f837ee4aSSiddheshwar Mahesh 		/*
1421f837ee4aSSiddheshwar Mahesh 		 * Check if there is a trailing roundup chunk
1422f837ee4aSSiddheshwar Mahesh 		 */
1423f837ee4aSSiddheshwar Mahesh 		while (tcl) {
1424f837ee4aSSiddheshwar Mahesh 			if ((tcl->c_next == NULL) && (tcl->c_len == rnduplen)) {
1425f837ee4aSSiddheshwar Mahesh 				rndup_present = 1;
1426f837ee4aSSiddheshwar Mahesh 				break;
1427f837ee4aSSiddheshwar Mahesh 			}
1428f837ee4aSSiddheshwar Mahesh 			tcl = tcl->c_next;
1429f837ee4aSSiddheshwar Mahesh 		}
1430f837ee4aSSiddheshwar Mahesh 
1431f837ee4aSSiddheshwar Mahesh 		/*
1432f837ee4aSSiddheshwar Mahesh 		 * Make a copy of the chunk list, skipping the last chunk
1433f837ee4aSSiddheshwar Mahesh 		 */
1434f837ee4aSSiddheshwar Mahesh 		if (rndup_present) {
1435f837ee4aSSiddheshwar Mahesh 			cl = wcl;
1436f837ee4aSSiddheshwar Mahesh 			tcl = NULL;
1437f837ee4aSSiddheshwar Mahesh 			while (cl) {
1438f837ee4aSSiddheshwar Mahesh 				if (tcl == NULL) {
1439f837ee4aSSiddheshwar Mahesh 					tcl = clist_alloc();
1440f837ee4aSSiddheshwar Mahesh 					wrcl = tcl;
1441f837ee4aSSiddheshwar Mahesh 				} else {
1442f837ee4aSSiddheshwar Mahesh 					tcl->c_next = clist_alloc();
1443f837ee4aSSiddheshwar Mahesh 					tcl = tcl->c_next;
1444f837ee4aSSiddheshwar Mahesh 				}
1445f837ee4aSSiddheshwar Mahesh 
1446f837ee4aSSiddheshwar Mahesh 				*tcl = *cl;
1447f837ee4aSSiddheshwar Mahesh 				cl = cl->c_next;
1448f837ee4aSSiddheshwar Mahesh 				/* last chunk */
1449f837ee4aSSiddheshwar Mahesh 				if (cl->c_next == NULL)
1450f837ee4aSSiddheshwar Mahesh 					break;
1451f837ee4aSSiddheshwar Mahesh 			}
1452f837ee4aSSiddheshwar Mahesh 			tcl->c_next = NULL;
1453f837ee4aSSiddheshwar Mahesh 		}
1454f837ee4aSSiddheshwar Mahesh 	}
1455f837ee4aSSiddheshwar Mahesh 
1456f837ee4aSSiddheshwar Mahesh 	if (wrcl == NULL) {
1457f837ee4aSSiddheshwar Mahesh 		/* No roundup chunks */
1458f837ee4aSSiddheshwar Mahesh 		wrcl = wcl;
1459f837ee4aSSiddheshwar Mahesh 	}
1460f837ee4aSSiddheshwar Mahesh 
1461f837ee4aSSiddheshwar Mahesh 	/*
1462f837ee4aSSiddheshwar Mahesh 	 * Set the registered memory handles for the
1463f837ee4aSSiddheshwar Mahesh 	 * rest of the chunks to match the first chunk's.
1464f837ee4aSSiddheshwar Mahesh 	 */
1465f837ee4aSSiddheshwar Mahesh 	tcl = wrcl->c_next;
1466f837ee4aSSiddheshwar Mahesh 	while (tcl) {
1467f837ee4aSSiddheshwar Mahesh 		tcl->c_smemhandle = fcl.c_smemhandle;
1468f837ee4aSSiddheshwar Mahesh 		tcl->c_ssynchandle = fcl.c_ssynchandle;
1469f837ee4aSSiddheshwar Mahesh 		tcl = tcl->c_next;
1470f837ee4aSSiddheshwar Mahesh 	}
1471f837ee4aSSiddheshwar Mahesh 
1472f837ee4aSSiddheshwar Mahesh 	/*
1473f837ee4aSSiddheshwar Mahesh 	 * Sync the total len beginning from the first chunk.
1474f837ee4aSSiddheshwar Mahesh 	 */
1475f837ee4aSSiddheshwar Mahesh 	fcl.c_len = clist_len(wrcl);
1476f837ee4aSSiddheshwar Mahesh 	status = clist_syncmem(xdrp->xp_conn, &fcl, CLIST_REG_SOURCE);
14770a701b1eSRobert Gordon 	if (status != RDMA_SUCCESS) {
14780a701b1eSRobert Gordon 		return (FALSE);
14790a701b1eSRobert Gordon 	}
14800a701b1eSRobert Gordon 
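	/* RDMA_WRITE the data to the client's write chunks and wait for completion. */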
1481f837ee4aSSiddheshwar Mahesh 	status = RDMA_WRITE(xdrp->xp_conn, wrcl, WAIT);
1482f837ee4aSSiddheshwar Mahesh 
1483f837ee4aSSiddheshwar Mahesh 	if (rndup_present)
1484f837ee4aSSiddheshwar Mahesh 		clist_free(wrcl);
1485f837ee4aSSiddheshwar Mahesh 
14860a701b1eSRobert Gordon 	if (status != RDMA_SUCCESS) {
14870a701b1eSRobert Gordon 		return (FALSE);
14880a701b1eSRobert Gordon 	}
14890a701b1eSRobert Gordon 
14900a701b1eSRobert Gordon 	return (TRUE);
14910a701b1eSRobert Gordon }
1492f837ee4aSSiddheshwar Mahesh 
1493f837ee4aSSiddheshwar Mahesh 
1494f837ee4aSSiddheshwar Mahesh /*
1495f837ee4aSSiddheshwar Mahesh  * Reads one chunk at a time
1496f837ee4aSSiddheshwar Mahesh  */
1497f837ee4aSSiddheshwar Mahesh 
1498f837ee4aSSiddheshwar Mahesh static bool_t
1499f837ee4aSSiddheshwar Mahesh xdrrdma_read_a_chunk(XDR *xdrs, CONN **conn)
1500f837ee4aSSiddheshwar Mahesh {
1501f837ee4aSSiddheshwar Mahesh 	int status;
1502f837ee4aSSiddheshwar Mahesh 	int32_t len = 0;
1503f837ee4aSSiddheshwar Mahesh 	xrdma_private_t	*xdrp = (xrdma_private_t *)(xdrs->x_private);
1504f837ee4aSSiddheshwar Mahesh 	struct clist *cle = *(xdrp->xp_rcl_next);
1505f837ee4aSSiddheshwar Mahesh 	struct clist *rclp = xdrp->xp_rcl;
1506f837ee4aSSiddheshwar Mahesh 	struct clist *clp;
1507f837ee4aSSiddheshwar Mahesh 
1508f837ee4aSSiddheshwar Mahesh 	/*
1509f837ee4aSSiddheshwar Mahesh 	 * len is used later to decide the xdr offset in
1510f837ee4aSSiddheshwar Mahesh 	 * the chunk, factoring in any 4-byte XDR alignment.
1511f837ee4aSSiddheshwar Mahesh 	 * (See the read chunk example at the top of this file.)
1512f837ee4aSSiddheshwar Mahesh 	 */
1513f837ee4aSSiddheshwar Mahesh 	while (rclp != cle) {
1514f837ee4aSSiddheshwar Mahesh 		len += rclp->c_len;
1515f837ee4aSSiddheshwar Mahesh 		rclp = rclp->c_next;
1516f837ee4aSSiddheshwar Mahesh 	}
1517f837ee4aSSiddheshwar Mahesh 
1518f837ee4aSSiddheshwar Mahesh 	len = RNDUP(len) - len;
1519f837ee4aSSiddheshwar Mahesh 
1520f837ee4aSSiddheshwar Mahesh 	ASSERT(xdrs->x_handy <= 0);
1521f837ee4aSSiddheshwar Mahesh 
1522f837ee4aSSiddheshwar Mahesh 	/*
1523f837ee4aSSiddheshwar Mahesh 	 * If this is the first chunk to contain the RPC
1524f837ee4aSSiddheshwar Mahesh 	 * message, set xp_off to the xdr offset of the
1525f837ee4aSSiddheshwar Mahesh 	 * inline message.
1526f837ee4aSSiddheshwar Mahesh 	 */
1527f837ee4aSSiddheshwar Mahesh 	if (xdrp->xp_off == 0)
1528f837ee4aSSiddheshwar Mahesh 		xdrp->xp_off = (xdrp->xp_offp - xdrs->x_base);
1529f837ee4aSSiddheshwar Mahesh 
1530f837ee4aSSiddheshwar Mahesh 	if (cle == NULL || (cle->c_xdroff != xdrp->xp_off))
1531f837ee4aSSiddheshwar Mahesh 		return (FALSE);
1532f837ee4aSSiddheshwar Mahesh 
1533f837ee4aSSiddheshwar Mahesh 	/*
1534f837ee4aSSiddheshwar Mahesh 	 * Make a copy of the chunk to read from client.
1535f837ee4aSSiddheshwar Mahesh 	 * Chunks are read on demand, so read only one
1536f837ee4aSSiddheshwar Mahesh 	 * for now.
1537f837ee4aSSiddheshwar Mahesh 	 */
1538f837ee4aSSiddheshwar Mahesh 
1539f837ee4aSSiddheshwar Mahesh 	rclp = clist_alloc();
1540f837ee4aSSiddheshwar Mahesh 	*rclp = *cle;
1541f837ee4aSSiddheshwar Mahesh 	rclp->c_next = NULL;
1542f837ee4aSSiddheshwar Mahesh 
1543f837ee4aSSiddheshwar Mahesh 	xdrp->xp_rcl_next = &cle->c_next;
1544f837ee4aSSiddheshwar Mahesh 
1545f837ee4aSSiddheshwar Mahesh 	/*
1546f837ee4aSSiddheshwar Mahesh 	 * If there is a roundup present, then skip those
1547f837ee4aSSiddheshwar Mahesh 	 * bytes when reading.
1548f837ee4aSSiddheshwar Mahesh 	 */
1549f837ee4aSSiddheshwar Mahesh 	if (len) {
1550f837ee4aSSiddheshwar Mahesh 		rclp->w.c_saddr =
1551f837ee4aSSiddheshwar Mahesh 		    (uint64)(uintptr_t)rclp->w.c_saddr + len;
1552f837ee4aSSiddheshwar Mahesh 		rclp->c_len = rclp->c_len - len;
1553f837ee4aSSiddheshwar Mahesh 	}
1554f837ee4aSSiddheshwar Mahesh 
1555f837ee4aSSiddheshwar Mahesh 	status = xdrrdma_read_from_client(rclp, conn, rclp->c_len);
1556f837ee4aSSiddheshwar Mahesh 
1557f837ee4aSSiddheshwar Mahesh 	if (status == FALSE) {
1558f837ee4aSSiddheshwar Mahesh 		clist_free(rclp);
1559f837ee4aSSiddheshwar Mahesh 		return (status);
1560f837ee4aSSiddheshwar Mahesh 	}
1561f837ee4aSSiddheshwar Mahesh 
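	/* Point the XDR stream at the buffer holding the chunk just read. */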
1562f837ee4aSSiddheshwar Mahesh 	xdrp->xp_offp = rclp->rb_longbuf.addr;
1563f837ee4aSSiddheshwar Mahesh 	xdrs->x_base = xdrp->xp_offp;
1564f837ee4aSSiddheshwar Mahesh 	xdrs->x_handy = rclp->c_len;
1565f837ee4aSSiddheshwar Mahesh 
1566f837ee4aSSiddheshwar Mahesh 	/*
1567f837ee4aSSiddheshwar Mahesh 	 * This copy of read chunks containing the XDR
1568f837ee4aSSiddheshwar Mahesh 	 * message is freed later in xdrrdma_destroy()
1569f837ee4aSSiddheshwar Mahesh 	 */
1570f837ee4aSSiddheshwar Mahesh 
1571f837ee4aSSiddheshwar Mahesh 	if (xdrp->xp_rcl_xdr) {
1572f837ee4aSSiddheshwar Mahesh 		/* Add the chunk to end of the list */
1573f837ee4aSSiddheshwar Mahesh 		clp = xdrp->xp_rcl_xdr;
1574f837ee4aSSiddheshwar Mahesh 		while (clp->c_next != NULL)
1575f837ee4aSSiddheshwar Mahesh 			clp = clp->c_next;
1576f837ee4aSSiddheshwar Mahesh 		clp->c_next = rclp;
1577f837ee4aSSiddheshwar Mahesh 	} else {
1578f837ee4aSSiddheshwar Mahesh 		xdrp->xp_rcl_xdr = rclp;
1579f837ee4aSSiddheshwar Mahesh 	}
1580f837ee4aSSiddheshwar Mahesh 	return (TRUE);
1581f837ee4aSSiddheshwar Mahesh }
1582f837ee4aSSiddheshwar Mahesh 
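/*
 * Deregister and free the read chunks (and their buffers) that
 * carried parts of the XDR-encoded message.
 */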
1583f837ee4aSSiddheshwar Mahesh static void
1584f837ee4aSSiddheshwar Mahesh xdrrdma_free_xdr_chunks(CONN *conn, struct clist *xdr_rcl)
1585f837ee4aSSiddheshwar Mahesh {
1586f837ee4aSSiddheshwar Mahesh 	struct clist *cl;
1587f837ee4aSSiddheshwar Mahesh 
1588f837ee4aSSiddheshwar Mahesh 	(void) clist_deregister(conn, xdr_rcl);
1589f837ee4aSSiddheshwar Mahesh 
1590f837ee4aSSiddheshwar Mahesh 	/*
1591f837ee4aSSiddheshwar Mahesh 	 * Read chunks containing parts of the XDR message are
1592f837ee4aSSiddheshwar Mahesh 	 * special: in the case of multiple chunks, each has
1593f837ee4aSSiddheshwar Mahesh 	 * its own buffer.
1594f837ee4aSSiddheshwar Mahesh 	 */
1595f837ee4aSSiddheshwar Mahesh 
1596f837ee4aSSiddheshwar Mahesh 	cl = xdr_rcl;
1597f837ee4aSSiddheshwar Mahesh 	while (cl) {
1598f837ee4aSSiddheshwar Mahesh 		rdma_buf_free(conn, &cl->rb_longbuf);
1599f837ee4aSSiddheshwar Mahesh 		cl = cl->c_next;
1600f837ee4aSSiddheshwar Mahesh 	}
1601f837ee4aSSiddheshwar Mahesh 
1602f837ee4aSSiddheshwar Mahesh 	clist_free(xdr_rcl);
1603f837ee4aSSiddheshwar Mahesh }
1604