17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 50a701b1eSRobert Gordon * Common Development and Distribution License (the "License"). 60a701b1eSRobert Gordon * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22*60536ef9SKaren Rochford * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 267c478bd9Sstevel@tonic-gate /* All Rights Reserved */ 277c478bd9Sstevel@tonic-gate /* 287c478bd9Sstevel@tonic-gate * Portions of this source code were derived from Berkeley 297c478bd9Sstevel@tonic-gate * 4.3 BSD under license from the Regents of the University of 307c478bd9Sstevel@tonic-gate * California. 
317c478bd9Sstevel@tonic-gate */ 327c478bd9Sstevel@tonic-gate 337c478bd9Sstevel@tonic-gate /* 347c478bd9Sstevel@tonic-gate * Server side of RPC over RDMA in the kernel. 357c478bd9Sstevel@tonic-gate */ 367c478bd9Sstevel@tonic-gate 377c478bd9Sstevel@tonic-gate #include <sys/param.h> 387c478bd9Sstevel@tonic-gate #include <sys/types.h> 397c478bd9Sstevel@tonic-gate #include <sys/user.h> 407c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 417c478bd9Sstevel@tonic-gate #include <sys/proc.h> 427c478bd9Sstevel@tonic-gate #include <sys/file.h> 437c478bd9Sstevel@tonic-gate #include <sys/errno.h> 447c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 457c478bd9Sstevel@tonic-gate #include <sys/debug.h> 467c478bd9Sstevel@tonic-gate #include <sys/systm.h> 477c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 487c478bd9Sstevel@tonic-gate #include <sys/kstat.h> 497c478bd9Sstevel@tonic-gate #include <sys/vtrace.h> 507c478bd9Sstevel@tonic-gate #include <sys/debug.h> 517c478bd9Sstevel@tonic-gate 527c478bd9Sstevel@tonic-gate #include <rpc/types.h> 537c478bd9Sstevel@tonic-gate #include <rpc/xdr.h> 547c478bd9Sstevel@tonic-gate #include <rpc/auth.h> 557c478bd9Sstevel@tonic-gate #include <rpc/clnt.h> 567c478bd9Sstevel@tonic-gate #include <rpc/rpc_msg.h> 577c478bd9Sstevel@tonic-gate #include <rpc/svc.h> 587c478bd9Sstevel@tonic-gate #include <rpc/rpc_rdma.h> 597c478bd9Sstevel@tonic-gate #include <sys/ddi.h> 607c478bd9Sstevel@tonic-gate #include <sys/sunddi.h> 617c478bd9Sstevel@tonic-gate 627c478bd9Sstevel@tonic-gate #include <inet/common.h> 637c478bd9Sstevel@tonic-gate #include <inet/ip.h> 647c478bd9Sstevel@tonic-gate #include <inet/ip6.h> 657c478bd9Sstevel@tonic-gate 660a701b1eSRobert Gordon #include <nfs/nfs.h> 670a701b1eSRobert Gordon #include <sys/sdt.h> 680a701b1eSRobert Gordon 690a701b1eSRobert Gordon #define SVC_RDMA_SUCCESS 0 700a701b1eSRobert Gordon #define SVC_RDMA_FAIL -1 710a701b1eSRobert Gordon 720a701b1eSRobert Gordon #define SVC_CREDIT_FACTOR (0.5) 730a701b1eSRobert Gordon 
/*
 * True when the reply's accepted verifier carries the RPCSEC_GSS flavor;
 * GSS replies need extra headroom for the wrapped verifier.
 */
#define	MSG_IS_RPCSEC_GSS(msg)	\
	((msg)->rm_reply.rp_acpt.ar_verf.oa_flavor == RPCSEC_GSS)


/* Number of RDMA credits (receive buffers) granted to peers (tunable). */
uint32_t rdma_bufs_granted = RDMA_BUFS_GRANT;

/*
 * RDMA transport specific data associated with SVCMASTERXPRT
 */
struct rdma_data {
	SVCMASTERXPRT	*rd_xprt;	/* back ptr to SVCMASTERXPRT */
	struct rdma_svc_data rd_data;	/* rdma data */
	rdma_mod_t	*r_mod;		/* RDMA module containing ops ptr */
};

/*
 * Plugin connection specific data stashed away in clone SVCXPRT
 */
struct clone_rdma_data {
	bool_t		cloned;		/* xprt cloned for thread processing */
	CONN		*conn;		/* RDMA connection */
	rdma_buf_t	rpcbuf;		/* RPC req/resp buffer */
	struct clist	*cl_reply;	/* reply chunk buffer info */
	struct clist	*cl_wlist;	/* write list clist */
};


#define	MAXADDRLEN	128	/* max length for address mask */
/*
 * Routines exported through ops vector.
 */
static bool_t	svc_rdma_krecv(SVCXPRT *, mblk_t *, struct rpc_msg *);
static bool_t	svc_rdma_ksend(SVCXPRT *, struct rpc_msg *);
static bool_t	svc_rdma_kgetargs(SVCXPRT *, xdrproc_t, caddr_t);
static bool_t	svc_rdma_kfreeargs(SVCXPRT *, xdrproc_t, caddr_t);
void		svc_rdma_kdestroy(SVCMASTERXPRT *);
static int	svc_rdma_kdup(struct svc_req *, caddr_t, int,
			struct dupreq **, bool_t *);
static void	svc_rdma_kdupdone(struct dupreq *, caddr_t,
			void (*)(), int, int);
static int32_t	*svc_rdma_kgetres(SVCXPRT *, int);
static void	svc_rdma_kfreeres(SVCXPRT *);
static void	svc_rdma_kclone_destroy(SVCXPRT *);
static void	svc_rdma_kstart(SVCMASTERXPRT *);
void		svc_rdma_kstop(SVCMASTERXPRT *);
static void	svc_rdma_kclone_xprt(SVCXPRT *, SVCXPRT *);

/*
 * Internal helpers for building replies that do not fit inline and must
 * be transferred via the client-provided write/reply chunk lists.
 */
static int svc_process_long_reply(SVCXPRT *, xdrproc_t,
		caddr_t, struct rpc_msg *, bool_t, int *,
		int *, int *, unsigned int *);

static int svc_compose_rpcmsg(SVCXPRT *, CONN *, xdrproc_t,
		caddr_t, rdma_buf_t *, XDR **, struct rpc_msg *,
		bool_t, uint_t *);
static bool_t rpcmsg_length(xdrproc_t,
		caddr_t,
		struct rpc_msg *, bool_t, int);
/*
 * Server transport operations vector.
 */
struct svc_ops rdma_svc_ops = {
	svc_rdma_krecv,		/* Get requests */
	svc_rdma_kgetargs,	/* Deserialize arguments */
	svc_rdma_ksend,		/* Send reply */
	svc_rdma_kfreeargs,	/* Free argument data space */
	svc_rdma_kdestroy,	/* Destroy transport handle */
	svc_rdma_kdup,		/* Check entry in dup req cache */
	svc_rdma_kdupdone,	/* Mark entry in dup req cache as done */
	svc_rdma_kgetres,	/* Get pointer to response buffer */
	svc_rdma_kfreeres,	/* Destroy pre-serialized response header */
	svc_rdma_kclone_destroy,	/* Destroy a clone xprt */
	svc_rdma_kstart,	/* Tell `ready-to-receive' to rpcmod */
	svc_rdma_kclone_xprt	/* Transport specific clone xprt */
};
/*
 * Server statistics
 * NOTE: This structure type is duplicated in the NFS fast path.
 */
struct {
	kstat_named_t	rscalls;
	kstat_named_t	rsbadcalls;
	kstat_named_t	rsnullrecv;
	kstat_named_t	rsbadlen;
	kstat_named_t	rsxdrcall;
	kstat_named_t	rsdupchecks;
	kstat_named_t	rsdupreqs;
	kstat_named_t	rslongrpcs;
	kstat_named_t	rstotalreplies;
	kstat_named_t	rstotallongreplies;
	kstat_named_t	rstotalinlinereplies;
} rdmarsstat = {
	{ "calls", KSTAT_DATA_UINT64 },
	{ "badcalls", KSTAT_DATA_UINT64 },
	{ "nullrecv", KSTAT_DATA_UINT64 },
	{ "badlen", KSTAT_DATA_UINT64 },
	{ "xdrcall", KSTAT_DATA_UINT64 },
	{ "dupchecks", KSTAT_DATA_UINT64 },
	{ "dupreqs", KSTAT_DATA_UINT64 },
	{ "longrpcs", KSTAT_DATA_UINT64 },
	{ "totalreplies", KSTAT_DATA_UINT64 },
	{ "totallongreplies", KSTAT_DATA_UINT64 },
	{ "totalinlinereplies", KSTAT_DATA_UINT64 },
};

/* Exported for kstat registration elsewhere in kRPC. */
kstat_named_t *rdmarsstat_ptr = (kstat_named_t *)&rdmarsstat;
uint_t rdmarsstat_ndata = sizeof (rdmarsstat) / sizeof (kstat_named_t);

/* Lock-free counter bump; safe from concurrent service threads. */
#define	RSSTAT_INCR(x)	atomic_add_64(&rdmarsstat.x.value.ui64, 1)
1877c478bd9Sstevel@tonic-gate * The transport record, output buffer, and private data structure 1887c478bd9Sstevel@tonic-gate * are allocated. The output buffer is serialized into using xdrmem. 1897c478bd9Sstevel@tonic-gate * There is one transport record per user process which implements a 1907c478bd9Sstevel@tonic-gate * set of services. 1917c478bd9Sstevel@tonic-gate */ 1927c478bd9Sstevel@tonic-gate /* ARGSUSED */ 1937c478bd9Sstevel@tonic-gate int 1947c478bd9Sstevel@tonic-gate svc_rdma_kcreate(char *netid, SVC_CALLOUT_TABLE *sct, int id, 1950a701b1eSRobert Gordon rdma_xprt_group_t *started_xprts) 1967c478bd9Sstevel@tonic-gate { 1977c478bd9Sstevel@tonic-gate int error; 1987c478bd9Sstevel@tonic-gate SVCMASTERXPRT *xprt; 1997c478bd9Sstevel@tonic-gate struct rdma_data *rd; 2007c478bd9Sstevel@tonic-gate rdma_registry_t *rmod; 2017c478bd9Sstevel@tonic-gate rdma_xprt_record_t *xprt_rec; 2027c478bd9Sstevel@tonic-gate queue_t *q; 2037c478bd9Sstevel@tonic-gate /* 2047c478bd9Sstevel@tonic-gate * modload the RDMA plugins is not already done. 2057c478bd9Sstevel@tonic-gate */ 2067c478bd9Sstevel@tonic-gate if (!rdma_modloaded) { 2070a701b1eSRobert Gordon /*CONSTANTCONDITION*/ 2080a701b1eSRobert Gordon ASSERT(sizeof (struct clone_rdma_data) <= SVC_P2LEN); 2090a701b1eSRobert Gordon 2107c478bd9Sstevel@tonic-gate mutex_enter(&rdma_modload_lock); 2117c478bd9Sstevel@tonic-gate if (!rdma_modloaded) { 2127c478bd9Sstevel@tonic-gate error = rdma_modload(); 2137c478bd9Sstevel@tonic-gate } 2147c478bd9Sstevel@tonic-gate mutex_exit(&rdma_modload_lock); 2157c478bd9Sstevel@tonic-gate 2167c478bd9Sstevel@tonic-gate if (error) 2177c478bd9Sstevel@tonic-gate return (error); 2187c478bd9Sstevel@tonic-gate } 2197c478bd9Sstevel@tonic-gate 2207c478bd9Sstevel@tonic-gate /* 2217c478bd9Sstevel@tonic-gate * master_xprt_count is the count of master transport handles 2227c478bd9Sstevel@tonic-gate * that were successfully created and are ready to recieve for 2237c478bd9Sstevel@tonic-gate * RDMA based access. 
2247c478bd9Sstevel@tonic-gate */ 2257c478bd9Sstevel@tonic-gate error = 0; 2267c478bd9Sstevel@tonic-gate xprt_rec = NULL; 2277c478bd9Sstevel@tonic-gate rw_enter(&rdma_lock, RW_READER); 2287c478bd9Sstevel@tonic-gate if (rdma_mod_head == NULL) { 2297c478bd9Sstevel@tonic-gate started_xprts->rtg_count = 0; 2307c478bd9Sstevel@tonic-gate rw_exit(&rdma_lock); 2317c478bd9Sstevel@tonic-gate if (rdma_dev_available) 2327c478bd9Sstevel@tonic-gate return (EPROTONOSUPPORT); 2337c478bd9Sstevel@tonic-gate else 2347c478bd9Sstevel@tonic-gate return (ENODEV); 2357c478bd9Sstevel@tonic-gate } 2367c478bd9Sstevel@tonic-gate 2377c478bd9Sstevel@tonic-gate /* 2387c478bd9Sstevel@tonic-gate * If we have reached here, then atleast one RDMA plugin has loaded. 2397c478bd9Sstevel@tonic-gate * Create a master_xprt, make it start listenining on the device, 2407c478bd9Sstevel@tonic-gate * if an error is generated, record it, we might need to shut 2417c478bd9Sstevel@tonic-gate * the master_xprt. 2427c478bd9Sstevel@tonic-gate * SVC_START() calls svc_rdma_kstart which calls plugin binding 2437c478bd9Sstevel@tonic-gate * routines. 2447c478bd9Sstevel@tonic-gate */ 2457c478bd9Sstevel@tonic-gate for (rmod = rdma_mod_head; rmod != NULL; rmod = rmod->r_next) { 2467c478bd9Sstevel@tonic-gate 2477c478bd9Sstevel@tonic-gate /* 2487c478bd9Sstevel@tonic-gate * One SVCMASTERXPRT per RDMA plugin. 
2497c478bd9Sstevel@tonic-gate */ 2507c478bd9Sstevel@tonic-gate xprt = kmem_zalloc(sizeof (*xprt), KM_SLEEP); 2517c478bd9Sstevel@tonic-gate xprt->xp_ops = &rdma_svc_ops; 2527c478bd9Sstevel@tonic-gate xprt->xp_sct = sct; 2537c478bd9Sstevel@tonic-gate xprt->xp_type = T_RDMA; 2547c478bd9Sstevel@tonic-gate mutex_init(&xprt->xp_req_lock, NULL, MUTEX_DEFAULT, NULL); 2557c478bd9Sstevel@tonic-gate mutex_init(&xprt->xp_thread_lock, NULL, MUTEX_DEFAULT, NULL); 2567c478bd9Sstevel@tonic-gate xprt->xp_req_head = (mblk_t *)0; 2577c478bd9Sstevel@tonic-gate xprt->xp_req_tail = (mblk_t *)0; 2587c478bd9Sstevel@tonic-gate xprt->xp_threads = 0; 2597c478bd9Sstevel@tonic-gate xprt->xp_detached_threads = 0; 2607c478bd9Sstevel@tonic-gate 2617c478bd9Sstevel@tonic-gate rd = kmem_zalloc(sizeof (*rd), KM_SLEEP); 2627c478bd9Sstevel@tonic-gate xprt->xp_p2 = (caddr_t)rd; 2637c478bd9Sstevel@tonic-gate rd->rd_xprt = xprt; 2647c478bd9Sstevel@tonic-gate rd->r_mod = rmod->r_mod; 2657c478bd9Sstevel@tonic-gate 2667c478bd9Sstevel@tonic-gate q = &rd->rd_data.q; 2677c478bd9Sstevel@tonic-gate xprt->xp_wq = q; 2687c478bd9Sstevel@tonic-gate q->q_ptr = &rd->rd_xprt; 2697c478bd9Sstevel@tonic-gate xprt->xp_netid = NULL; 2707c478bd9Sstevel@tonic-gate 2717c478bd9Sstevel@tonic-gate xprt->xp_addrmask.maxlen = 2727c478bd9Sstevel@tonic-gate xprt->xp_addrmask.len = sizeof (struct sockaddr_in); 2737c478bd9Sstevel@tonic-gate xprt->xp_addrmask.buf = 2747c478bd9Sstevel@tonic-gate kmem_zalloc(xprt->xp_addrmask.len, KM_SLEEP); 2757c478bd9Sstevel@tonic-gate ((struct sockaddr_in *)xprt->xp_addrmask.buf)->sin_addr.s_addr = 2767c478bd9Sstevel@tonic-gate (uint32_t)~0; 2777c478bd9Sstevel@tonic-gate ((struct sockaddr_in *)xprt->xp_addrmask.buf)->sin_family = 2787c478bd9Sstevel@tonic-gate (ushort_t)~0; 2797c478bd9Sstevel@tonic-gate 2807c478bd9Sstevel@tonic-gate /* 2817c478bd9Sstevel@tonic-gate * Each of the plugins will have their own Service ID 2827c478bd9Sstevel@tonic-gate * to listener specific mapping, like port number for VI 
2837c478bd9Sstevel@tonic-gate * and service name for IB. 2847c478bd9Sstevel@tonic-gate */ 2857c478bd9Sstevel@tonic-gate rd->rd_data.svcid = id; 2867c478bd9Sstevel@tonic-gate error = svc_xprt_register(xprt, id); 2877c478bd9Sstevel@tonic-gate if (error) { 2880a701b1eSRobert Gordon DTRACE_PROBE(krpc__e__svcrdma__xprt__reg); 2897c478bd9Sstevel@tonic-gate goto cleanup; 2907c478bd9Sstevel@tonic-gate } 2917c478bd9Sstevel@tonic-gate 2927c478bd9Sstevel@tonic-gate SVC_START(xprt); 2937c478bd9Sstevel@tonic-gate if (!rd->rd_data.active) { 2947c478bd9Sstevel@tonic-gate svc_xprt_unregister(xprt); 2957c478bd9Sstevel@tonic-gate error = rd->rd_data.err_code; 2967c478bd9Sstevel@tonic-gate goto cleanup; 2977c478bd9Sstevel@tonic-gate } 2987c478bd9Sstevel@tonic-gate 2997c478bd9Sstevel@tonic-gate /* 3007c478bd9Sstevel@tonic-gate * This is set only when there is atleast one or more 3017c478bd9Sstevel@tonic-gate * transports successfully created. We insert the pointer 3027c478bd9Sstevel@tonic-gate * to the created RDMA master xprt into a separately maintained 3037c478bd9Sstevel@tonic-gate * list. This way we can easily reference it later to cleanup, 3047c478bd9Sstevel@tonic-gate * when NFS kRPC service pool is going away/unregistered. 
3057c478bd9Sstevel@tonic-gate */ 3067c478bd9Sstevel@tonic-gate started_xprts->rtg_count ++; 3077c478bd9Sstevel@tonic-gate xprt_rec = kmem_alloc(sizeof (*xprt_rec), KM_SLEEP); 3087c478bd9Sstevel@tonic-gate xprt_rec->rtr_xprt_ptr = xprt; 3097c478bd9Sstevel@tonic-gate xprt_rec->rtr_next = started_xprts->rtg_listhead; 3107c478bd9Sstevel@tonic-gate started_xprts->rtg_listhead = xprt_rec; 3117c478bd9Sstevel@tonic-gate continue; 3127c478bd9Sstevel@tonic-gate cleanup: 3137c478bd9Sstevel@tonic-gate SVC_DESTROY(xprt); 3147c478bd9Sstevel@tonic-gate if (error == RDMA_FAILED) 3157c478bd9Sstevel@tonic-gate error = EPROTONOSUPPORT; 3167c478bd9Sstevel@tonic-gate } 3177c478bd9Sstevel@tonic-gate 3187c478bd9Sstevel@tonic-gate rw_exit(&rdma_lock); 3197c478bd9Sstevel@tonic-gate 3207c478bd9Sstevel@tonic-gate /* 3217c478bd9Sstevel@tonic-gate * Don't return any error even if a single plugin was started 3227c478bd9Sstevel@tonic-gate * successfully. 3237c478bd9Sstevel@tonic-gate */ 3247c478bd9Sstevel@tonic-gate if (started_xprts->rtg_count == 0) 3257c478bd9Sstevel@tonic-gate return (error); 3267c478bd9Sstevel@tonic-gate return (0); 3277c478bd9Sstevel@tonic-gate } 3287c478bd9Sstevel@tonic-gate 3297c478bd9Sstevel@tonic-gate /* 3307c478bd9Sstevel@tonic-gate * Cleanup routine for freeing up memory allocated by 3317c478bd9Sstevel@tonic-gate * svc_rdma_kcreate() 3327c478bd9Sstevel@tonic-gate */ 3337c478bd9Sstevel@tonic-gate void 3347c478bd9Sstevel@tonic-gate svc_rdma_kdestroy(SVCMASTERXPRT *xprt) 3357c478bd9Sstevel@tonic-gate { 3367c478bd9Sstevel@tonic-gate struct rdma_data *rd = (struct rdma_data *)xprt->xp_p2; 3377c478bd9Sstevel@tonic-gate 3387c478bd9Sstevel@tonic-gate 3397c478bd9Sstevel@tonic-gate mutex_destroy(&xprt->xp_req_lock); 3407c478bd9Sstevel@tonic-gate mutex_destroy(&xprt->xp_thread_lock); 3417c478bd9Sstevel@tonic-gate kmem_free(rd, sizeof (*rd)); 3427c478bd9Sstevel@tonic-gate kmem_free(xprt->xp_addrmask.buf, xprt->xp_addrmask.maxlen); 3437c478bd9Sstevel@tonic-gate kmem_free(xprt, 
sizeof (*xprt)); 3447c478bd9Sstevel@tonic-gate } 3457c478bd9Sstevel@tonic-gate 3467c478bd9Sstevel@tonic-gate 3477c478bd9Sstevel@tonic-gate static void 3487c478bd9Sstevel@tonic-gate svc_rdma_kstart(SVCMASTERXPRT *xprt) 3497c478bd9Sstevel@tonic-gate { 3507c478bd9Sstevel@tonic-gate struct rdma_svc_data *svcdata; 3517c478bd9Sstevel@tonic-gate rdma_mod_t *rmod; 3527c478bd9Sstevel@tonic-gate 3537c478bd9Sstevel@tonic-gate svcdata = &((struct rdma_data *)xprt->xp_p2)->rd_data; 3547c478bd9Sstevel@tonic-gate rmod = ((struct rdma_data *)xprt->xp_p2)->r_mod; 3557c478bd9Sstevel@tonic-gate 3567c478bd9Sstevel@tonic-gate /* 3577c478bd9Sstevel@tonic-gate * Create a listener for module at this port 3587c478bd9Sstevel@tonic-gate */ 3597c478bd9Sstevel@tonic-gate 36051f34d4bSRajkumar Sivaprakasam if (rmod->rdma_count != 0) 36151f34d4bSRajkumar Sivaprakasam (*rmod->rdma_ops->rdma_svc_listen)(svcdata); 36251f34d4bSRajkumar Sivaprakasam else 36351f34d4bSRajkumar Sivaprakasam svcdata->err_code = RDMA_FAILED; 3647c478bd9Sstevel@tonic-gate } 3657c478bd9Sstevel@tonic-gate 3667c478bd9Sstevel@tonic-gate void 3677c478bd9Sstevel@tonic-gate svc_rdma_kstop(SVCMASTERXPRT *xprt) 3687c478bd9Sstevel@tonic-gate { 3697c478bd9Sstevel@tonic-gate struct rdma_svc_data *svcdata; 3707c478bd9Sstevel@tonic-gate rdma_mod_t *rmod; 3717c478bd9Sstevel@tonic-gate 3727c478bd9Sstevel@tonic-gate svcdata = &((struct rdma_data *)xprt->xp_p2)->rd_data; 3737c478bd9Sstevel@tonic-gate rmod = ((struct rdma_data *)xprt->xp_p2)->r_mod; 3747c478bd9Sstevel@tonic-gate 3757c478bd9Sstevel@tonic-gate /* 37651f34d4bSRajkumar Sivaprakasam * Call the stop listener routine for each plugin. If rdma_count is 37751f34d4bSRajkumar Sivaprakasam * already zero set active to zero. 
3787c478bd9Sstevel@tonic-gate */ 37951f34d4bSRajkumar Sivaprakasam if (rmod->rdma_count != 0) 38051f34d4bSRajkumar Sivaprakasam (*rmod->rdma_ops->rdma_svc_stop)(svcdata); 38151f34d4bSRajkumar Sivaprakasam else 38251f34d4bSRajkumar Sivaprakasam svcdata->active = 0; 3837c478bd9Sstevel@tonic-gate if (svcdata->active) 3840a701b1eSRobert Gordon DTRACE_PROBE(krpc__e__svcrdma__kstop); 3857c478bd9Sstevel@tonic-gate } 3867c478bd9Sstevel@tonic-gate 3877c478bd9Sstevel@tonic-gate /* ARGSUSED */ 3887c478bd9Sstevel@tonic-gate static void 3897c478bd9Sstevel@tonic-gate svc_rdma_kclone_destroy(SVCXPRT *clone_xprt) 3907c478bd9Sstevel@tonic-gate { 391*60536ef9SKaren Rochford 392*60536ef9SKaren Rochford struct clone_rdma_data *cdrp; 393*60536ef9SKaren Rochford cdrp = (struct clone_rdma_data *)clone_xprt->xp_p2buf; 394*60536ef9SKaren Rochford 395*60536ef9SKaren Rochford /* 396*60536ef9SKaren Rochford * Only free buffers and release connection when cloned is set. 397*60536ef9SKaren Rochford */ 398*60536ef9SKaren Rochford if (cdrp->cloned != TRUE) 399*60536ef9SKaren Rochford return; 400*60536ef9SKaren Rochford 401*60536ef9SKaren Rochford rdma_buf_free(cdrp->conn, &cdrp->rpcbuf); 402*60536ef9SKaren Rochford if (cdrp->cl_reply) { 403*60536ef9SKaren Rochford clist_free(cdrp->cl_reply); 404*60536ef9SKaren Rochford cdrp->cl_reply = NULL; 405*60536ef9SKaren Rochford } 406*60536ef9SKaren Rochford RDMA_REL_CONN(cdrp->conn); 407*60536ef9SKaren Rochford 408*60536ef9SKaren Rochford cdrp->cloned = 0; 4097c478bd9Sstevel@tonic-gate } 4107c478bd9Sstevel@tonic-gate 411*60536ef9SKaren Rochford /* 412*60536ef9SKaren Rochford * Clone the xprt specific information. It will be freed by 413*60536ef9SKaren Rochford * SVC_CLONE_DESTROY. 
/*
 * Clone the xprt specific information. It will be freed by
 * SVC_CLONE_DESTROY.
 */
static void
svc_rdma_kclone_xprt(SVCXPRT *src_xprt, SVCXPRT *dst_xprt)
{
	struct clone_rdma_data *srcp2;
	struct clone_rdma_data *dstp2;

	srcp2 = (struct clone_rdma_data *)src_xprt->xp_p2buf;
	dstp2 = (struct clone_rdma_data *)dst_xprt->xp_p2buf;

	/*
	 * Marking the source as `cloned' transfers cleanup responsibility:
	 * svc_rdma_kclone_destroy only frees resources when cloned is set.
	 */
	if (srcp2->conn != NULL) {
		srcp2->cloned = TRUE;
		*dstp2 = *srcp2;
	}
}


/*
 * Receive and decode one RPC-over-RDMA call.
 *
 * Parses the RDMA transport header (xid, version, credits, op), decodes
 * the read chunk list, write list and reply chunk, pulls a chunked call
 * message over with RDMA_READ when op == RDMA_NOMSG, then decodes the RPC
 * call header into `msg'.  On success the clone xprt's address/netid/xid
 * fields are populated, the connection is stashed in the clone private
 * data, and TRUE is returned.  On any failure the goto-chain below
 * unwinds exactly the resources acquired so far and FALSE is returned.
 */
static bool_t
svc_rdma_krecv(SVCXPRT *clone_xprt, mblk_t *mp, struct rpc_msg *msg)
{
	XDR	*xdrs;
	CONN	*conn;
	rdma_recv_data_t	*rdp = (rdma_recv_data_t *)mp->b_rptr;
	struct clone_rdma_data *crdp;
	struct clist	*cl = NULL;	/* decoded read chunk list */
	struct clist	*wcl = NULL;	/* decoded write list */
	struct clist	*cllong = NULL;	/* chunk holding a long call msg */

	rdma_stat	status;
	uint32_t vers, op, pos, xid;
	uint32_t rdma_credit;
	uint32_t wcl_total_length = 0;
	bool_t wwl = FALSE;

	crdp = (struct clone_rdma_data *)clone_xprt->xp_p2buf;
	RSSTAT_INCR(rscalls);
	conn = rdp->conn;

	/* Repost a receive buffer so the peer can keep sending. */
	status = rdma_svc_postrecv(conn);
	if (status != RDMA_SUCCESS) {
		DTRACE_PROBE(krpc__e__svcrdma__krecv__postrecv);
		goto badrpc_call;
	}

	xdrs = &clone_xprt->xp_xdrin;
	xdrmem_create(xdrs, rdp->rpcmsg.addr, rdp->rpcmsg.len, XDR_DECODE);
	/* xid sits first in the RDMA transport header; grab it raw. */
	xid = *(uint32_t *)rdp->rpcmsg.addr;
	XDR_SETPOS(xdrs, sizeof (uint32_t));

	if (! xdr_u_int(xdrs, &vers) ||
	    ! xdr_u_int(xdrs, &rdma_credit) ||
	    ! xdr_u_int(xdrs, &op)) {
		DTRACE_PROBE(krpc__e__svcrdma__krecv__uint);
		goto xdr_err;
	}

	/* Checking if the status of the recv operation was normal */
	if (rdp->status != 0) {
		DTRACE_PROBE1(krpc__e__svcrdma__krecv__invalid__status,
		    int, rdp->status);
		goto badrpc_call;
	}

	if (! xdr_do_clist(xdrs, &cl)) {
		DTRACE_PROBE(krpc__e__svcrdma__krecv__do__clist);
		goto xdr_err;
	}

	if (!xdr_decode_wlist_svc(xdrs, &wcl, &wwl, &wcl_total_length, conn)) {
		DTRACE_PROBE(krpc__e__svcrdma__krecv__decode__wlist);
		if (cl)
			clist_free(cl);
		goto xdr_err;
	}
	crdp->cl_wlist = wcl;

	/* Reply chunk decode failure is tolerated: cl_reply stays NULL. */
	crdp->cl_reply = NULL;
	(void) xdr_decode_reply_wchunk(xdrs, &crdp->cl_reply);

	/*
	 * A chunk at 0 offset indicates that the RPC call message
	 * is in a chunk. Get the RPC call message chunk.
	 */
	if (cl != NULL && op == RDMA_NOMSG) {

		/* Remove RPC call message chunk from chunklist */
		cllong = cl;
		cl = cl->c_next;
		cllong->c_next = NULL;


		/* Allocate and register memory for the RPC call msg chunk */
		cllong->rb_longbuf.type = RDMA_LONG_BUFFER;
		cllong->rb_longbuf.len = cllong->c_len > LONG_REPLY_LEN ?
		    cllong->c_len : LONG_REPLY_LEN;

		if (rdma_buf_alloc(conn, &cllong->rb_longbuf)) {
			clist_free(cllong);
			goto cll_malloc_err;
		}

		cllong->u.c_daddr3 = cllong->rb_longbuf.addr;

		if (cllong->u.c_daddr == NULL) {
			DTRACE_PROBE(krpc__e__svcrdma__krecv__nomem);
			rdma_buf_free(conn, &cllong->rb_longbuf);
			clist_free(cllong);
			goto cll_malloc_err;
		}

		status = clist_register(conn, cllong, CLIST_REG_DST);
		if (status) {
			DTRACE_PROBE(krpc__e__svcrdma__krecv__clist__reg);
			rdma_buf_free(conn, &cllong->rb_longbuf);
			clist_free(cllong);
			goto cll_malloc_err;
		}

		/*
		 * Now read the RPC call message in
		 */
		status = RDMA_READ(conn, cllong, WAIT);
		if (status) {
			DTRACE_PROBE(krpc__e__svcrdma__krecv__read);
			(void) clist_deregister(conn, cllong);
			rdma_buf_free(conn, &cllong->rb_longbuf);
			clist_free(cllong);
			goto cll_malloc_err;
		}

		/* Sync the DMA'd data for CPU access, then unregister. */
		status = clist_syncmem(conn, cllong, CLIST_REG_DST);
		(void) clist_deregister(conn, cllong);

		xdrrdma_create(xdrs, (caddr_t)(uintptr_t)cllong->u.c_daddr3,
		    cllong->c_len, 0, cl, XDR_DECODE, conn);

		/* rpcbuf takes ownership of the long buffer; clist freed. */
		crdp->rpcbuf = cllong->rb_longbuf;
		crdp->rpcbuf.len = cllong->c_len;
		clist_free(cllong);
		RDMA_BUF_FREE(conn, &rdp->rpcmsg);
	} else {
		/* Inline call message: decode in place past the header. */
		pos = XDR_GETPOS(xdrs);
		xdrrdma_create(xdrs, rdp->rpcmsg.addr + pos,
		    rdp->rpcmsg.len - pos, 0, cl, XDR_DECODE, conn);
		crdp->rpcbuf = rdp->rpcmsg;

		/* Use xdrrdmablk_ops to indicate there is a read chunk list */
		if (cl != NULL) {
			int32_t flg = XDR_RDMA_RLIST_REG;

			XDR_CONTROL(xdrs, XDR_RDMA_SET_FLAGS, &flg);
			xdrs->x_ops = &xdrrdmablk_ops;
		}
	}

	if (crdp->cl_wlist) {
		int32_t flg = XDR_RDMA_WLIST_REG;

		XDR_CONTROL(xdrs, XDR_RDMA_SET_WLIST, crdp->cl_wlist);
		XDR_CONTROL(xdrs, XDR_RDMA_SET_FLAGS, &flg);
	}

	if (! xdr_callmsg(xdrs, msg)) {
		DTRACE_PROBE(krpc__e__svcrdma__krecv__callmsg);
		RSSTAT_INCR(rsxdrcall);
		goto callmsg_err;
	}

	/*
	 * Point the remote transport address in the service_transport
	 * handle at the address in the request.
	 */
	clone_xprt->xp_rtaddr.buf = conn->c_raddr.buf;
	clone_xprt->xp_rtaddr.len = conn->c_raddr.len;
	clone_xprt->xp_rtaddr.maxlen = conn->c_raddr.len;

	clone_xprt->xp_lcladdr.buf = conn->c_laddr.buf;
	clone_xprt->xp_lcladdr.len = conn->c_laddr.len;
	clone_xprt->xp_lcladdr.maxlen = conn->c_laddr.len;

	/*
	 * In case of RDMA, connection management is
	 * entirely done in rpcib module and netid in the
	 * SVCMASTERXPRT is NULL. Initialize the clone netid
	 * from the connection.
	 */

	clone_xprt->xp_netid = conn->c_netid;

	clone_xprt->xp_xid = xid;
	crdp->conn = conn;

	freeb(mp);

	return (TRUE);

/*
 * Error unwinding: each label releases everything acquired after the
 * previous label, falling through to the earlier labels.
 */
callmsg_err:
	rdma_buf_free(conn, &crdp->rpcbuf);

cll_malloc_err:
	if (cl)
		clist_free(cl);
xdr_err:
	XDR_DESTROY(xdrs);

badrpc_call:
	RDMA_BUF_FREE(conn, &rdp->rpcmsg);
	RDMA_REL_CONN(conn);
	freeb(mp);
	RSSTAT_INCR(rsbadcalls);
	return (FALSE);
}
Gordon 6270a701b1eSRobert Gordon static int 6280a701b1eSRobert Gordon svc_process_long_reply(SVCXPRT * clone_xprt, 6290a701b1eSRobert Gordon xdrproc_t xdr_results, caddr_t xdr_location, 6300a701b1eSRobert Gordon struct rpc_msg *msg, bool_t has_args, int *msglen, 6310a701b1eSRobert Gordon int *freelen, int *numchunks, unsigned int *final_len) 6320a701b1eSRobert Gordon { 6330a701b1eSRobert Gordon int status; 6340a701b1eSRobert Gordon XDR xdrslong; 6350a701b1eSRobert Gordon struct clist *wcl = NULL; 6360a701b1eSRobert Gordon int count = 0; 6370a701b1eSRobert Gordon int alloc_len; 6380a701b1eSRobert Gordon char *memp; 6390a701b1eSRobert Gordon rdma_buf_t long_rpc = {0}; 6400a701b1eSRobert Gordon struct clone_rdma_data *crdp; 6410a701b1eSRobert Gordon 6420a701b1eSRobert Gordon crdp = (struct clone_rdma_data *)clone_xprt->xp_p2buf; 6430a701b1eSRobert Gordon 6440a701b1eSRobert Gordon bzero(&xdrslong, sizeof (xdrslong)); 6450a701b1eSRobert Gordon 6460a701b1eSRobert Gordon /* Choose a size for the long rpc response */ 6470a701b1eSRobert Gordon if (MSG_IS_RPCSEC_GSS(msg)) { 6480a701b1eSRobert Gordon alloc_len = RNDUP(MAX_AUTH_BYTES + *msglen); 6490a701b1eSRobert Gordon } else { 6500a701b1eSRobert Gordon alloc_len = RNDUP(*msglen); 6510a701b1eSRobert Gordon } 6520a701b1eSRobert Gordon 6530a701b1eSRobert Gordon if (alloc_len <= 64 * 1024) { 6540a701b1eSRobert Gordon if (alloc_len > 32 * 1024) { 6550a701b1eSRobert Gordon alloc_len = 64 * 1024; 6560a701b1eSRobert Gordon } else { 6570a701b1eSRobert Gordon if (alloc_len > 16 * 1024) { 6580a701b1eSRobert Gordon alloc_len = 32 * 1024; 6590a701b1eSRobert Gordon } else { 6600a701b1eSRobert Gordon alloc_len = 16 * 1024; 6610a701b1eSRobert Gordon } 6620a701b1eSRobert Gordon } 6630a701b1eSRobert Gordon } 6640a701b1eSRobert Gordon 6650a701b1eSRobert Gordon long_rpc.type = RDMA_LONG_BUFFER; 6660a701b1eSRobert Gordon long_rpc.len = alloc_len; 6670a701b1eSRobert Gordon if (rdma_buf_alloc(crdp->conn, &long_rpc)) { 6680a701b1eSRobert Gordon 
return (SVC_RDMA_FAIL); 6690a701b1eSRobert Gordon } 6700a701b1eSRobert Gordon 6710a701b1eSRobert Gordon memp = long_rpc.addr; 6720a701b1eSRobert Gordon xdrmem_create(&xdrslong, memp, alloc_len, XDR_ENCODE); 6730a701b1eSRobert Gordon 6740a701b1eSRobert Gordon msg->rm_xid = clone_xprt->xp_xid; 6750a701b1eSRobert Gordon 6760a701b1eSRobert Gordon if (!(xdr_replymsg(&xdrslong, msg) && 6770a701b1eSRobert Gordon (!has_args || SVCAUTH_WRAP(&clone_xprt->xp_auth, &xdrslong, 6780a701b1eSRobert Gordon xdr_results, xdr_location)))) { 6790a701b1eSRobert Gordon rdma_buf_free(crdp->conn, &long_rpc); 6800a701b1eSRobert Gordon DTRACE_PROBE(krpc__e__svcrdma__longrep__authwrap); 6810a701b1eSRobert Gordon return (SVC_RDMA_FAIL); 6820a701b1eSRobert Gordon } 6830a701b1eSRobert Gordon 6840a701b1eSRobert Gordon *final_len = XDR_GETPOS(&xdrslong); 6850a701b1eSRobert Gordon 686f837ee4aSSiddheshwar Mahesh DTRACE_PROBE1(krpc__i__replylen, uint_t, *final_len); 6870a701b1eSRobert Gordon *numchunks = 0; 6880a701b1eSRobert Gordon *freelen = 0; 6890a701b1eSRobert Gordon 6900a701b1eSRobert Gordon wcl = crdp->cl_reply; 6910a701b1eSRobert Gordon wcl->rb_longbuf = long_rpc; 6920a701b1eSRobert Gordon 6930a701b1eSRobert Gordon count = *final_len; 694f837ee4aSSiddheshwar Mahesh while ((wcl != NULL) && (count > 0)) { 695f837ee4aSSiddheshwar Mahesh 6960a701b1eSRobert Gordon if (wcl->c_dmemhandle.mrc_rmr == 0) 6970a701b1eSRobert Gordon break; 6980a701b1eSRobert Gordon 699f837ee4aSSiddheshwar Mahesh DTRACE_PROBE2(krpc__i__write__chunks, uint32_t, count, 700f837ee4aSSiddheshwar Mahesh uint32_t, wcl->c_len); 701f837ee4aSSiddheshwar Mahesh 7020a701b1eSRobert Gordon if (wcl->c_len > count) { 7030a701b1eSRobert Gordon wcl->c_len = count; 7040a701b1eSRobert Gordon } 7050a701b1eSRobert Gordon wcl->w.c_saddr3 = (caddr_t)memp; 7060a701b1eSRobert Gordon 7070a701b1eSRobert Gordon count -= wcl->c_len; 7080a701b1eSRobert Gordon *numchunks += 1; 7090a701b1eSRobert Gordon memp += wcl->c_len; 7100a701b1eSRobert Gordon wcl = 
wcl->c_next; 7110a701b1eSRobert Gordon } 7120a701b1eSRobert Gordon 713f837ee4aSSiddheshwar Mahesh /* 714f837ee4aSSiddheshwar Mahesh * Make rest of the chunks 0-len 715f837ee4aSSiddheshwar Mahesh */ 716f837ee4aSSiddheshwar Mahesh while (wcl != NULL) { 717f837ee4aSSiddheshwar Mahesh if (wcl->c_dmemhandle.mrc_rmr == 0) 718f837ee4aSSiddheshwar Mahesh break; 719f837ee4aSSiddheshwar Mahesh wcl->c_len = 0; 720f837ee4aSSiddheshwar Mahesh wcl = wcl->c_next; 721f837ee4aSSiddheshwar Mahesh } 722f837ee4aSSiddheshwar Mahesh 7230a701b1eSRobert Gordon wcl = crdp->cl_reply; 7240a701b1eSRobert Gordon 7250a701b1eSRobert Gordon /* 7260a701b1eSRobert Gordon * MUST fail if there are still more data 7270a701b1eSRobert Gordon */ 7280a701b1eSRobert Gordon if (count > 0) { 7290a701b1eSRobert Gordon rdma_buf_free(crdp->conn, &long_rpc); 7300a701b1eSRobert Gordon DTRACE_PROBE(krpc__e__svcrdma__longrep__dlen__clist); 7310a701b1eSRobert Gordon return (SVC_RDMA_FAIL); 7320a701b1eSRobert Gordon } 7330a701b1eSRobert Gordon 7340a701b1eSRobert Gordon if (clist_register(crdp->conn, wcl, CLIST_REG_SOURCE) != RDMA_SUCCESS) { 7350a701b1eSRobert Gordon rdma_buf_free(crdp->conn, &long_rpc); 7360a701b1eSRobert Gordon DTRACE_PROBE(krpc__e__svcrdma__longrep__clistreg); 7370a701b1eSRobert Gordon return (SVC_RDMA_FAIL); 7380a701b1eSRobert Gordon } 7390a701b1eSRobert Gordon 7400a701b1eSRobert Gordon status = clist_syncmem(crdp->conn, wcl, CLIST_REG_SOURCE); 7410a701b1eSRobert Gordon 7420a701b1eSRobert Gordon if (status) { 743f837ee4aSSiddheshwar Mahesh (void) clist_deregister(crdp->conn, wcl); 7440a701b1eSRobert Gordon rdma_buf_free(crdp->conn, &long_rpc); 7450a701b1eSRobert Gordon DTRACE_PROBE(krpc__e__svcrdma__longrep__syncmem); 7460a701b1eSRobert Gordon return (SVC_RDMA_FAIL); 7470a701b1eSRobert Gordon } 7480a701b1eSRobert Gordon 7490a701b1eSRobert Gordon status = RDMA_WRITE(crdp->conn, wcl, WAIT); 7500a701b1eSRobert Gordon 751f837ee4aSSiddheshwar Mahesh (void) clist_deregister(crdp->conn, wcl); 
7520a701b1eSRobert Gordon rdma_buf_free(crdp->conn, &wcl->rb_longbuf); 7530a701b1eSRobert Gordon 7540a701b1eSRobert Gordon if (status != RDMA_SUCCESS) { 7550a701b1eSRobert Gordon DTRACE_PROBE(krpc__e__svcrdma__longrep__write); 7560a701b1eSRobert Gordon return (SVC_RDMA_FAIL); 7570a701b1eSRobert Gordon } 7580a701b1eSRobert Gordon 7590a701b1eSRobert Gordon return (SVC_RDMA_SUCCESS); 7600a701b1eSRobert Gordon } 7610a701b1eSRobert Gordon 7620a701b1eSRobert Gordon 7630a701b1eSRobert Gordon static int 7640a701b1eSRobert Gordon svc_compose_rpcmsg(SVCXPRT * clone_xprt, CONN * conn, xdrproc_t xdr_results, 7650a701b1eSRobert Gordon caddr_t xdr_location, rdma_buf_t *rpcreply, XDR ** xdrs, 7660a701b1eSRobert Gordon struct rpc_msg *msg, bool_t has_args, uint_t *len) 7670a701b1eSRobert Gordon { 7680a701b1eSRobert Gordon /* 7690a701b1eSRobert Gordon * Get a pre-allocated buffer for rpc reply 7700a701b1eSRobert Gordon */ 7710a701b1eSRobert Gordon rpcreply->type = SEND_BUFFER; 7720a701b1eSRobert Gordon if (rdma_buf_alloc(conn, rpcreply)) { 7730a701b1eSRobert Gordon DTRACE_PROBE(krpc__e__svcrdma__rpcmsg__reply__nofreebufs); 7740a701b1eSRobert Gordon return (SVC_RDMA_FAIL); 7750a701b1eSRobert Gordon } 7760a701b1eSRobert Gordon 7770a701b1eSRobert Gordon xdrrdma_create(*xdrs, rpcreply->addr, rpcreply->len, 7780a701b1eSRobert Gordon 0, NULL, XDR_ENCODE, conn); 7790a701b1eSRobert Gordon 7800a701b1eSRobert Gordon msg->rm_xid = clone_xprt->xp_xid; 7810a701b1eSRobert Gordon 7820a701b1eSRobert Gordon if (has_args) { 7830a701b1eSRobert Gordon if (!(xdr_replymsg(*xdrs, msg) && 7840a701b1eSRobert Gordon (!has_args || 7850a701b1eSRobert Gordon SVCAUTH_WRAP(&clone_xprt->xp_auth, *xdrs, 7860a701b1eSRobert Gordon xdr_results, xdr_location)))) { 7870a701b1eSRobert Gordon rdma_buf_free(conn, rpcreply); 7880a701b1eSRobert Gordon DTRACE_PROBE( 7890a701b1eSRobert Gordon krpc__e__svcrdma__rpcmsg__reply__authwrap1); 7900a701b1eSRobert Gordon return (SVC_RDMA_FAIL); 7910a701b1eSRobert Gordon } 
7920a701b1eSRobert Gordon } else { 7930a701b1eSRobert Gordon if (!xdr_replymsg(*xdrs, msg)) { 7940a701b1eSRobert Gordon rdma_buf_free(conn, rpcreply); 7950a701b1eSRobert Gordon DTRACE_PROBE( 7960a701b1eSRobert Gordon krpc__e__svcrdma__rpcmsg__reply__authwrap2); 7970a701b1eSRobert Gordon return (SVC_RDMA_FAIL); 7980a701b1eSRobert Gordon } 7990a701b1eSRobert Gordon } 8000a701b1eSRobert Gordon 8010a701b1eSRobert Gordon *len = XDR_GETPOS(*xdrs); 8020a701b1eSRobert Gordon 8030a701b1eSRobert Gordon return (SVC_RDMA_SUCCESS); 8047c478bd9Sstevel@tonic-gate } 8057c478bd9Sstevel@tonic-gate 8067c478bd9Sstevel@tonic-gate /* 8077c478bd9Sstevel@tonic-gate * Send rpc reply. 8087c478bd9Sstevel@tonic-gate */ 8097c478bd9Sstevel@tonic-gate static bool_t 8100a701b1eSRobert Gordon svc_rdma_ksend(SVCXPRT * clone_xprt, struct rpc_msg *msg) 8117c478bd9Sstevel@tonic-gate { 8120a701b1eSRobert Gordon XDR *xdrs_rpc = &(clone_xprt->xp_xdrout); 8130a701b1eSRobert Gordon XDR xdrs_rhdr; 8140a701b1eSRobert Gordon CONN *conn = NULL; 8150a701b1eSRobert Gordon rdma_buf_t rbuf_resp = {0}, rbuf_rpc_resp = {0}; 8160a701b1eSRobert Gordon 8170a701b1eSRobert Gordon struct clone_rdma_data *crdp; 8180a701b1eSRobert Gordon struct clist *cl_read = NULL; 8190a701b1eSRobert Gordon struct clist *cl_send = NULL; 8200a701b1eSRobert Gordon struct clist *cl_write = NULL; 8210a701b1eSRobert Gordon xdrproc_t xdr_results; /* results XDR encoding function */ 8220a701b1eSRobert Gordon caddr_t xdr_location; /* response results pointer */ 8230a701b1eSRobert Gordon 8247c478bd9Sstevel@tonic-gate int retval = FALSE; 8250a701b1eSRobert Gordon int status, msglen, num_wreply_segments = 0; 8260a701b1eSRobert Gordon uint32_t rdma_credit = 0; 8270a701b1eSRobert Gordon int freelen = 0; 8280a701b1eSRobert Gordon bool_t has_args; 8290a701b1eSRobert Gordon uint_t final_resp_len, rdma_response_op, vers; 8307c478bd9Sstevel@tonic-gate 8310a701b1eSRobert Gordon bzero(&xdrs_rhdr, sizeof (XDR)); 8320a701b1eSRobert Gordon crdp = (struct 
clone_rdma_data *)clone_xprt->xp_p2buf; 8330a701b1eSRobert Gordon conn = crdp->conn; 8347c478bd9Sstevel@tonic-gate 8357c478bd9Sstevel@tonic-gate /* 8367c478bd9Sstevel@tonic-gate * If there is a result procedure specified in the reply message, 8377c478bd9Sstevel@tonic-gate * it will be processed in the xdr_replymsg and SVCAUTH_WRAP. 8387c478bd9Sstevel@tonic-gate * We need to make sure it won't be processed twice, so we null 8397c478bd9Sstevel@tonic-gate * it for xdr_replymsg here. 8407c478bd9Sstevel@tonic-gate */ 8417c478bd9Sstevel@tonic-gate has_args = FALSE; 8427c478bd9Sstevel@tonic-gate if (msg->rm_reply.rp_stat == MSG_ACCEPTED && 8437c478bd9Sstevel@tonic-gate msg->rm_reply.rp_acpt.ar_stat == SUCCESS) { 8447c478bd9Sstevel@tonic-gate if ((xdr_results = msg->acpted_rply.ar_results.proc) != NULL) { 8457c478bd9Sstevel@tonic-gate has_args = TRUE; 8467c478bd9Sstevel@tonic-gate xdr_location = msg->acpted_rply.ar_results.where; 8477c478bd9Sstevel@tonic-gate msg->acpted_rply.ar_results.proc = xdr_void; 8487c478bd9Sstevel@tonic-gate msg->acpted_rply.ar_results.where = NULL; 8497c478bd9Sstevel@tonic-gate } 8507c478bd9Sstevel@tonic-gate } 8517c478bd9Sstevel@tonic-gate 8527c478bd9Sstevel@tonic-gate /* 8530a701b1eSRobert Gordon * Given the limit on the inline response size (RPC_MSG_SZ), 8540a701b1eSRobert Gordon * there is a need to make a guess as to the overall size of 8550a701b1eSRobert Gordon * the response. If the resultant size is beyond the inline 8560a701b1eSRobert Gordon * size, then the server needs to use the "reply chunk list" 8570a701b1eSRobert Gordon * provided by the client (if the client provided one). An 8580a701b1eSRobert Gordon * example of this type of response would be a READDIR 8590a701b1eSRobert Gordon * response (e.g. 
a small directory read would fit in RPC_MSG_SZ 8600a701b1eSRobert Gordon * and that is the preference but it may not fit) 8610a701b1eSRobert Gordon * 8620a701b1eSRobert Gordon * Combine the encoded size and the size of the true results 8630a701b1eSRobert Gordon * and then make the decision about where to encode and send results. 8640a701b1eSRobert Gordon * 8650a701b1eSRobert Gordon * One important note, this calculation is ignoring the size 8660a701b1eSRobert Gordon * of the encoding of the authentication overhead. The reason 8670a701b1eSRobert Gordon * for this is rooted in the complexities of access to the 8680a701b1eSRobert Gordon * encoded size of RPCSEC_GSS related authentiation, 8690a701b1eSRobert Gordon * integrity, and privacy. 8700a701b1eSRobert Gordon * 8710a701b1eSRobert Gordon * If it turns out that the encoded authentication bumps the 8720a701b1eSRobert Gordon * response over the RPC_MSG_SZ limit, then it may need to 8730a701b1eSRobert Gordon * attempt to encode for the reply chunk list. 8740a701b1eSRobert Gordon */ 8750a701b1eSRobert Gordon 8760a701b1eSRobert Gordon /* 8770a701b1eSRobert Gordon * Calculating the "sizeof" the RPC response header and the 8780a701b1eSRobert Gordon * encoded results. 
8797c478bd9Sstevel@tonic-gate */ 8807c478bd9Sstevel@tonic-gate msglen = xdr_sizeof(xdr_replymsg, msg); 8810a701b1eSRobert Gordon 8820a701b1eSRobert Gordon if (msglen > 0) { 8830a701b1eSRobert Gordon RSSTAT_INCR(rstotalreplies); 8840a701b1eSRobert Gordon } 8850a701b1eSRobert Gordon if (has_args) 8867c478bd9Sstevel@tonic-gate msglen += xdrrdma_sizeof(xdr_results, xdr_location, 8870a701b1eSRobert Gordon rdma_minchunk, NULL, NULL); 8887c478bd9Sstevel@tonic-gate 8890a701b1eSRobert Gordon DTRACE_PROBE1(krpc__i__svcrdma__ksend__msglen, int, msglen); 8907c478bd9Sstevel@tonic-gate 8910a701b1eSRobert Gordon status = SVC_RDMA_SUCCESS; 8927c478bd9Sstevel@tonic-gate 8930a701b1eSRobert Gordon if (msglen < RPC_MSG_SZ) { 8947c478bd9Sstevel@tonic-gate /* 8950a701b1eSRobert Gordon * Looks like the response will fit in the inline 8960a701b1eSRobert Gordon * response; let's try 8977c478bd9Sstevel@tonic-gate */ 8980a701b1eSRobert Gordon RSSTAT_INCR(rstotalinlinereplies); 8997c478bd9Sstevel@tonic-gate 9000a701b1eSRobert Gordon rdma_response_op = RDMA_MSG; 9017c478bd9Sstevel@tonic-gate 9020a701b1eSRobert Gordon status = svc_compose_rpcmsg(clone_xprt, conn, xdr_results, 9030a701b1eSRobert Gordon xdr_location, &rbuf_rpc_resp, &xdrs_rpc, msg, 9040a701b1eSRobert Gordon has_args, &final_resp_len); 9050a701b1eSRobert Gordon 9060a701b1eSRobert Gordon DTRACE_PROBE1(krpc__i__srdma__ksend__compose_status, 9070a701b1eSRobert Gordon int, status); 9080a701b1eSRobert Gordon DTRACE_PROBE1(krpc__i__srdma__ksend__compose_len, 9090a701b1eSRobert Gordon int, final_resp_len); 9100a701b1eSRobert Gordon 9110a701b1eSRobert Gordon if (status == SVC_RDMA_SUCCESS && crdp->cl_reply) { 9120a701b1eSRobert Gordon clist_free(crdp->cl_reply); 9130a701b1eSRobert Gordon crdp->cl_reply = NULL; 9147c478bd9Sstevel@tonic-gate } 9150a701b1eSRobert Gordon } 9167c478bd9Sstevel@tonic-gate 9170a701b1eSRobert Gordon /* 9180a701b1eSRobert Gordon * If the encode failed (size?) 
or the message really is 9190a701b1eSRobert Gordon * larger than what is allowed, try the response chunk list. 9200a701b1eSRobert Gordon */ 9210a701b1eSRobert Gordon if (status != SVC_RDMA_SUCCESS || msglen >= RPC_MSG_SZ) { 9227c478bd9Sstevel@tonic-gate /* 9230a701b1eSRobert Gordon * attempting to use a reply chunk list when there 9240a701b1eSRobert Gordon * isn't one won't get very far... 9257c478bd9Sstevel@tonic-gate */ 9260a701b1eSRobert Gordon if (crdp->cl_reply == NULL) { 9270a701b1eSRobert Gordon DTRACE_PROBE(krpc__e__svcrdma__ksend__noreplycl); 9280a701b1eSRobert Gordon goto out; 9297c478bd9Sstevel@tonic-gate } 9307c478bd9Sstevel@tonic-gate 9310a701b1eSRobert Gordon RSSTAT_INCR(rstotallongreplies); 9327c478bd9Sstevel@tonic-gate 9330a701b1eSRobert Gordon msglen = xdr_sizeof(xdr_replymsg, msg); 9340a701b1eSRobert Gordon msglen += xdrrdma_sizeof(xdr_results, xdr_location, 0, 9350a701b1eSRobert Gordon NULL, NULL); 9367c478bd9Sstevel@tonic-gate 9370a701b1eSRobert Gordon status = svc_process_long_reply(clone_xprt, xdr_results, 9380a701b1eSRobert Gordon xdr_location, msg, has_args, &msglen, &freelen, 9390a701b1eSRobert Gordon &num_wreply_segments, &final_resp_len); 9407c478bd9Sstevel@tonic-gate 9410a701b1eSRobert Gordon DTRACE_PROBE1(krpc__i__svcrdma__ksend__longreplen, 9420a701b1eSRobert Gordon int, final_resp_len); 9430a701b1eSRobert Gordon 9440a701b1eSRobert Gordon if (status != SVC_RDMA_SUCCESS) { 9450a701b1eSRobert Gordon DTRACE_PROBE(krpc__e__svcrdma__ksend__compose__failed); 9467c478bd9Sstevel@tonic-gate goto out; 9477c478bd9Sstevel@tonic-gate } 9480a701b1eSRobert Gordon 9490a701b1eSRobert Gordon rdma_response_op = RDMA_NOMSG; 9507c478bd9Sstevel@tonic-gate } 9517c478bd9Sstevel@tonic-gate 9520a701b1eSRobert Gordon DTRACE_PROBE1(krpc__i__svcrdma__ksend__rdmamsg__len, 9530a701b1eSRobert Gordon int, final_resp_len); 9540a701b1eSRobert Gordon 9550a701b1eSRobert Gordon rbuf_resp.type = SEND_BUFFER; 9560a701b1eSRobert Gordon if (rdma_buf_alloc(conn, &rbuf_resp)) { 
9570a701b1eSRobert Gordon rdma_buf_free(conn, &rbuf_rpc_resp); 9580a701b1eSRobert Gordon DTRACE_PROBE(krpc__e__svcrdma__ksend__nofreebufs); 9597c478bd9Sstevel@tonic-gate goto out; 9607c478bd9Sstevel@tonic-gate } 9617c478bd9Sstevel@tonic-gate 9620a701b1eSRobert Gordon rdma_credit = rdma_bufs_granted; 9630a701b1eSRobert Gordon 9640a701b1eSRobert Gordon vers = RPCRDMA_VERS; 9650a701b1eSRobert Gordon xdrmem_create(&xdrs_rhdr, rbuf_resp.addr, rbuf_resp.len, XDR_ENCODE); 9660a701b1eSRobert Gordon (*(uint32_t *)rbuf_resp.addr) = msg->rm_xid; 9670a701b1eSRobert Gordon /* Skip xid and set the xdr position accordingly. */ 9680a701b1eSRobert Gordon XDR_SETPOS(&xdrs_rhdr, sizeof (uint32_t)); 9690a701b1eSRobert Gordon if (!xdr_u_int(&xdrs_rhdr, &vers) || 9700a701b1eSRobert Gordon !xdr_u_int(&xdrs_rhdr, &rdma_credit) || 9710a701b1eSRobert Gordon !xdr_u_int(&xdrs_rhdr, &rdma_response_op)) { 9720a701b1eSRobert Gordon rdma_buf_free(conn, &rbuf_rpc_resp); 9730a701b1eSRobert Gordon rdma_buf_free(conn, &rbuf_resp); 9740a701b1eSRobert Gordon DTRACE_PROBE(krpc__e__svcrdma__ksend__uint); 9750a701b1eSRobert Gordon goto out; 9767c478bd9Sstevel@tonic-gate } 9777c478bd9Sstevel@tonic-gate 9787c478bd9Sstevel@tonic-gate /* 9790a701b1eSRobert Gordon * Now XDR the read chunk list, actually always NULL 9807c478bd9Sstevel@tonic-gate */ 9810a701b1eSRobert Gordon (void) xdr_encode_rlist_svc(&xdrs_rhdr, cl_read); 9820a701b1eSRobert Gordon 9837c478bd9Sstevel@tonic-gate /* 9840a701b1eSRobert Gordon * encode write list -- we already drove RDMA_WRITEs 9857c478bd9Sstevel@tonic-gate */ 9860a701b1eSRobert Gordon cl_write = crdp->cl_wlist; 9870a701b1eSRobert Gordon if (!xdr_encode_wlist(&xdrs_rhdr, cl_write)) { 9880a701b1eSRobert Gordon DTRACE_PROBE(krpc__e__svcrdma__ksend__enc__wlist); 9890a701b1eSRobert Gordon rdma_buf_free(conn, &rbuf_rpc_resp); 9900a701b1eSRobert Gordon rdma_buf_free(conn, &rbuf_resp); 9917c478bd9Sstevel@tonic-gate goto out; 9927c478bd9Sstevel@tonic-gate } 9937c478bd9Sstevel@tonic-gate 
9947c478bd9Sstevel@tonic-gate /* 9950a701b1eSRobert Gordon * XDR encode the RDMA_REPLY write chunk 9967c478bd9Sstevel@tonic-gate */ 9970a701b1eSRobert Gordon if (!xdr_encode_reply_wchunk(&xdrs_rhdr, crdp->cl_reply, 9980a701b1eSRobert Gordon num_wreply_segments)) { 9990a701b1eSRobert Gordon rdma_buf_free(conn, &rbuf_rpc_resp); 10000a701b1eSRobert Gordon rdma_buf_free(conn, &rbuf_resp); 10010a701b1eSRobert Gordon goto out; 10020a701b1eSRobert Gordon } 10037c478bd9Sstevel@tonic-gate 10040a701b1eSRobert Gordon clist_add(&cl_send, 0, XDR_GETPOS(&xdrs_rhdr), &rbuf_resp.handle, 10050a701b1eSRobert Gordon rbuf_resp.addr, NULL, NULL); 10067c478bd9Sstevel@tonic-gate 10070a701b1eSRobert Gordon if (rdma_response_op == RDMA_MSG) { 10080a701b1eSRobert Gordon clist_add(&cl_send, 0, final_resp_len, &rbuf_rpc_resp.handle, 10090a701b1eSRobert Gordon rbuf_rpc_resp.addr, NULL, NULL); 10107c478bd9Sstevel@tonic-gate } 10117c478bd9Sstevel@tonic-gate 10120a701b1eSRobert Gordon status = RDMA_SEND(conn, cl_send, msg->rm_xid); 10137c478bd9Sstevel@tonic-gate 10140a701b1eSRobert Gordon if (status == RDMA_SUCCESS) { 10150a701b1eSRobert Gordon retval = TRUE; 10167c478bd9Sstevel@tonic-gate } 10177c478bd9Sstevel@tonic-gate 10187c478bd9Sstevel@tonic-gate out: 10197c478bd9Sstevel@tonic-gate /* 10207c478bd9Sstevel@tonic-gate * Free up sendlist chunks 10217c478bd9Sstevel@tonic-gate */ 10220a701b1eSRobert Gordon if (cl_send != NULL) 10230a701b1eSRobert Gordon clist_free(cl_send); 10247c478bd9Sstevel@tonic-gate 10257c478bd9Sstevel@tonic-gate /* 10267c478bd9Sstevel@tonic-gate * Destroy private data for xdr rdma 10277c478bd9Sstevel@tonic-gate */ 10280a701b1eSRobert Gordon if (clone_xprt->xp_xdrout.x_ops != NULL) { 10290a701b1eSRobert Gordon XDR_DESTROY(&(clone_xprt->xp_xdrout)); 10300a701b1eSRobert Gordon } 10310a701b1eSRobert Gordon 10320a701b1eSRobert Gordon if (crdp->cl_reply) { 10330a701b1eSRobert Gordon clist_free(crdp->cl_reply); 10340a701b1eSRobert Gordon crdp->cl_reply = NULL; 10350a701b1eSRobert 
Gordon } 10367c478bd9Sstevel@tonic-gate 10377c478bd9Sstevel@tonic-gate /* 10387c478bd9Sstevel@tonic-gate * This is completely disgusting. If public is set it is 10397c478bd9Sstevel@tonic-gate * a pointer to a structure whose first field is the address 10407c478bd9Sstevel@tonic-gate * of the function to free that structure and any related 10417c478bd9Sstevel@tonic-gate * stuff. (see rrokfree in nfs_xdr.c). 10427c478bd9Sstevel@tonic-gate */ 10430a701b1eSRobert Gordon if (xdrs_rpc->x_public) { 10447c478bd9Sstevel@tonic-gate /* LINTED pointer alignment */ 10450a701b1eSRobert Gordon (**((int (**)()) xdrs_rpc->x_public)) (xdrs_rpc->x_public); 10460a701b1eSRobert Gordon } 10470a701b1eSRobert Gordon 10480a701b1eSRobert Gordon if (xdrs_rhdr.x_ops != NULL) { 10490a701b1eSRobert Gordon XDR_DESTROY(&xdrs_rhdr); 10507c478bd9Sstevel@tonic-gate } 10517c478bd9Sstevel@tonic-gate 10527c478bd9Sstevel@tonic-gate return (retval); 10537c478bd9Sstevel@tonic-gate } 10547c478bd9Sstevel@tonic-gate 10557c478bd9Sstevel@tonic-gate /* 10567c478bd9Sstevel@tonic-gate * Deserialize arguments. 
10577c478bd9Sstevel@tonic-gate */ 10587c478bd9Sstevel@tonic-gate static bool_t 10597c478bd9Sstevel@tonic-gate svc_rdma_kgetargs(SVCXPRT *clone_xprt, xdrproc_t xdr_args, caddr_t args_ptr) 10607c478bd9Sstevel@tonic-gate { 10617c478bd9Sstevel@tonic-gate if ((SVCAUTH_UNWRAP(&clone_xprt->xp_auth, &clone_xprt->xp_xdrin, 10627c478bd9Sstevel@tonic-gate xdr_args, args_ptr)) != TRUE) 10637c478bd9Sstevel@tonic-gate return (FALSE); 10647c478bd9Sstevel@tonic-gate return (TRUE); 10657c478bd9Sstevel@tonic-gate } 10667c478bd9Sstevel@tonic-gate 10677c478bd9Sstevel@tonic-gate static bool_t 10687c478bd9Sstevel@tonic-gate svc_rdma_kfreeargs(SVCXPRT *clone_xprt, xdrproc_t xdr_args, 10697c478bd9Sstevel@tonic-gate caddr_t args_ptr) 10707c478bd9Sstevel@tonic-gate { 10710a701b1eSRobert Gordon struct clone_rdma_data *crdp; 10727c478bd9Sstevel@tonic-gate bool_t retval; 10737c478bd9Sstevel@tonic-gate 1074*60536ef9SKaren Rochford /* 1075*60536ef9SKaren Rochford * If the cloned bit is true, then this transport specific 1076*60536ef9SKaren Rochford * rmda data has been duplicated into another cloned xprt. Do 1077*60536ef9SKaren Rochford * not free, or release the connection, it is still in use. The 1078*60536ef9SKaren Rochford * buffers will be freed and the connection released later by 1079*60536ef9SKaren Rochford * SVC_CLONE_DESTROY(). 
1080*60536ef9SKaren Rochford */ 10810a701b1eSRobert Gordon crdp = (struct clone_rdma_data *)clone_xprt->xp_p2buf; 1082*60536ef9SKaren Rochford if (crdp->cloned == TRUE) { 1083*60536ef9SKaren Rochford crdp->cloned = 0; 1084*60536ef9SKaren Rochford return (TRUE); 1085*60536ef9SKaren Rochford } 10860a701b1eSRobert Gordon 10870a701b1eSRobert Gordon /* 10880a701b1eSRobert Gordon * Free the args if needed then XDR_DESTROY 10890a701b1eSRobert Gordon */ 10907c478bd9Sstevel@tonic-gate if (args_ptr) { 10917c478bd9Sstevel@tonic-gate XDR *xdrs = &clone_xprt->xp_xdrin; 10927c478bd9Sstevel@tonic-gate 10937c478bd9Sstevel@tonic-gate xdrs->x_op = XDR_FREE; 10947c478bd9Sstevel@tonic-gate retval = (*xdr_args)(xdrs, args_ptr); 10957c478bd9Sstevel@tonic-gate } 10960a701b1eSRobert Gordon 10977c478bd9Sstevel@tonic-gate XDR_DESTROY(&(clone_xprt->xp_xdrin)); 10980a701b1eSRobert Gordon rdma_buf_free(crdp->conn, &crdp->rpcbuf); 10990a701b1eSRobert Gordon if (crdp->cl_reply) { 11000a701b1eSRobert Gordon clist_free(crdp->cl_reply); 11010a701b1eSRobert Gordon crdp->cl_reply = NULL; 11020a701b1eSRobert Gordon } 11030a701b1eSRobert Gordon RDMA_REL_CONN(crdp->conn); 11040a701b1eSRobert Gordon 11057c478bd9Sstevel@tonic-gate return (retval); 11067c478bd9Sstevel@tonic-gate } 11077c478bd9Sstevel@tonic-gate 11087c478bd9Sstevel@tonic-gate /* ARGSUSED */ 11097c478bd9Sstevel@tonic-gate static int32_t * 11107c478bd9Sstevel@tonic-gate svc_rdma_kgetres(SVCXPRT *clone_xprt, int size) 11117c478bd9Sstevel@tonic-gate { 11127c478bd9Sstevel@tonic-gate return (NULL); 11137c478bd9Sstevel@tonic-gate } 11147c478bd9Sstevel@tonic-gate 11157c478bd9Sstevel@tonic-gate /* ARGSUSED */ 11167c478bd9Sstevel@tonic-gate static void 11177c478bd9Sstevel@tonic-gate svc_rdma_kfreeres(SVCXPRT *clone_xprt) 11187c478bd9Sstevel@tonic-gate { 11197c478bd9Sstevel@tonic-gate } 11207c478bd9Sstevel@tonic-gate 11217c478bd9Sstevel@tonic-gate /* 11227c478bd9Sstevel@tonic-gate * the dup cacheing routines below provide a cache of non-failure 
11237c478bd9Sstevel@tonic-gate * transaction id's. rpc service routines can use this to detect 11247c478bd9Sstevel@tonic-gate * retransmissions and re-send a non-failure response. 11257c478bd9Sstevel@tonic-gate */ 11267c478bd9Sstevel@tonic-gate 11277c478bd9Sstevel@tonic-gate /* 11287c478bd9Sstevel@tonic-gate * MAXDUPREQS is the number of cached items. It should be adjusted 11297c478bd9Sstevel@tonic-gate * to the service load so that there is likely to be a response entry 11307c478bd9Sstevel@tonic-gate * when the first retransmission comes in. 11317c478bd9Sstevel@tonic-gate */ 11327c478bd9Sstevel@tonic-gate #define MAXDUPREQS 1024 11337c478bd9Sstevel@tonic-gate 11347c478bd9Sstevel@tonic-gate /* 11357c478bd9Sstevel@tonic-gate * This should be appropriately scaled to MAXDUPREQS. 11367c478bd9Sstevel@tonic-gate */ 11377c478bd9Sstevel@tonic-gate #define DRHASHSZ 257 11387c478bd9Sstevel@tonic-gate 11397c478bd9Sstevel@tonic-gate #if ((DRHASHSZ & (DRHASHSZ - 1)) == 0) 11407c478bd9Sstevel@tonic-gate #define XIDHASH(xid) ((xid) & (DRHASHSZ - 1)) 11417c478bd9Sstevel@tonic-gate #else 11427c478bd9Sstevel@tonic-gate #define XIDHASH(xid) ((xid) % DRHASHSZ) 11437c478bd9Sstevel@tonic-gate #endif 11447c478bd9Sstevel@tonic-gate #define DRHASH(dr) XIDHASH((dr)->dr_xid) 11457c478bd9Sstevel@tonic-gate #define REQTOXID(req) ((req)->rq_xprt->xp_xid) 11467c478bd9Sstevel@tonic-gate 11477c478bd9Sstevel@tonic-gate static int rdmandupreqs = 0; 114859418bdeSgt int rdmamaxdupreqs = MAXDUPREQS; 11497c478bd9Sstevel@tonic-gate static kmutex_t rdmadupreq_lock; 11507c478bd9Sstevel@tonic-gate static struct dupreq *rdmadrhashtbl[DRHASHSZ]; 11517c478bd9Sstevel@tonic-gate static int rdmadrhashstat[DRHASHSZ]; 11527c478bd9Sstevel@tonic-gate 11537c478bd9Sstevel@tonic-gate static void unhash(struct dupreq *); 11547c478bd9Sstevel@tonic-gate 11557c478bd9Sstevel@tonic-gate /* 11567c478bd9Sstevel@tonic-gate * rdmadrmru points to the head of a circular linked list in lru order. 
11577c478bd9Sstevel@tonic-gate * rdmadrmru->dr_next == drlru 11587c478bd9Sstevel@tonic-gate */ 11597c478bd9Sstevel@tonic-gate struct dupreq *rdmadrmru; 11607c478bd9Sstevel@tonic-gate 11617c478bd9Sstevel@tonic-gate /* 11627c478bd9Sstevel@tonic-gate * svc_rdma_kdup searches the request cache and returns 0 if the 11637c478bd9Sstevel@tonic-gate * request is not found in the cache. If it is found, then it 11647c478bd9Sstevel@tonic-gate * returns the state of the request (in progress or done) and 11657c478bd9Sstevel@tonic-gate * the status or attributes that were part of the original reply. 11667c478bd9Sstevel@tonic-gate */ 11677c478bd9Sstevel@tonic-gate static int 11687c478bd9Sstevel@tonic-gate svc_rdma_kdup(struct svc_req *req, caddr_t res, int size, struct dupreq **drpp, 11697c478bd9Sstevel@tonic-gate bool_t *dupcachedp) 11707c478bd9Sstevel@tonic-gate { 11717c478bd9Sstevel@tonic-gate struct dupreq *dr; 11727c478bd9Sstevel@tonic-gate uint32_t xid; 11737c478bd9Sstevel@tonic-gate uint32_t drhash; 11747c478bd9Sstevel@tonic-gate int status; 11757c478bd9Sstevel@tonic-gate 11767c478bd9Sstevel@tonic-gate xid = REQTOXID(req); 11777c478bd9Sstevel@tonic-gate mutex_enter(&rdmadupreq_lock); 11787c478bd9Sstevel@tonic-gate RSSTAT_INCR(rsdupchecks); 11797c478bd9Sstevel@tonic-gate /* 11807c478bd9Sstevel@tonic-gate * Check to see whether an entry already exists in the cache. 
11817c478bd9Sstevel@tonic-gate */ 11827c478bd9Sstevel@tonic-gate dr = rdmadrhashtbl[XIDHASH(xid)]; 11837c478bd9Sstevel@tonic-gate while (dr != NULL) { 11847c478bd9Sstevel@tonic-gate if (dr->dr_xid == xid && 11857c478bd9Sstevel@tonic-gate dr->dr_proc == req->rq_proc && 11867c478bd9Sstevel@tonic-gate dr->dr_prog == req->rq_prog && 11877c478bd9Sstevel@tonic-gate dr->dr_vers == req->rq_vers && 11887c478bd9Sstevel@tonic-gate dr->dr_addr.len == req->rq_xprt->xp_rtaddr.len && 11897c478bd9Sstevel@tonic-gate bcmp((caddr_t)dr->dr_addr.buf, 11907c478bd9Sstevel@tonic-gate (caddr_t)req->rq_xprt->xp_rtaddr.buf, 11917c478bd9Sstevel@tonic-gate dr->dr_addr.len) == 0) { 11927c478bd9Sstevel@tonic-gate status = dr->dr_status; 11937c478bd9Sstevel@tonic-gate if (status == DUP_DONE) { 11947c478bd9Sstevel@tonic-gate bcopy(dr->dr_resp.buf, res, size); 11957c478bd9Sstevel@tonic-gate if (dupcachedp != NULL) 11967c478bd9Sstevel@tonic-gate *dupcachedp = (dr->dr_resfree != NULL); 11977c478bd9Sstevel@tonic-gate } else { 11987c478bd9Sstevel@tonic-gate dr->dr_status = DUP_INPROGRESS; 11997c478bd9Sstevel@tonic-gate *drpp = dr; 12007c478bd9Sstevel@tonic-gate } 12017c478bd9Sstevel@tonic-gate RSSTAT_INCR(rsdupreqs); 12027c478bd9Sstevel@tonic-gate mutex_exit(&rdmadupreq_lock); 12037c478bd9Sstevel@tonic-gate return (status); 12047c478bd9Sstevel@tonic-gate } 12057c478bd9Sstevel@tonic-gate dr = dr->dr_chain; 12067c478bd9Sstevel@tonic-gate } 12077c478bd9Sstevel@tonic-gate 12087c478bd9Sstevel@tonic-gate /* 12097c478bd9Sstevel@tonic-gate * There wasn't an entry, either allocate a new one or recycle 12107c478bd9Sstevel@tonic-gate * an old one. 
12117c478bd9Sstevel@tonic-gate */ 12127c478bd9Sstevel@tonic-gate if (rdmandupreqs < rdmamaxdupreqs) { 12137c478bd9Sstevel@tonic-gate dr = kmem_alloc(sizeof (*dr), KM_NOSLEEP); 12147c478bd9Sstevel@tonic-gate if (dr == NULL) { 12157c478bd9Sstevel@tonic-gate mutex_exit(&rdmadupreq_lock); 12167c478bd9Sstevel@tonic-gate return (DUP_ERROR); 12177c478bd9Sstevel@tonic-gate } 12187c478bd9Sstevel@tonic-gate dr->dr_resp.buf = NULL; 12197c478bd9Sstevel@tonic-gate dr->dr_resp.maxlen = 0; 12207c478bd9Sstevel@tonic-gate dr->dr_addr.buf = NULL; 12217c478bd9Sstevel@tonic-gate dr->dr_addr.maxlen = 0; 12227c478bd9Sstevel@tonic-gate if (rdmadrmru) { 12237c478bd9Sstevel@tonic-gate dr->dr_next = rdmadrmru->dr_next; 12247c478bd9Sstevel@tonic-gate rdmadrmru->dr_next = dr; 12257c478bd9Sstevel@tonic-gate } else { 12267c478bd9Sstevel@tonic-gate dr->dr_next = dr; 12277c478bd9Sstevel@tonic-gate } 12287c478bd9Sstevel@tonic-gate rdmandupreqs++; 12297c478bd9Sstevel@tonic-gate } else { 12307c478bd9Sstevel@tonic-gate dr = rdmadrmru->dr_next; 12317c478bd9Sstevel@tonic-gate while (dr->dr_status == DUP_INPROGRESS) { 12327c478bd9Sstevel@tonic-gate dr = dr->dr_next; 12337c478bd9Sstevel@tonic-gate if (dr == rdmadrmru->dr_next) { 12347c478bd9Sstevel@tonic-gate mutex_exit(&rdmadupreq_lock); 12357c478bd9Sstevel@tonic-gate return (DUP_ERROR); 12367c478bd9Sstevel@tonic-gate } 12377c478bd9Sstevel@tonic-gate } 12387c478bd9Sstevel@tonic-gate unhash(dr); 12397c478bd9Sstevel@tonic-gate if (dr->dr_resfree) { 12407c478bd9Sstevel@tonic-gate (*dr->dr_resfree)(dr->dr_resp.buf); 12417c478bd9Sstevel@tonic-gate } 12427c478bd9Sstevel@tonic-gate } 12437c478bd9Sstevel@tonic-gate dr->dr_resfree = NULL; 12447c478bd9Sstevel@tonic-gate rdmadrmru = dr; 12457c478bd9Sstevel@tonic-gate 12467c478bd9Sstevel@tonic-gate dr->dr_xid = REQTOXID(req); 12477c478bd9Sstevel@tonic-gate dr->dr_prog = req->rq_prog; 12487c478bd9Sstevel@tonic-gate dr->dr_vers = req->rq_vers; 12497c478bd9Sstevel@tonic-gate dr->dr_proc = req->rq_proc; 
12507c478bd9Sstevel@tonic-gate if (dr->dr_addr.maxlen < req->rq_xprt->xp_rtaddr.len) { 12517c478bd9Sstevel@tonic-gate if (dr->dr_addr.buf != NULL) 12527c478bd9Sstevel@tonic-gate kmem_free(dr->dr_addr.buf, dr->dr_addr.maxlen); 12537c478bd9Sstevel@tonic-gate dr->dr_addr.maxlen = req->rq_xprt->xp_rtaddr.len; 12547c478bd9Sstevel@tonic-gate dr->dr_addr.buf = kmem_alloc(dr->dr_addr.maxlen, KM_NOSLEEP); 12557c478bd9Sstevel@tonic-gate if (dr->dr_addr.buf == NULL) { 12567c478bd9Sstevel@tonic-gate dr->dr_addr.maxlen = 0; 12577c478bd9Sstevel@tonic-gate dr->dr_status = DUP_DROP; 12587c478bd9Sstevel@tonic-gate mutex_exit(&rdmadupreq_lock); 12597c478bd9Sstevel@tonic-gate return (DUP_ERROR); 12607c478bd9Sstevel@tonic-gate } 12617c478bd9Sstevel@tonic-gate } 12627c478bd9Sstevel@tonic-gate dr->dr_addr.len = req->rq_xprt->xp_rtaddr.len; 12637c478bd9Sstevel@tonic-gate bcopy(req->rq_xprt->xp_rtaddr.buf, dr->dr_addr.buf, dr->dr_addr.len); 12647c478bd9Sstevel@tonic-gate if (dr->dr_resp.maxlen < size) { 12657c478bd9Sstevel@tonic-gate if (dr->dr_resp.buf != NULL) 12667c478bd9Sstevel@tonic-gate kmem_free(dr->dr_resp.buf, dr->dr_resp.maxlen); 12677c478bd9Sstevel@tonic-gate dr->dr_resp.maxlen = (unsigned int)size; 12687c478bd9Sstevel@tonic-gate dr->dr_resp.buf = kmem_alloc(size, KM_NOSLEEP); 12697c478bd9Sstevel@tonic-gate if (dr->dr_resp.buf == NULL) { 12707c478bd9Sstevel@tonic-gate dr->dr_resp.maxlen = 0; 12717c478bd9Sstevel@tonic-gate dr->dr_status = DUP_DROP; 12727c478bd9Sstevel@tonic-gate mutex_exit(&rdmadupreq_lock); 12737c478bd9Sstevel@tonic-gate return (DUP_ERROR); 12747c478bd9Sstevel@tonic-gate } 12757c478bd9Sstevel@tonic-gate } 12767c478bd9Sstevel@tonic-gate dr->dr_status = DUP_INPROGRESS; 12777c478bd9Sstevel@tonic-gate 12787c478bd9Sstevel@tonic-gate drhash = (uint32_t)DRHASH(dr); 12797c478bd9Sstevel@tonic-gate dr->dr_chain = rdmadrhashtbl[drhash]; 12807c478bd9Sstevel@tonic-gate rdmadrhashtbl[drhash] = dr; 12817c478bd9Sstevel@tonic-gate rdmadrhashstat[drhash]++; 
12827c478bd9Sstevel@tonic-gate mutex_exit(&rdmadupreq_lock); 12837c478bd9Sstevel@tonic-gate *drpp = dr; 12847c478bd9Sstevel@tonic-gate return (DUP_NEW); 12857c478bd9Sstevel@tonic-gate } 12867c478bd9Sstevel@tonic-gate 12877c478bd9Sstevel@tonic-gate /* 12887c478bd9Sstevel@tonic-gate * svc_rdma_kdupdone marks the request done (DUP_DONE or DUP_DROP) 12897c478bd9Sstevel@tonic-gate * and stores the response. 12907c478bd9Sstevel@tonic-gate */ 12917c478bd9Sstevel@tonic-gate static void 12927c478bd9Sstevel@tonic-gate svc_rdma_kdupdone(struct dupreq *dr, caddr_t res, void (*dis_resfree)(), 12937c478bd9Sstevel@tonic-gate int size, int status) 12947c478bd9Sstevel@tonic-gate { 12957c478bd9Sstevel@tonic-gate ASSERT(dr->dr_resfree == NULL); 12967c478bd9Sstevel@tonic-gate if (status == DUP_DONE) { 12977c478bd9Sstevel@tonic-gate bcopy(res, dr->dr_resp.buf, size); 12987c478bd9Sstevel@tonic-gate dr->dr_resfree = dis_resfree; 12997c478bd9Sstevel@tonic-gate } 13007c478bd9Sstevel@tonic-gate dr->dr_status = status; 13017c478bd9Sstevel@tonic-gate } 13027c478bd9Sstevel@tonic-gate 13037c478bd9Sstevel@tonic-gate /* 13047c478bd9Sstevel@tonic-gate * This routine expects that the mutex, rdmadupreq_lock, is already held. 
13057c478bd9Sstevel@tonic-gate */ 13067c478bd9Sstevel@tonic-gate static void 13077c478bd9Sstevel@tonic-gate unhash(struct dupreq *dr) 13087c478bd9Sstevel@tonic-gate { 13097c478bd9Sstevel@tonic-gate struct dupreq *drt; 13107c478bd9Sstevel@tonic-gate struct dupreq *drtprev = NULL; 13117c478bd9Sstevel@tonic-gate uint32_t drhash; 13127c478bd9Sstevel@tonic-gate 13137c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&rdmadupreq_lock)); 13147c478bd9Sstevel@tonic-gate 13157c478bd9Sstevel@tonic-gate drhash = (uint32_t)DRHASH(dr); 13167c478bd9Sstevel@tonic-gate drt = rdmadrhashtbl[drhash]; 13177c478bd9Sstevel@tonic-gate while (drt != NULL) { 13187c478bd9Sstevel@tonic-gate if (drt == dr) { 13197c478bd9Sstevel@tonic-gate rdmadrhashstat[drhash]--; 13207c478bd9Sstevel@tonic-gate if (drtprev == NULL) { 13217c478bd9Sstevel@tonic-gate rdmadrhashtbl[drhash] = drt->dr_chain; 13227c478bd9Sstevel@tonic-gate } else { 13237c478bd9Sstevel@tonic-gate drtprev->dr_chain = drt->dr_chain; 13247c478bd9Sstevel@tonic-gate } 13257c478bd9Sstevel@tonic-gate return; 13267c478bd9Sstevel@tonic-gate } 13277c478bd9Sstevel@tonic-gate drtprev = drt; 13287c478bd9Sstevel@tonic-gate drt = drt->dr_chain; 13297c478bd9Sstevel@tonic-gate } 13307c478bd9Sstevel@tonic-gate } 13310a701b1eSRobert Gordon 13320a701b1eSRobert Gordon bool_t 13330a701b1eSRobert Gordon rdma_get_wchunk(struct svc_req *req, iovec_t *iov, struct clist *wlist) 13340a701b1eSRobert Gordon { 13350a701b1eSRobert Gordon struct clist *clist; 13360a701b1eSRobert Gordon uint32_t tlen; 13370a701b1eSRobert Gordon 13380a701b1eSRobert Gordon if (req->rq_xprt->xp_type != T_RDMA) { 13390a701b1eSRobert Gordon return (FALSE); 13400a701b1eSRobert Gordon } 13410a701b1eSRobert Gordon 13420a701b1eSRobert Gordon tlen = 0; 13430a701b1eSRobert Gordon clist = wlist; 13440a701b1eSRobert Gordon while (clist) { 13450a701b1eSRobert Gordon tlen += clist->c_len; 13460a701b1eSRobert Gordon clist = clist->c_next; 13470a701b1eSRobert Gordon } 13480a701b1eSRobert Gordon 
13490a701b1eSRobert Gordon /* 13500a701b1eSRobert Gordon * set iov to addr+len of first segment of first wchunk of 13510a701b1eSRobert Gordon * wlist sent by client. krecv() already malloc'd a buffer 13520a701b1eSRobert Gordon * large enough, but registration is deferred until we write 13530a701b1eSRobert Gordon * the buffer back to (NFS) client using RDMA_WRITE. 13540a701b1eSRobert Gordon */ 13550a701b1eSRobert Gordon iov->iov_base = (caddr_t)(uintptr_t)wlist->w.c_saddr; 13560a701b1eSRobert Gordon iov->iov_len = tlen; 13570a701b1eSRobert Gordon 13580a701b1eSRobert Gordon return (TRUE); 13590a701b1eSRobert Gordon } 1360f837ee4aSSiddheshwar Mahesh 1361f837ee4aSSiddheshwar Mahesh /* 1362f837ee4aSSiddheshwar Mahesh * routine to setup the read chunk lists 1363f837ee4aSSiddheshwar Mahesh */ 1364f837ee4aSSiddheshwar Mahesh 1365f837ee4aSSiddheshwar Mahesh int 1366f837ee4aSSiddheshwar Mahesh rdma_setup_read_chunks(struct clist *wcl, uint32_t count, int *wcl_len) 1367f837ee4aSSiddheshwar Mahesh { 1368f837ee4aSSiddheshwar Mahesh int data_len, avail_len; 1369f837ee4aSSiddheshwar Mahesh uint_t round_len; 1370f837ee4aSSiddheshwar Mahesh 1371f837ee4aSSiddheshwar Mahesh data_len = avail_len = 0; 1372f837ee4aSSiddheshwar Mahesh 1373f837ee4aSSiddheshwar Mahesh while (wcl != NULL && count > 0) { 1374f837ee4aSSiddheshwar Mahesh if (wcl->c_dmemhandle.mrc_rmr == 0) 1375f837ee4aSSiddheshwar Mahesh break; 1376f837ee4aSSiddheshwar Mahesh 1377f837ee4aSSiddheshwar Mahesh if (wcl->c_len < count) { 1378f837ee4aSSiddheshwar Mahesh data_len += wcl->c_len; 1379f837ee4aSSiddheshwar Mahesh avail_len = 0; 1380f837ee4aSSiddheshwar Mahesh } else { 1381f837ee4aSSiddheshwar Mahesh data_len += count; 1382f837ee4aSSiddheshwar Mahesh avail_len = wcl->c_len - count; 1383f837ee4aSSiddheshwar Mahesh wcl->c_len = count; 1384f837ee4aSSiddheshwar Mahesh } 1385f837ee4aSSiddheshwar Mahesh count -= wcl->c_len; 1386f837ee4aSSiddheshwar Mahesh 1387f837ee4aSSiddheshwar Mahesh if (count == 0) 1388f837ee4aSSiddheshwar 
Mahesh break; 1389f837ee4aSSiddheshwar Mahesh 1390f837ee4aSSiddheshwar Mahesh wcl = wcl->c_next; 1391f837ee4aSSiddheshwar Mahesh } 1392f837ee4aSSiddheshwar Mahesh 1393f837ee4aSSiddheshwar Mahesh /* 1394f837ee4aSSiddheshwar Mahesh * MUST fail if there are still more data 1395f837ee4aSSiddheshwar Mahesh */ 1396f837ee4aSSiddheshwar Mahesh if (count > 0) { 1397f837ee4aSSiddheshwar Mahesh DTRACE_PROBE2(krpc__e__rdma_setup_read_chunks_clist_len, 1398f837ee4aSSiddheshwar Mahesh int, data_len, int, count); 1399f837ee4aSSiddheshwar Mahesh return (FALSE); 1400f837ee4aSSiddheshwar Mahesh } 1401f837ee4aSSiddheshwar Mahesh 1402f837ee4aSSiddheshwar Mahesh /* 1403f837ee4aSSiddheshwar Mahesh * Round up the last chunk to 4-byte boundary 1404f837ee4aSSiddheshwar Mahesh */ 1405f837ee4aSSiddheshwar Mahesh *wcl_len = roundup(data_len, BYTES_PER_XDR_UNIT); 1406f837ee4aSSiddheshwar Mahesh round_len = *wcl_len - data_len; 1407f837ee4aSSiddheshwar Mahesh 1408f837ee4aSSiddheshwar Mahesh if (round_len) { 1409f837ee4aSSiddheshwar Mahesh 1410f837ee4aSSiddheshwar Mahesh /* 1411f837ee4aSSiddheshwar Mahesh * If there is space in the current chunk, 1412f837ee4aSSiddheshwar Mahesh * add the roundup to the chunk. 1413f837ee4aSSiddheshwar Mahesh */ 1414f837ee4aSSiddheshwar Mahesh if (avail_len >= round_len) { 1415f837ee4aSSiddheshwar Mahesh wcl->c_len += round_len; 1416f837ee4aSSiddheshwar Mahesh } else { 1417f837ee4aSSiddheshwar Mahesh /* 1418f837ee4aSSiddheshwar Mahesh * try the next one. 
1419f837ee4aSSiddheshwar Mahesh */ 1420f837ee4aSSiddheshwar Mahesh wcl = wcl->c_next; 1421f837ee4aSSiddheshwar Mahesh if ((wcl == NULL) || (wcl->c_len < round_len)) { 1422f837ee4aSSiddheshwar Mahesh DTRACE_PROBE1( 1423f837ee4aSSiddheshwar Mahesh krpc__e__rdma_setup_read_chunks_rndup, 1424f837ee4aSSiddheshwar Mahesh int, round_len); 1425f837ee4aSSiddheshwar Mahesh return (FALSE); 1426f837ee4aSSiddheshwar Mahesh } 1427f837ee4aSSiddheshwar Mahesh wcl->c_len = round_len; 1428f837ee4aSSiddheshwar Mahesh } 1429f837ee4aSSiddheshwar Mahesh } 1430f837ee4aSSiddheshwar Mahesh 1431f837ee4aSSiddheshwar Mahesh wcl = wcl->c_next; 1432f837ee4aSSiddheshwar Mahesh 1433f837ee4aSSiddheshwar Mahesh /* 1434f837ee4aSSiddheshwar Mahesh * Make rest of the chunks 0-len 1435f837ee4aSSiddheshwar Mahesh */ 1436f837ee4aSSiddheshwar Mahesh 1437f837ee4aSSiddheshwar Mahesh clist_zero_len(wcl); 1438f837ee4aSSiddheshwar Mahesh 1439f837ee4aSSiddheshwar Mahesh return (TRUE); 1440f837ee4aSSiddheshwar Mahesh } 1441