17c478bdstevel@tonic-gate/* 27c478bdstevel@tonic-gate * CDDL HEADER START 37c478bdstevel@tonic-gate * 47c478bdstevel@tonic-gate * The contents of this file are subject to the terms of the 50a701b1Robert Gordon * Common Development and Distribution License (the "License"). 60a701b1Robert Gordon * You may not use this file except in compliance with the License. 77c478bdstevel@tonic-gate * 87c478bdstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bdstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bdstevel@tonic-gate * See the License for the specific language governing permissions 117c478bdstevel@tonic-gate * and limitations under the License. 127c478bdstevel@tonic-gate * 137c478bdstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bdstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bdstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bdstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bdstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bdstevel@tonic-gate * 197c478bdstevel@tonic-gate * CDDL HEADER END 207c478bdstevel@tonic-gate */ 217c478bdstevel@tonic-gate/* 220a4b081Karen Rochford * Copyright (c) 1983, 2010, Oracle and/or its affiliates. All rights reserved. 23201f5ebSebastien Roy * Copyright (c) 2012 by Delphix. All rights reserved. 242695d4fMarcel Telka * Copyright 2013 Nexenta Systems, Inc. All rights reserved. 254a3b052Andy Fiddaman * Copyright 2012 Marcel Telka <marcel@telka.sk> 264a3b052Andy Fiddaman * Copyright 2018 OmniOS Community Edition (OmniOSce) Association. 277c478bdstevel@tonic-gate */ 287c478bdstevel@tonic-gate/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 297c478bdstevel@tonic-gate/* All Rights Reserved */ 307c478bdstevel@tonic-gate/* 317c478bdstevel@tonic-gate * Portions of this source code were derived from Berkeley 327c478bdstevel@tonic-gate * 4.3 BSD under license from the Regents of the University of 337c478bdstevel@tonic-gate * California. 347c478bdstevel@tonic-gate */ 357c478bdstevel@tonic-gate 367c478bdstevel@tonic-gate/* 377c478bdstevel@tonic-gate * Server side of RPC over RDMA in the kernel. 387c478bdstevel@tonic-gate */ 397c478bdstevel@tonic-gate 407c478bdstevel@tonic-gate#include <sys/param.h> 417c478bdstevel@tonic-gate#include <sys/types.h> 427c478bdstevel@tonic-gate#include <sys/user.h> 437c478bdstevel@tonic-gate#include <sys/sysmacros.h> 447c478bdstevel@tonic-gate#include <sys/proc.h> 457c478bdstevel@tonic-gate#include <sys/file.h> 467c478bdstevel@tonic-gate#include <sys/errno.h> 477c478bdstevel@tonic-gate#include <sys/kmem.h> 487c478bdstevel@tonic-gate#include <sys/debug.h> 497c478bdstevel@tonic-gate#include <sys/systm.h> 507c478bdstevel@tonic-gate#include <sys/cmn_err.h> 517c478bdstevel@tonic-gate#include <sys/kstat.h> 527c478bdstevel@tonic-gate#include <sys/vtrace.h> 537c478bdstevel@tonic-gate#include <sys/debug.h> 547c478bdstevel@tonic-gate 557c478bdstevel@tonic-gate#include <rpc/types.h> 567c478bdstevel@tonic-gate#include <rpc/xdr.h> 577c478bdstevel@tonic-gate#include <rpc/auth.h> 587c478bdstevel@tonic-gate#include <rpc/clnt.h> 597c478bdstevel@tonic-gate#include <rpc/rpc_msg.h> 607c478bdstevel@tonic-gate#include <rpc/svc.h> 617c478bdstevel@tonic-gate#include <rpc/rpc_rdma.h> 627c478bdstevel@tonic-gate#include <sys/ddi.h> 637c478bdstevel@tonic-gate#include <sys/sunddi.h> 647c478bdstevel@tonic-gate 657c478bdstevel@tonic-gate#include <inet/common.h> 667c478bdstevel@tonic-gate#include <inet/ip.h> 677c478bdstevel@tonic-gate#include <inet/ip6.h> 687c478bdstevel@tonic-gate 690a701b1Robert Gordon#include <nfs/nfs.h> 700a701b1Robert Gordon#include <sys/sdt.h> 710a701b1Robert Gordon 720a701b1Robert Gordon#define SVC_RDMA_SUCCESS 0 730a701b1Robert Gordon#define SVC_RDMA_FAIL -1 740a701b1Robert Gordon 750a701b1Robert Gordon#define SVC_CREDIT_FACTOR (0.5) 760a701b1Robert Gordon 770a701b1Robert Gordon#define MSG_IS_RPCSEC_GSS(msg) \ 780a701b1Robert Gordon ((msg)->rm_reply.rp_acpt.ar_verf.oa_flavor == RPCSEC_GSS) 790a701b1Robert Gordon 800a701b1Robert Gordon 810a701b1Robert Gordonuint32_t rdma_bufs_granted = RDMA_BUFS_GRANT; 820a701b1Robert Gordon 837c478bdstevel@tonic-gate/* 847c478bdstevel@tonic-gate * RDMA transport specific data associated with SVCMASTERXPRT 857c478bdstevel@tonic-gate */ 867c478bdstevel@tonic-gatestruct rdma_data { 874a3b052Andy Fiddaman SVCMASTERXPRT *rd_xprt; /* back ptr to SVCMASTERXPRT */ 887c478bdstevel@tonic-gate struct rdma_svc_data rd_data; /* rdma data */ 897c478bdstevel@tonic-gate rdma_mod_t *r_mod; /* RDMA module containing ops ptr */ 907c478bdstevel@tonic-gate}; 917c478bdstevel@tonic-gate 927c478bdstevel@tonic-gate/* 937c478bdstevel@tonic-gate * Plugin connection specific data stashed away in clone SVCXPRT 947c478bdstevel@tonic-gate */ 957c478bdstevel@tonic-gatestruct clone_rdma_data { 9660536efKaren Rochford bool_t cloned; /* xprt cloned for thread processing */ 977c478bdstevel@tonic-gate CONN *conn; /* RDMA connection */ 987c478bdstevel@tonic-gate rdma_buf_t rpcbuf; /* RPC req/resp buffer */ 990a701b1Robert Gordon struct clist *cl_reply; /* reply chunk buffer info */ 1000a701b1Robert Gordon struct clist *cl_wlist; /* write list clist */ 1017c478bdstevel@tonic-gate}; 1027c478bdstevel@tonic-gate 10360536efKaren Rochford 1047c478bdstevel@tonic-gate#define MAXADDRLEN 128 /* max length for address mask */ 1057c478bdstevel@tonic-gate 1067c478bdstevel@tonic-gate/* 1077c478bdstevel@tonic-gate * Routines exported through ops vector. 1087c478bdstevel@tonic-gate */ 1097c478bdstevel@tonic-gatestatic bool_t svc_rdma_krecv(SVCXPRT *, mblk_t *, struct rpc_msg *); 1107c478bdstevel@tonic-gatestatic bool_t svc_rdma_ksend(SVCXPRT *, struct rpc_msg *); 1117c478bdstevel@tonic-gatestatic bool_t svc_rdma_kgetargs(SVCXPRT *, xdrproc_t, caddr_t); 1127c478bdstevel@tonic-gatestatic bool_t svc_rdma_kfreeargs(SVCXPRT *, xdrproc_t, caddr_t); 1137c478bdstevel@tonic-gatevoid svc_rdma_kdestroy(SVCMASTERXPRT *); 1147c478bdstevel@tonic-gatestatic int svc_rdma_kdup(struct svc_req *, caddr_t, int, 1157c478bdstevel@tonic-gate struct dupreq **, bool_t *); 1167c478bdstevel@tonic-gatestatic void svc_rdma_kdupdone(struct dupreq *, caddr_t, 1177c478bdstevel@tonic-gate void (*)(), int, int); 1187c478bdstevel@tonic-gatestatic int32_t *svc_rdma_kgetres(SVCXPRT *, int); 1197c478bdstevel@tonic-gatestatic void svc_rdma_kfreeres(SVCXPRT *); 1207c478bdstevel@tonic-gatestatic void svc_rdma_kclone_destroy(SVCXPRT *); 1217c478bdstevel@tonic-gatestatic void svc_rdma_kstart(SVCMASTERXPRT *); 1227c478bdstevel@tonic-gatevoid svc_rdma_kstop(SVCMASTERXPRT *); 12360536efKaren Rochfordstatic void svc_rdma_kclone_xprt(SVCXPRT *, SVCXPRT *); 1240a4b081Karen Rochfordstatic void svc_rdma_ktattrs(SVCXPRT *, int, void **); 1257c478bdstevel@tonic-gate 1260a701b1Robert Gordonstatic int svc_process_long_reply(SVCXPRT *, xdrproc_t, 1270a701b1Robert Gordon caddr_t, struct rpc_msg *, bool_t, int *, 1280a701b1Robert Gordon int *, int *, unsigned int *); 1290a701b1Robert Gordon 1300a701b1Robert Gordonstatic int svc_compose_rpcmsg(SVCXPRT *, CONN *, xdrproc_t, 1310a701b1Robert Gordon caddr_t, rdma_buf_t *, XDR **, struct rpc_msg *, 1320a701b1Robert Gordon bool_t, uint_t *); 1330a701b1Robert Gordonstatic bool_t rpcmsg_length(xdrproc_t, 1340a701b1Robert Gordon caddr_t, 1350a701b1Robert Gordon struct rpc_msg *, bool_t, int); 1360a701b1Robert Gordon 1377c478bdstevel@tonic-gate/* 1387c478bdstevel@tonic-gate * Server transport operations vector. 1397c478bdstevel@tonic-gate */ 1407c478bdstevel@tonic-gatestruct svc_ops rdma_svc_ops = { 1417c478bdstevel@tonic-gate svc_rdma_krecv, /* Get requests */ 1427c478bdstevel@tonic-gate svc_rdma_kgetargs, /* Deserialize arguments */ 1437c478bdstevel@tonic-gate svc_rdma_ksend, /* Send reply */ 1447c478bdstevel@tonic-gate svc_rdma_kfreeargs, /* Free argument data space */ 1457c478bdstevel@tonic-gate svc_rdma_kdestroy, /* Destroy transport handle */ 1467c478bdstevel@tonic-gate svc_rdma_kdup, /* Check entry in dup req cache */ 1477c478bdstevel@tonic-gate svc_rdma_kdupdone, /* Mark entry in dup req cache as done */ 1487c478bdstevel@tonic-gate svc_rdma_kgetres, /* Get pointer to response buffer */ 1497c478bdstevel@tonic-gate svc_rdma_kfreeres, /* Destroy pre-serialized response header */ 1507c478bdstevel@tonic-gate svc_rdma_kclone_destroy, /* Destroy a clone xprt */ 15160536efKaren Rochford svc_rdma_kstart, /* Tell `ready-to-receive' to rpcmod */ 1520a4b081Karen Rochford svc_rdma_kclone_xprt, /* Transport specific clone xprt */ 1534a3b052Andy Fiddaman svc_rdma_ktattrs, /* Get Transport Attributes */ 1544a3b052Andy Fiddaman NULL, /* Increment transport reference count */ 1554a3b052Andy Fiddaman NULL /* Decrement transport reference count */ 1567c478bdstevel@tonic-gate}; 1577c478bdstevel@tonic-gate 1587c478bdstevel@tonic-gate/* 1597c478bdstevel@tonic-gate * Server statistics 1607c478bdstevel@tonic-gate * NOTE: This structure type is duplicated in the NFS fast path. 1617c478bdstevel@tonic-gate */ 1627c478bdstevel@tonic-gatestruct { 1637c478bdstevel@tonic-gate kstat_named_t rscalls; 1647c478bdstevel@tonic-gate kstat_named_t rsbadcalls; 1657c478bdstevel@tonic-gate kstat_named_t rsnullrecv; 1667c478bdstevel@tonic-gate kstat_named_t rsbadlen; 1677c478bdstevel@tonic-gate kstat_named_t rsxdrcall; 1687c478bdstevel@tonic-gate kstat_named_t rsdupchecks; 1697c478bdstevel@tonic-gate kstat_named_t rsdupreqs; 1707c478bdstevel@tonic-gate kstat_named_t rslongrpcs; 1710a701b1Robert Gordon kstat_named_t rstotalreplies; 1720a701b1Robert Gordon kstat_named_t rstotallongreplies; 1730a701b1Robert Gordon kstat_named_t rstotalinlinereplies; 1747c478bdstevel@tonic-gate} rdmarsstat = { 1757c478bdstevel@tonic-gate { "calls", KSTAT_DATA_UINT64 }, 1767c478bdstevel@tonic-gate { "badcalls", KSTAT_DATA_UINT64 }, 1777c478bdstevel@tonic-gate { "nullrecv", KSTAT_DATA_UINT64 }, 1787c478bdstevel@tonic-gate { "badlen", KSTAT_DATA_UINT64 }, 1797c478bdstevel@tonic-gate { "xdrcall", KSTAT_DATA_UINT64 }, 1807c478bdstevel@tonic-gate { "dupchecks", KSTAT_DATA_UINT64 }, 1817c478bdstevel@tonic-gate { "dupreqs", KSTAT_DATA_UINT64 }, 1820a701b1Robert Gordon { "longrpcs", KSTAT_DATA_UINT64 }, 1830a701b1Robert Gordon { "totalreplies", KSTAT_DATA_UINT64 }, 1840a701b1Robert Gordon { "totallongreplies", KSTAT_DATA_UINT64 }, 1850a701b1Robert Gordon { "totalinlinereplies", KSTAT_DATA_UINT64 }, 1867c478bdstevel@tonic-gate}; 1877c478bdstevel@tonic-gate 1887c478bdstevel@tonic-gatekstat_named_t *rdmarsstat_ptr = (kstat_named_t *)&rdmarsstat; 1897c478bdstevel@tonic-gateuint_t rdmarsstat_ndata = sizeof (rdmarsstat) / sizeof (kstat_named_t); 1907c478bdstevel@tonic-gate 1911a5e258Josef 'Jeff' Sipek#define RSSTAT_INCR(x) atomic_inc_64(&rdmarsstat.x.value.ui64) 1927c478bdstevel@tonic-gate/* 1937c478bdstevel@tonic-gate * Create a transport record. 1947c478bdstevel@tonic-gate * The transport record, output buffer, and private data structure 1957c478bdstevel@tonic-gate * are allocated. The output buffer is serialized into using xdrmem. 1967c478bdstevel@tonic-gate * There is one transport record per user process which implements a 1977c478bdstevel@tonic-gate * set of services. 1987c478bdstevel@tonic-gate */ 1997c478bdstevel@tonic-gate/* ARGSUSED */ 2007c478bdstevel@tonic-gateint 2017c478bdstevel@tonic-gatesvc_rdma_kcreate(char *netid, SVC_CALLOUT_TABLE *sct, int id, 2020a701b1Robert Gordon rdma_xprt_group_t *started_xprts) 2037c478bdstevel@tonic-gate{ 2047c478bdstevel@tonic-gate int error; 2057c478bdstevel@tonic-gate SVCMASTERXPRT *xprt; 2067c478bdstevel@tonic-gate struct rdma_data *rd; 2077c478bdstevel@tonic-gate rdma_registry_t *rmod; 2087c478bdstevel@tonic-gate rdma_xprt_record_t *xprt_rec; 2097c478bdstevel@tonic-gate queue_t *q; 2107c478bdstevel@tonic-gate /* 2117c478bdstevel@tonic-gate * modload the RDMA plugins is not already done. 2127c478bdstevel@tonic-gate */ 2137c478bdstevel@tonic-gate if (!rdma_modloaded) { 2140a701b1Robert Gordon /*CONSTANTCONDITION*/ 2150a701b1Robert Gordon ASSERT(sizeof (struct clone_rdma_data) <= SVC_P2LEN); 2160a701b1Robert Gordon 2177c478bdstevel@tonic-gate mutex_enter(&rdma_modload_lock); 2187c478bdstevel@tonic-gate if (!rdma_modloaded) { 2197c478bdstevel@tonic-gate error = rdma_modload(); 2207c478bdstevel@tonic-gate } 2217c478bdstevel@tonic-gate mutex_exit(&rdma_modload_lock); 2227c478bdstevel@tonic-gate 2237c478bdstevel@tonic-gate if (error) 2247c478bdstevel@tonic-gate return (error); 2257c478bdstevel@tonic-gate } 2267c478bdstevel@tonic-gate 2277c478bdstevel@tonic-gate /* 2287c478bdstevel@tonic-gate * master_xprt_count is the count of master transport handles 2297c478bdstevel@tonic-gate * that were successfully created and are ready to recieve for 2307c478bdstevel@tonic-gate * RDMA based access. 2317c478bdstevel@tonic-gate */ 2327c478bdstevel@tonic-gate error = 0; 2337c478bdstevel@tonic-gate xprt_rec = NULL; 2347c478bdstevel@tonic-gate rw_enter(&rdma_lock, RW_READER); 2357c478bdstevel@tonic-gate if (rdma_mod_head == NULL) { 2367c478bdstevel@tonic-gate started_xprts->rtg_count = 0; 2377c478bdstevel@tonic-gate rw_exit(&rdma_lock); 2387c478bdstevel@tonic-gate if (rdma_dev_available) 2397c478bdstevel@tonic-gate return (EPROTONOSUPPORT); 2407c478bdstevel@tonic-gate else 2417c478bdstevel@tonic-gate return (ENODEV); 2427c478bdstevel@tonic-gate } 2437c478bdstevel@tonic-gate 2447c478bdstevel@tonic-gate /* 2457c478bdstevel@tonic-gate * If we have reached here, then atleast one RDMA plugin has loaded. 2467c478bdstevel@tonic-gate * Create a master_xprt, make it start listenining on the device, 2477c478bdstevel@tonic-gate * if an error is generated, record it, we might need to shut 2487c478bdstevel@tonic-gate * the master_xprt. 2497c478bdstevel@tonic-gate * SVC_START() calls svc_rdma_kstart which calls plugin binding 2507c478bdstevel@tonic-gate * routines. 2517c478bdstevel@tonic-gate */ 2527c478bdstevel@tonic-gate for (rmod = rdma_mod_head; rmod != NULL; rmod = rmod->r_next) { 2537c478bdstevel@tonic-gate 2547c478bdstevel@tonic-gate /* 2557c478bdstevel@tonic-gate * One SVCMASTERXPRT per RDMA plugin. 2567c478bdstevel@tonic-gate */ 2577c478bdstevel@tonic-gate xprt = kmem_zalloc(sizeof (*xprt), KM_SLEEP); 2587c478bdstevel@tonic-gate xprt->xp_ops = &rdma_svc_ops; 2597c478bdstevel@tonic-gate xprt->xp_sct = sct; 2607c478bdstevel@tonic-gate xprt->xp_type = T_RDMA; 2617c478bdstevel@tonic-gate mutex_init(&xprt->xp_req_lock, NULL, MUTEX_DEFAULT, NULL); 2627c478bdstevel@tonic-gate mutex_init(&xprt->xp_thread_lock, NULL, MUTEX_DEFAULT, NULL); 2637c478bdstevel@tonic-gate xprt->xp_req_head = (mblk_t *)0; 2647c478bdstevel@tonic-gate xprt->xp_req_tail = (mblk_t *)0; 2652695d4fMarcel Telka xprt->xp_full = FALSE; 2662695d4fMarcel Telka xprt->xp_enable = FALSE; 2672695d4fMarcel Telka xprt->xp_reqs = 0; 2682695d4fMarcel Telka xprt->xp_size = 0; 2697c478bdstevel@tonic-gate xprt->xp_threads = 0; 2707c478bdstevel@tonic-gate xprt->xp_detached_threads = 0; 2717c478bdstevel@tonic-gate 2727c478bdstevel@tonic-gate rd = kmem_zalloc(sizeof (*rd), KM_SLEEP); 2737c478bdstevel@tonic-gate xprt->xp_p2 = (caddr_t)rd; 2747c478bdstevel@tonic-gate rd->rd_xprt = xprt; 2757c478bdstevel@tonic-gate rd->r_mod = rmod->r_mod; 2767c478bdstevel@tonic-gate 2777c478bdstevel@tonic-gate q = &rd->rd_data.q; 2787c478bdstevel@tonic-gate xprt->xp_wq = q; 2797c478bdstevel@tonic-gate q->q_ptr = &rd->rd_xprt; 2807c478bdstevel@tonic-gate xprt->xp_netid = NULL; 2817c478bdstevel@tonic-gate 2827c478bdstevel@tonic-gate /* 2837c478bdstevel@tonic-gate * Each of the plugins will have their own Service ID 2847c478bdstevel@tonic-gate * to listener specific mapping, like port number for VI 2857c478bdstevel@tonic-gate * and service name for IB. 2867c478bdstevel@tonic-gate */ 2877c478bdstevel@tonic-gate rd->rd_data.svcid = id; 2887c478bdstevel@tonic-gate error = svc_xprt_register(xprt, id); 2897c478bdstevel@tonic-gate if (error) { 2900a701b1Robert Gordon DTRACE_PROBE(krpc__e__svcrdma__xprt__reg); 2917c478bdstevel@tonic-gate goto cleanup; 2927c478bdstevel@tonic-gate } 2937c478bdstevel@tonic-gate 2947c478bdstevel@tonic-gate SVC_START(xprt); 2957c478bdstevel@tonic-gate if (!rd->rd_data.active) { 2967c478bdstevel@tonic-gate svc_xprt_unregister(xprt); 2977c478bdstevel@tonic-gate error = rd->rd_data.err_code; 2987c478bdstevel@tonic-gate goto cleanup; 2997c478bdstevel@tonic-gate } 3007c478bdstevel@tonic-gate 3017c478bdstevel@tonic-gate /* 3027c478bdstevel@tonic-gate * This is set only when there is atleast one or more 3037c478bdstevel@tonic-gate * transports successfully created. We insert the pointer 3047c478bdstevel@tonic-gate * to the created RDMA master xprt into a separately maintained 3057c478bdstevel@tonic-gate * list. This way we can easily reference it later to cleanup, 3067c478bdstevel@tonic-gate * when NFS kRPC service pool is going away/unregistered. 3077c478bdstevel@tonic-gate */ 3087c478bdstevel@tonic-gate started_xprts->rtg_count ++; 3097c478bdstevel@tonic-gate xprt_rec = kmem_alloc(sizeof (*xprt_rec), KM_SLEEP); 3107c478bdstevel@tonic-gate xprt_rec->rtr_xprt_ptr = xprt; 3117c478bdstevel@tonic-gate xprt_rec->rtr_next = started_xprts->rtg_listhead; 3127c478bdstevel@tonic-gate started_xprts->rtg_listhead = xprt_rec; 3137c478bdstevel@tonic-gate continue; 3147c478bdstevel@tonic-gatecleanup: 3157c478bdstevel@tonic-gate SVC_DESTROY(xprt); 3167c478bdstevel@tonic-gate if (error == RDMA_FAILED) 3177c478bdstevel@tonic-gate error = EPROTONOSUPPORT; 3187c478bdstevel@tonic-gate } 3197c478bdstevel@tonic-gate 3207c478bdstevel@tonic-gate rw_exit(&rdma_lock); 3217c478bdstevel@tonic-gate 3227c478bdstevel@tonic-gate /* 3237c478bdstevel@tonic-gate * Don't return any error even if a single plugin was started 3247c478bdstevel@tonic-gate * successfully. 3257c478bdstevel@tonic-gate */ 3267c478bdstevel@tonic-gate if (started_xprts->rtg_count == 0) 3277c478bdstevel@tonic-gate return (error); 3287c478bdstevel@tonic-gate return (0); 3297c478bdstevel@tonic-gate} 3307c478bdstevel@tonic-gate 3317c478bdstevel@tonic-gate/* 3327c478bdstevel@tonic-gate * Cleanup routine for freeing up memory allocated by 3337c478bdstevel@tonic-gate * svc_rdma_kcreate() 3347c478bdstevel@tonic-gate */ 3357c478bdstevel@tonic-gatevoid 3367c478bdstevel@tonic-gatesvc_rdma_kdestroy(SVCMASTERXPRT *xprt) 3377c478bdstevel@tonic-gate{ 3387c478bdstevel@tonic-gate struct rdma_data *rd = (struct rdma_data *)xprt->xp_p2; 3397c478bdstevel@tonic-gate 3407c478bdstevel@tonic-gate 3417c478bdstevel@tonic-gate mutex_destroy(&xprt->xp_req_lock); 3427c478bdstevel@tonic-gate mutex_destroy(&xprt->xp_thread_lock); 3437c478bdstevel@tonic-gate kmem_free(rd, sizeof (*rd)); 3447c478bdstevel@tonic-gate kmem_free(xprt, sizeof (*xprt)); 3457c478bdstevel@tonic-gate} 3467c478bdstevel@tonic-gate 3477c478bdstevel@tonic-gate 3487c478bdstevel@tonic-gatestatic void 3497c478bdstevel@tonic-gatesvc_rdma_kstart(SVCMASTERXPRT *xprt) 3507c478bdstevel@tonic-gate{ 3517c478bdstevel@tonic-gate struct rdma_svc_data *svcdata; 3527c478bdstevel@tonic-gate rdma_mod_t *rmod; 3537c478bdstevel@tonic-gate 3547c478bdstevel@tonic-gate svcdata = &((struct rdma_data *)xprt->xp_p2)->rd_data; 3557c478bdstevel@tonic-gate rmod = ((struct rdma_data *)xprt->xp_p2)->r_mod; 3567c478bdstevel@tonic-gate 3577c478bdstevel@tonic-gate /* 3587c478bdstevel@tonic-gate * Create a listener for module at this port 3597c478bdstevel@tonic-gate */ 3607c478bdstevel@tonic-gate 36151f34d4Rajkumar Sivaprakasam if (rmod->rdma_count != 0) 36251f34d4Rajkumar Sivaprakasam (*rmod->rdma_ops->rdma_svc_listen)(svcdata); 36351f34d4Rajkumar Sivaprakasam else 36451f34d4Rajkumar Sivaprakasam svcdata->err_code = RDMA_FAILED; 3657c478bdstevel@tonic-gate} 3667c478bdstevel@tonic-gate 3677c478bdstevel@tonic-gatevoid 3687c478bdstevel@tonic-gatesvc_rdma_kstop(SVCMASTERXPRT *xprt) 3697c478bdstevel@tonic-gate{ 3707c478bdstevel@tonic-gate struct rdma_svc_data *svcdata; 3717c478bdstevel@tonic-gate rdma_mod_t *rmod; 3727c478bdstevel@tonic-gate 3737c478bdstevel@tonic-gate svcdata = &((struct rdma_data *)xprt->xp_p2)->rd_data; 3747c478bdstevel@tonic-gate rmod = ((struct rdma_data *)xprt->xp_p2)->r_mod; 3757c478bdstevel@tonic-gate 3767c478bdstevel@tonic-gate /* 37751f34d4Rajkumar Sivaprakasam * Call the stop listener routine for each plugin. If rdma_count is 37851f34d4Rajkumar Sivaprakasam * already zero set active to zero. 3797c478bdstevel@tonic-gate */ 38051f34d4Rajkumar Sivaprakasam if (rmod->rdma_count != 0) 38151f34d4Rajkumar Sivaprakasam (*rmod->rdma_ops->rdma_svc_stop)(svcdata); 38251f34d4Rajkumar Sivaprakasam else 38351f34d4Rajkumar Sivaprakasam svcdata->active = 0; 3847c478bdstevel@tonic-gate if (svcdata->active) 3850a701b1Robert Gordon DTRACE_PROBE(krpc__e__svcrdma__kstop); 3867c478bdstevel@tonic-gate} 3877c478bdstevel@tonic-gate 3887c478bdstevel@tonic-gate/* ARGSUSED */ 3897c478bdstevel@tonic-gatestatic void 3907c478bdstevel@tonic-gatesvc_rdma_kclone_destroy(SVCXPRT *clone_xprt) 3917c478bdstevel@tonic-gate{ 39260536efKaren Rochford 39360536efKaren Rochford struct clone_rdma_data *cdrp; 39460536efKaren Rochford cdrp = (struct clone_rdma_data *)clone_xprt->xp_p2buf; 39560536efKaren Rochford 39660536efKaren Rochford /* 39760536efKaren Rochford * Only free buffers and release connection when cloned is set. 39860536efKaren Rochford */ 39960536efKaren Rochford if (cdrp->cloned != TRUE) 40060536efKaren Rochford return; 40160536efKaren Rochford 40260536efKaren Rochford rdma_buf_free(cdrp->conn, &cdrp->rpcbuf); 40360536efKaren Rochford if (cdrp->cl_reply) { 40460536efKaren Rochford clist_free(cdrp->cl_reply); 40560536efKaren Rochford cdrp->cl_reply = NULL; 40660536efKaren Rochford } 40760536efKaren Rochford RDMA_REL_CONN(cdrp->conn); 40860536efKaren Rochford 40960536efKaren Rochford cdrp->cloned = 0; 4107c478bdstevel@tonic-gate} 4117c478bdstevel@tonic-gate 41260536efKaren Rochford/* 41360536efKaren Rochford * Clone the xprt specific information. It will be freed by 41460536efKaren Rochford * SVC_CLONE_DESTROY. 41560536efKaren Rochford */ 41660536efKaren Rochfordstatic void 41760536efKaren Rochfordsvc_rdma_kclone_xprt(SVCXPRT *src_xprt, SVCXPRT *dst_xprt) 41860536efKaren Rochford{ 41960536efKaren Rochford struct clone_rdma_data *srcp2; 42060536efKaren Rochford struct clone_rdma_data *dstp2; 42160536efKaren Rochford 42260536efKaren Rochford srcp2 = (struct clone_rdma_data *)src_xprt->xp_p2buf; 42360536efKaren Rochford dstp2 = (struct clone_rdma_data *)dst_xprt->xp_p2buf; 42460536efKaren Rochford 42560536efKaren Rochford if (srcp2->conn != NULL) { 42660536efKaren Rochford srcp2->cloned = TRUE; 42760536efKaren Rochford *dstp2 = *srcp2; 42860536efKaren Rochford } 42960536efKaren Rochford} 43060536efKaren Rochford 4310a4b081Karen Rochfordstatic void 4320a4b081Karen Rochfordsvc_rdma_ktattrs(SVCXPRT *clone_xprt, int attrflag, void **tattr) 4330a4b081Karen Rochford{ 4340a4b081Karen Rochford CONN *conn; 4350a4b081Karen Rochford *tattr = NULL; 4360a4b081Karen Rochford 4370a4b081Karen Rochford switch (attrflag) { 4380a4b081Karen Rochford case SVC_TATTR_ADDRMASK: 4390a4b081Karen Rochford conn = ((struct clone_rdma_data *)clone_xprt->xp_p2buf)->conn; 4400a4b081Karen Rochford ASSERT(conn != NULL); 4410a4b081Karen Rochford if (conn) 4420a4b081Karen Rochford *tattr = (void *)&conn->c_addrmask; 4430a4b081Karen Rochford } 4440a4b081Karen Rochford} 44560536efKaren Rochford 4467c478bdstevel@tonic-gatestatic bool_t 4477c478bdstevel@tonic-gatesvc_rdma_krecv(SVCXPRT *clone_xprt, mblk_t *mp, struct rpc_msg *msg) 4487c478bdstevel@tonic-gate{ 4490a701b1Robert Gordon XDR *xdrs; 4500a701b1Robert Gordon CONN *conn; 4510a701b1Robert Gordon rdma_recv_data_t *rdp = (rdma_recv_data_t *)mp->b_rptr; 4520a701b1Robert Gordon struct clone_rdma_data *crdp; 4530a701b1Robert Gordon struct clist *cl = NULL; 4540a701b1Robert Gordon struct clist *wcl = NULL; 4550a701b1Robert Gordon struct clist *cllong = NULL; 4560a701b1Robert Gordon 4570a701b1Robert Gordon rdma_stat status; 4580a701b1Robert Gordon uint32_t vers, op, pos, xid; 4590a701b1Robert Gordon uint32_t rdma_credit; 4600a701b1Robert Gordon uint32_t wcl_total_length = 0; 4610a701b1Robert Gordon bool_t wwl = FALSE; 4620a701b1Robert Gordon 4630a701b1Robert Gordon crdp = (struct clone_rdma_data *)clone_xprt->xp_p2buf; 4647c478bdstevel@tonic-gate RSSTAT_INCR(rscalls); 4657c478bdstevel@tonic-gate conn = rdp->conn; 4667c478bdstevel@tonic-gate 4677c478bdstevel@tonic-gate status = rdma_svc_postrecv(conn); 4687c478bdstevel@tonic-gate if (status != RDMA_SUCCESS) { 4690a701b1Robert Gordon DTRACE_PROBE(krpc__e__svcrdma__krecv__postrecv); 4700a701b1Robert Gordon goto badrpc_call; 4717c478bdstevel@tonic-gate } 4727c478bdstevel@tonic-gate 4737c478bdstevel@tonic-gate xdrs = &clone_xprt->xp_xdrin; 4747c478bdstevel@tonic-gate xdrmem_create(xdrs, rdp->rpcmsg.addr, rdp->rpcmsg.len, XDR_DECODE); 4757c478bdstevel@tonic-gate xid = *(uint32_t *)rdp->rpcmsg.addr; 4767c478bdstevel@tonic-gate XDR_SETPOS(xdrs, sizeof (uint32_t)); 4770a701b1Robert Gordon 4787c478bdstevel@tonic-gate if (! xdr_u_int(xdrs, &vers) || 4790a701b1Robert Gordon ! xdr_u_int(xdrs, &rdma_credit) || 4807c478bdstevel@tonic-gate ! xdr_u_int(xdrs, &op)) { 4810a701b1Robert Gordon DTRACE_PROBE(krpc__e__svcrdma__krecv__uint); 4820a701b1Robert Gordon goto xdr_err; 4837c478bdstevel@tonic-gate } 4840a701b1Robert Gordon 4850a701b1Robert Gordon /* Checking if the status of the recv operation was normal */ 4860a701b1Robert Gordon if (rdp->status != 0) { 4870a701b1Robert Gordon DTRACE_PROBE1(krpc__e__svcrdma__krecv__invalid__status, 4880a701b1Robert Gordon int, rdp->status); 4890a701b1Robert Gordon goto badrpc_call; 4907c478bdstevel@tonic-gate } 4917c478bdstevel@tonic-gate 4927c478bdstevel@tonic-gate if (! xdr_do_clist(xdrs, &cl)) { 4930a701b1Robert Gordon DTRACE_PROBE(krpc__e__svcrdma__krecv__do__clist); 4940a701b1Robert Gordon goto xdr_err; 4957c478bdstevel@tonic-gate } 4967c478bdstevel@tonic-gate 4970a701b1Robert Gordon if (!xdr_decode_wlist_svc(xdrs, &wcl, &wwl, &wcl_total_length, conn)) { 4980a701b1Robert Gordon DTRACE_PROBE(krpc__e__svcrdma__krecv__decode__wlist); 4990a701b1Robert Gordon if (cl) 5000a701b1Robert Gordon clist_free(cl); 5010a701b1Robert Gordon goto xdr_err; 5020a701b1Robert Gordon } 5030a701b1Robert Gordon crdp->cl_wlist = wcl; 5040a701b1Robert Gordon 5050a701b1Robert Gordon crdp->cl_reply = NULL; 5060a701b1Robert Gordon (void) xdr_decode_reply_wchunk(xdrs, &crdp->cl_reply); 5070a701b1Robert Gordon 5087c478bdstevel@tonic-gate /* 5097c478bdstevel@tonic-gate * A chunk at 0 offset indicates that the RPC call message 5107c478bdstevel@tonic-gate * is in a chunk. Get the RPC call message chunk. 5117c478bdstevel@tonic-gate */ 5127c478bdstevel@tonic-gate if (cl != NULL && op == RDMA_NOMSG) { 5137c478bdstevel@tonic-gate 5147c478bdstevel@tonic-gate /* Remove RPC call message chunk from chunklist */ 5157c478bdstevel@tonic-gate cllong = cl; 5167c478bdstevel@tonic-gate cl = cl->c_next; 5177c478bdstevel@tonic-gate cllong->c_next = NULL; 5187c478bdstevel@tonic-gate 5190a701b1Robert Gordon 5207c478bdstevel@tonic-gate /* Allocate and register memory for the RPC call msg chunk */ 5210a701b1Robert Gordon cllong->rb_longbuf.type = RDMA_LONG_BUFFER; 5220a701b1Robert Gordon cllong->rb_longbuf.len = cllong->c_len > LONG_REPLY_LEN ? 5230a701b1Robert Gordon cllong->c_len : LONG_REPLY_LEN; 5240a701b1Robert Gordon 5250a701b1Robert Gordon if (rdma_buf_alloc(conn, &cllong->rb_longbuf)) { 5267c478bdstevel@tonic-gate clist_free(cllong); 5270a701b1Robert Gordon goto cll_malloc_err; 5287c478bdstevel@tonic-gate } 5290a701b1Robert Gordon 5300a701b1Robert Gordon cllong->u.c_daddr3 = cllong->rb_longbuf.addr; 5310a701b1Robert Gordon 53225c9576Toomas Soome if (cllong->u.c_daddr == 0) { 5330a701b1Robert Gordon DTRACE_PROBE(krpc__e__svcrdma__krecv__nomem); 5340a701b1Robert Gordon rdma_buf_free(conn, &cllong->rb_longbuf); 5350a701b1Robert Gordon clist_free(cllong); 5360a701b1Robert Gordon goto cll_malloc_err; 5370a701b1Robert Gordon } 5380a701b1Robert Gordon 5390a701b1Robert Gordon status = clist_register(conn, cllong, CLIST_REG_DST); 5407c478bdstevel@tonic-gate if (status) { 5410a701b1Robert Gordon DTRACE_PROBE(krpc__e__svcrdma__krecv__clist__reg); 5420a701b1Robert Gordon rdma_buf_free(conn, &cllong->rb_longbuf); 5437c478bdstevel@tonic-gate clist_free(cllong); 5440a701b1Robert Gordon goto cll_malloc_err; 5457c478bdstevel@tonic-gate } 5467c478bdstevel@tonic-gate 5477c478bdstevel@tonic-gate /* 5487c478bdstevel@tonic-gate * Now read the RPC call message in 5497c478bdstevel@tonic-gate */ 5507c478bdstevel@tonic-gate status = RDMA_READ(conn, cllong, WAIT); 5517c478bdstevel@tonic-gate if (status) { 5520a701b1Robert Gordon DTRACE_PROBE(krpc__e__svcrdma__krecv__read); 553f837ee4Siddheshwar Mahesh (void) clist_deregister(conn, cllong); 5540a701b1Robert Gordon rdma_buf_free(conn, &cllong->rb_longbuf); 5557c478bdstevel@tonic-gate clist_free(cllong); 5560a701b1Robert Gordon goto cll_malloc_err; 5577c478bdstevel@tonic-gate } 5587c478bdstevel@tonic-gate 5590a701b1Robert Gordon status = clist_syncmem(conn, cllong, CLIST_REG_DST); 560f837ee4Siddheshwar Mahesh (void) clist_deregister(conn, cllong); 5617c478bdstevel@tonic-gate 5620a701b1Robert Gordon xdrrdma_create(xdrs, (caddr_t)(uintptr_t)cllong->u.c_daddr3, 5637c478bdstevel@tonic-gate cllong->c_len, 0, cl, XDR_DECODE, conn); 5647c478bdstevel@tonic-gate 5650a701b1Robert Gordon crdp->rpcbuf = cllong->rb_longbuf; 5660a701b1Robert Gordon crdp->rpcbuf.len = cllong->c_len; 5677c478bdstevel@tonic-gate clist_free(cllong); 5687c478bdstevel@tonic-gate RDMA_BUF_FREE(conn, &rdp->rpcmsg); 5697c478bdstevel@tonic-gate } else { 5707c478bdstevel@tonic-gate pos = XDR_GETPOS(xdrs); 5717c478bdstevel@tonic-gate xdrrdma_create(xdrs, rdp->rpcmsg.addr + pos, 5720a701b1Robert Gordon rdp->rpcmsg.len - pos, 0, cl, XDR_DECODE, conn); 5730a701b1Robert Gordon crdp->rpcbuf = rdp->rpcmsg; 5740a701b1Robert Gordon 5750a701b1Robert Gordon /* Use xdrrdmablk_ops to indicate there is a read chunk list */ 5760a701b1Robert Gordon if (cl != NULL) { 5770a701b1Robert Gordon int32_t flg = XDR_RDMA_RLIST_REG; 5780a701b1Robert Gordon 5790a701b1Robert Gordon XDR_CONTROL(xdrs, XDR_RDMA_SET_FLAGS, &flg); 5800a701b1Robert Gordon xdrs->x_ops = &xdrrdmablk_ops; 5810a701b1Robert Gordon } 5827c478bdstevel@tonic-gate } 5830a701b1Robert Gordon 5840a701b1Robert Gordon if (crdp->cl_wlist) { 5850a701b1Robert Gordon int32_t flg = XDR_RDMA_WLIST_REG; 5860a701b1Robert Gordon 5870a701b1Robert Gordon XDR_CONTROL(xdrs, XDR_RDMA_SET_WLIST, crdp->cl_wlist); 5880a701b1Robert Gordon XDR_CONTROL(xdrs, XDR_RDMA_SET_FLAGS, &flg); 5890a701b1Robert Gordon } 5900a701b1Robert Gordon 5917c478bdstevel@tonic-gate if (! xdr_callmsg(xdrs, msg)) { 5920a701b1Robert Gordon DTRACE_PROBE(krpc__e__svcrdma__krecv__callmsg); 5937c478bdstevel@tonic-gate RSSTAT_INCR(rsxdrcall); 5940a701b1Robert Gordon goto callmsg_err; 5957c478bdstevel@tonic-gate } 5967c478bdstevel@tonic-gate 5977c478bdstevel@tonic-gate /* 5987c478bdstevel@tonic-gate * Point the remote transport address in the service_transport 5997c478bdstevel@tonic-gate * handle at the address in the request. 6007c478bdstevel@tonic-gate */ 6017c478bdstevel@tonic-gate clone_xprt->xp_rtaddr.buf = conn->c_raddr.buf; 6027c478bdstevel@tonic-gate clone_xprt->xp_rtaddr.len = conn->c_raddr.len; 6037c478bdstevel@tonic-gate clone_xprt->xp_rtaddr.maxlen = conn->c_raddr.len; 6047523befSiddheshwar Mahesh 6057523befSiddheshwar Mahesh clone_xprt->xp_lcladdr.buf = conn->c_laddr.buf; 6067523befSiddheshwar Mahesh clone_xprt->xp_lcladdr.len = conn->c_laddr.len; 6077523befSiddheshwar Mahesh clone_xprt->xp_lcladdr.maxlen = conn->c_laddr.len; 6087523befSiddheshwar Mahesh 6097523befSiddheshwar Mahesh /* 6107523befSiddheshwar Mahesh * In case of RDMA, connection management is 6117523befSiddheshwar Mahesh * entirely done in rpcib module and netid in the 6127523befSiddheshwar Mahesh * SVCMASTERXPRT is NULL. Initialize the clone netid 6137523befSiddheshwar Mahesh * from the connection. 6147523befSiddheshwar Mahesh */ 6157523befSiddheshwar Mahesh 6167523befSiddheshwar Mahesh clone_xprt->xp_netid = conn->c_netid; 6177523befSiddheshwar Mahesh 6187c478bdstevel@tonic-gate clone_xprt->xp_xid = xid; 6190a701b1Robert Gordon crdp->conn = conn; 6200a701b1Robert Gordon 6217c478bdstevel@tonic-gate freeb(mp); 6220a701b1Robert Gordon 6237c478bdstevel@tonic-gate return (TRUE); 6240a701b1Robert Gordon 6250a701b1Robert Gordoncallmsg_err: 6260a701b1Robert Gordon rdma_buf_free(conn, &crdp->rpcbuf); 6270a701b1Robert Gordon 6280a701b1Robert Gordoncll_malloc_err: 6290a701b1Robert Gordon if (cl) 6300a701b1Robert Gordon clist_free(cl); 6310a701b1Robert Gordonxdr_err: 6320a701b1Robert Gordon XDR_DESTROY(xdrs); 6330a701b1Robert Gordon 6340a701b1Robert Gordonbadrpc_call: 6350a701b1Robert Gordon RDMA_BUF_FREE(conn, &rdp->rpcmsg); 6360a701b1Robert Gordon RDMA_REL_CONN(conn); 6370a701b1Robert Gordon freeb(mp); 6380a701b1Robert Gordon RSSTAT_INCR(rsbadcalls); 6390a701b1Robert Gordon return (FALSE); 6400a701b1Robert Gordon} 6410a701b1Robert Gordon 6420a701b1Robert Gordonstatic int 6430a701b1Robert Gordonsvc_process_long_reply(SVCXPRT * clone_xprt, 6440a701b1Robert Gordon xdrproc_t xdr_results, caddr_t xdr_location, 6450a701b1Robert Gordon struct rpc_msg *msg, bool_t has_args, int *msglen, 6460a701b1Robert Gordon int *freelen, int *numchunks, unsigned int *final_len) 6470a701b1Robert Gordon{ 6480a701b1Robert Gordon int status; 6490a701b1Robert Gordon XDR xdrslong; 6500a701b1Robert Gordon struct clist *wcl = NULL; 6510a701b1Robert Gordon int count = 0; 6520a701b1Robert Gordon int alloc_len; 6530a701b1Robert Gordon char *memp; 6540a701b1Robert Gordon rdma_buf_t long_rpc = {0}; 6550a701b1Robert Gordon struct clone_rdma_data *crdp; 6560a701b1Robert Gordon 6570a701b1Robert Gordon crdp = (struct clone_rdma_data *)clone_xprt->xp_p2buf; 6580a701b1Robert Gordon 6590a701b1Robert Gordon bzero(&xdrslong, sizeof (xdrslong)); 6600a701b1Robert Gordon 6610a701b1Robert Gordon /* Choose a size for the long rpc response */ 6620a701b1Robert Gordon if (MSG_IS_RPCSEC_GSS(msg)) { 6630a701b1Robert Gordon alloc_len = RNDUP(MAX_AUTH_BYTES + *msglen); 6640a701b1Robert Gordon } else { 6650a701b1Robert Gordon alloc_len = RNDUP(*msglen); 6660a701b1Robert Gordon } 6670a701b1Robert Gordon 6680a701b1Robert Gordon if (alloc_len <= 64 * 1024) { 6690a701b1Robert Gordon if (alloc_len > 32 * 1024) { 6700a701b1Robert Gordon alloc_len = 64 * 1024; 6710a701b1Robert Gordon } else { 6720a701b1Robert Gordon if (alloc_len > 16 * 1024) { 6730a701b1Robert Gordon alloc_len = 32 * 1024; 6740a701b1Robert Gordon } else { 6750a701b1Robert Gordon alloc_len = 16 * 1024; 6760a701b1Robert Gordon } 6770a701b1Robert Gordon } 6780a701b1Robert Gordon } 6790a701b1Robert Gordon 6800a701b1Robert Gordon long_rpc.type = RDMA_LONG_BUFFER; 6810a701b1Robert Gordon long_rpc.len = alloc_len; 6820a701b1Robert Gordon if (rdma_buf_alloc(crdp->conn, &long_rpc)) { 6830a701b1Robert Gordon return (SVC_RDMA_FAIL); 6840a701b1Robert Gordon } 6850a701b1Robert Gordon 6860a701b1Robert Gordon memp = long_rpc.addr; 6870a701b1Robert Gordon xdrmem_create(&xdrslong, memp, alloc_len, XDR_ENCODE); 6880a701b1Robert Gordon 6890a701b1Robert Gordon msg->rm_xid = clone_xprt->xp_xid; 6900a701b1Robert Gordon 6910a701b1Robert Gordon if (!(xdr_replymsg(&xdrslong, msg) && 6920a701b1Robert Gordon (!has_args || SVCAUTH_WRAP(&clone_xprt->xp_auth, &xdrslong, 6930a701b1Robert Gordon xdr_results, xdr_location)))) { 6940a701b1Robert Gordon rdma_buf_free(crdp->conn, &long_rpc); 6950a701b1Robert Gordon DTRACE_PROBE(krpc__e__svcrdma__longrep__authwrap); 6960a701b1Robert Gordon return (SVC_RDMA_FAIL); 6970a701b1Robert Gordon } 6980a701b1Robert Gordon 6990a701b1Robert Gordon *final_len = XDR_GETPOS(&xdrslong); 7000a701b1Robert Gordon 701f837ee4Siddheshwar Mahesh DTRACE_PROBE1(krpc__i__replylen, uint_t, *final_len); 7020a701b1Robert Gordon *numchunks = 0; 7030a701b1Robert Gordon *freelen = 0; 7040a701b1Robert Gordon 7050a701b1Robert Gordon wcl = crdp->cl_reply; 7060a701b1Robert Gordon wcl->rb_longbuf = long_rpc; 7070a701b1Robert Gordon 7080a701b1Robert Gordon count = *final_len; 709f837ee4Siddheshwar Mahesh while ((wcl != NULL) && (count > 0)) { 710f837ee4Siddheshwar Mahesh 7110a701b1Robert Gordon if (wcl->c_dmemhandle.mrc_rmr == 0) 7120a701b1Robert Gordon break; 7130a701b1Robert Gordon 714f837ee4Siddheshwar Mahesh DTRACE_PROBE2(krpc__i__write__chunks, uint32_t, count, 715f837ee4Siddheshwar Mahesh uint32_t, wcl->c_len); 716f837ee4Siddheshwar Mahesh 7170a701b1Robert Gordon if (wcl->c_len > count) { 7180a701b1Robert Gordon wcl->c_len = count; 7190a701b1Robert Gordon } 7200a701b1Robert Gordon wcl->w.c_saddr3 = (caddr_t)memp; 7210a701b1Robert Gordon 7220a701b1Robert Gordon count -= wcl->c_len; 7230a701b1Robert Gordon *numchunks += 1; 7240a701b1Robert Gordon memp += wcl->c_len; 7250a701b1Robert Gordon wcl = wcl->c_next; 7260a701b1Robert Gordon } 7270a701b1Robert Gordon 728f837ee4Siddheshwar Mahesh /* 729f837ee4Siddheshwar Mahesh * Make rest of the chunks 0-len 730f837ee4Siddheshwar Mahesh */ 731f837ee4Siddheshwar Mahesh while (wcl != NULL) { 732f837ee4Siddheshwar Mahesh if (wcl->c_dmemhandle.mrc_rmr == 0) 733f837ee4Siddheshwar Mahesh break; 734f837ee4Siddheshwar Mahesh wcl->c_len = 0; 735f837ee4Siddheshwar Mahesh wcl = wcl->c_next; 736f837ee4Siddheshwar Mahesh } 737f837ee4Siddheshwar Mahesh 7380a701b1Robert Gordon wcl = crdp->cl_reply; 7390a701b1Robert Gordon 7400a701b1Robert Gordon /* 7410a701b1Robert Gordon * MUST fail if there are still more data 7420a701b1Robert Gordon */ 7430a701b1Robert Gordon if (count > 0) { 7440a701b1Robert Gordon rdma_buf_free(crdp->conn, &long_rpc); 7450a701b1Robert Gordon DTRACE_PROBE(krpc__e__svcrdma__longrep__dlen__clist); 7460a701b1Robert Gordon return (SVC_RDMA_FAIL); 7470a701b1Robert Gordon } 7480a701b1Robert Gordon 7490a701b1Robert Gordon if (clist_register(crdp->conn, wcl, CLIST_REG_SOURCE) != RDMA_SUCCESS) { 7500a701b1Robert Gordon rdma_buf_free(crdp->conn, &long_rpc); 7510a701b1Robert Gordon DTRACE_PROBE(krpc__e__svcrdma__longrep__clistreg); 7520a701b1Robert Gordon return (SVC_RDMA_FAIL); 7530a701b1Robert Gordon } 7540a701b1Robert Gordon 7550a701b1Robert Gordon status = clist_syncmem(crdp->conn, wcl, CLIST_REG_SOURCE); 7560a701b1Robert Gordon 7570a701b1Robert Gordon if (status) { 758f837ee4Siddheshwar Mahesh (void) clist_deregister(crdp->conn, wcl); 7590a701b1Robert Gordon rdma_buf_free(crdp->conn, &long_rpc); 7600a701b1Robert Gordon DTRACE_PROBE(krpc__e__svcrdma__longrep__syncmem); 7610a701b1Robert Gordon return (SVC_RDMA_FAIL); 7620a701b1Robert Gordon } 7630a701b1Robert Gordon 7640a701b1Robert Gordon status = RDMA_WRITE(crdp->conn, wcl, WAIT); 7650a701b1Robert Gordon 766f837ee4Siddheshwar Mahesh (void) clist_deregister(crdp->conn, wcl); 7670a701b1Robert Gordon rdma_buf_free(crdp->conn, &wcl->rb_longbuf); 7680a701b1Robert Gordon 7690a701b1Robert Gordon if (status != RDMA_SUCCESS) { 7700a701b1Robert Gordon DTRACE_PROBE(krpc__e__svcrdma__longrep__write); 7710a701b1Robert Gordon return (SVC_RDMA_FAIL); 7720a701b1Robert Gordon } 7730a701b1Robert Gordon 7740a701b1Robert Gordon return (SVC_RDMA_SUCCESS); 7750a701b1Robert Gordon} 7760a701b1Robert Gordon 7770a701b1Robert Gordon 7780a701b1Robert Gordonstatic int 7790a701b1Robert Gordonsvc_compose_rpcmsg(SVCXPRT * clone_xprt, CONN * conn, xdrproc_t xdr_results, 7800a701b1Robert Gordon caddr_t xdr_location, rdma_buf_t *rpcreply, XDR ** xdrs, 7810a701b1Robert Gordon struct rpc_msg *msg, bool_t has_args, uint_t *len) 7820a701b1Robert Gordon{ 7830a701b1Robert Gordon /* 7840a701b1Robert Gordon * Get a pre-allocated buffer for rpc reply 7850a701b1Robert Gordon */ 7860a701b1Robert Gordon rpcreply->type = SEND_BUFFER; 7870a701b1Robert Gordon if (rdma_buf_alloc(conn, rpcreply)) { 7880a701b1Robert Gordon DTRACE_PROBE(krpc__e__svcrdma__rpcmsg__reply__nofreebufs); 7890a701b1Robert Gordon return (SVC_RDMA_FAIL); 7900a701b1Robert Gordon } 7910a701b1Robert Gordon 7920a701b1Robert Gordon xdrrdma_create(*xdrs, rpcreply->addr, rpcreply->len, 7930a701b1Robert Gordon 0, NULL, XDR_ENCODE, conn); 7940a701b1Robert Gordon 7950a701b1Robert Gordon msg->rm_xid = clone_xprt->xp_xid; 7960a701b1Robert Gordon 7970a701b1Robert Gordon if (has_args) { 7980a701b1Robert Gordon if (!(xdr_replymsg(*xdrs, msg) && 7990a701b1Robert Gordon (!has_args || 8000a701b1Robert Gordon SVCAUTH_WRAP(&clone_xprt->xp_auth, *xdrs, 8010a701b1Robert Gordon xdr_results, xdr_location)))) { 8020a701b1Robert Gordon rdma_buf_free(conn, rpcreply); 8030a701b1Robert Gordon DTRACE_PROBE( 8040a701b1Robert Gordon krpc__e__svcrdma__rpcmsg__reply__authwrap1); 8050a701b1Robert Gordon return (SVC_RDMA_FAIL); 8060a701b1Robert Gordon } 8070a701b1Robert Gordon } else { 8080a701b1Robert Gordon if (!xdr_replymsg(*xdrs, msg)) { 8090a701b1Robert Gordon rdma_buf_free(conn, rpcreply); 8100a701b1Robert Gordon DTRACE_PROBE( 8110a701b1Robert Gordon krpc__e__svcrdma__rpcmsg__reply__authwrap2); 8120a701b1Robert Gordon return (SVC_RDMA_FAIL); 8130a701b1Robert Gordon } 8140a701b1Robert Gordon } 8150a701b1Robert Gordon 8160a701b1Robert Gordon *len = XDR_GETPOS(*xdrs); 8170a701b1Robert Gordon 8180a701b1Robert Gordon return (SVC_RDMA_SUCCESS); 8197c478bdstevel@tonic-gate} 8207c478bdstevel@tonic-gate 8217c478bdstevel@tonic-gate/* 8227c478bdstevel@tonic-gate * Send rpc reply. 8237c478bdstevel@tonic-gate */ 8247c478bdstevel@tonic-gatestatic bool_t 8250a701b1Robert Gordonsvc_rdma_ksend(SVCXPRT * clone_xprt, struct rpc_msg *msg) 8267c478bdstevel@tonic-gate{ 8270a701b1Robert Gordon XDR *xdrs_rpc = &(clone_xprt->xp_xdrout); 8280a701b1Robert Gordon XDR xdrs_rhdr; 8290a701b1Robert Gordon CONN *conn = NULL; 8300a701b1Robert Gordon rdma_buf_t rbuf_resp = {0}, rbuf_rpc_resp = {0}; 8310a701b1Robert Gordon 8320a701b1Robert Gordon struct clone_rdma_data *crdp; 8330a701b1Robert Gordon struct clist *cl_read = NULL; 8340a701b1Robert Gordon struct clist *cl_send = NULL; 8350a701b1Robert Gordon struct clist *cl_write = NULL; 8360a701b1Robert Gordon xdrproc_t xdr_results; /* results XDR encoding function */ 8370a701b1Robert Gordon caddr_t xdr_location; /* response results pointer */ 8380a701b1Robert Gordon 8397c478bdstevel@tonic-gate int retval = FALSE; 8400a701b1Robert Gordon int status, msglen, num_wreply_segments = 0; 8410a701b1Robert Gordon uint32_t rdma_credit = 0; 8420a701b1Robert Gordon int freelen = 0; 8430a701b1Robert Gordon bool_t has_args; 8440a701b1Robert Gordon uint_t final_resp_len, rdma_response_op, vers; 8457c478bdstevel@tonic-gate 8460a701b1Robert Gordon bzero(&xdrs_rhdr, sizeof (XDR)); 8470a701b1Robert Gordon crdp = (struct clone_rdma_data *)clone_xprt->xp_p2buf; 8480a701b1Robert Gordon conn = crdp->conn; 8497c478bdstevel@tonic-gate 8507c478bdstevel@tonic-gate /* 8517c478bdstevel@tonic-gate * If there is a result procedure specified in the reply message, 8527c478bdstevel@tonic-gate * it will be processed in the xdr_replymsg and SVCAUTH_WRAP. 8537c478bdstevel@tonic-gate * We need to make sure it won't be processed twice, so we null 8547c478bdstevel@tonic-gate * it for xdr_replymsg here. 8557c478bdstevel@tonic-gate */ 8567c478bdstevel@tonic-gate has_args = FALSE; 8577c478bdstevel@tonic-gate if (msg->rm_reply.rp_stat == MSG_ACCEPTED && 8587c478bdstevel@tonic-gate msg->rm_reply.rp_acpt.ar_stat == SUCCESS) { 8597c478bdstevel@tonic-gate if ((xdr_results = msg->acpted_rply.ar_results.proc) != NULL) { 8607c478bdstevel@tonic-gate has_args = TRUE; 8617c478bdstevel@tonic-gate xdr_location = msg->acpted_rply.ar_results.where; 8627c478bdstevel@tonic-gate msg->acpted_rply.ar_results.proc = xdr_void; 8637c478bdstevel@tonic-gate msg->acpted_rply.ar_results.where = NULL; 8647c478bdstevel@tonic-gate } 8657c478bdstevel@tonic-gate } 8667c478bdstevel@tonic-gate 8677c478bdstevel@tonic-gate /* 8680a701b1Robert Gordon * Given the limit on the inline response size (RPC_MSG_SZ), 8690a701b1Robert Gordon * there is a need to make a guess as to the overall size of 8700a701b1Robert Gordon * the response. If the resultant size is beyond the inline 8710a701b1Robert Gordon * size, then the server needs to use the "reply chunk list" 8720a701b1Robert Gordon * provided by the client (if the client provided one). An 8730a701b1Robert Gordon * example of this type of response would be a READDIR 8740a701b1Robert Gordon * response (e.g. a small directory read would fit in RPC_MSG_SZ 8750a701b1Robert Gordon * and that is the preference but it may not fit) 8760a701b1Robert Gordon * 8770a701b1Robert Gordon * Combine the encoded size and the size of the true results 8780a701b1Robert Gordon * and then make the decision about where to encode and send results. 8790a701b1Robert Gordon * 8800a701b1Robert Gordon * One important note, this calculation is ignoring the size 8810a701b1Robert Gordon * of the encoding of the authentication overhead. The reason 8820a701b1Robert Gordon * for this is rooted in the complexities of access to the 8830a701b1Robert Gordon * encoded size of RPCSEC_GSS related authentiation, 8840a701b1Robert Gordon * integrity, and privacy. 8850a701b1Robert Gordon * 8860a701b1Robert Gordon * If it turns out that the encoded authentication bumps the 8870a701b1Robert Gordon * response over the RPC_MSG_SZ limit, then it may need to 8880a701b1Robert Gordon * attempt to encode for the reply chunk list. 8890a701b1Robert Gordon */ 8900a701b1Robert Gordon 8910a701b1Robert Gordon /* 8920a701b1Robert Gordon * Calculating the "sizeof" the RPC response header and the 8930a701b1Robert Gordon * encoded results. 8947c478bdstevel@tonic-gate */ 8957c478bdstevel@tonic-gate msglen = xdr_sizeof(xdr_replymsg, msg); 8960a701b1Robert Gordon 8970a701b1Robert Gordon if (msglen > 0) { 8980a701b1Robert Gordon RSSTAT_INCR(rstotalreplies); 8990a701b1Robert Gordon } 9000a701b1Robert Gordon if (has_args) 9017c478bdstevel@tonic-gate msglen += xdrrdma_sizeof(xdr_results, xdr_location, 9020a701b1Robert Gordon rdma_minchunk, NULL, NULL); 9037c478bdstevel@tonic-gate 9040a701b1Robert Gordon DTRACE_PROBE1(krpc__i__svcrdma__ksend__msglen, int, msglen); 9057c478bdstevel@tonic-gate 9060a701b1Robert Gordon status = SVC_RDMA_SUCCESS; 9077c478bdstevel@tonic-gate 9080a701b1Robert Gordon if (msglen < RPC_MSG_SZ) { 9097c478bdstevel@tonic-gate /* 9100a701b1Robert Gordon * Looks like the response will fit in the inline 9110a701b1Robert Gordon * response; let's try 9127c478bdstevel@tonic-gate */ 9130a701b1Robert Gordon RSSTAT_INCR(rstotalinlinereplies); 9147c478bdstevel@tonic-gate 9150a701b1Robert Gordon rdma_response_op = RDMA_MSG; 9167c478bdstevel@tonic-gate 9170a701b1Robert Gordon status = svc_compose_rpcmsg(clone_xprt, conn, xdr_results, 9180a701b1Robert Gordon xdr_location, &rbuf_rpc_resp, &xdrs_rpc, msg, 9190a701b1Robert Gordon has_args, &final_resp_len); 9200a701b1Robert Gordon 9210a701b1Robert Gordon DTRACE_PROBE1(krpc__i__srdma__ksend__compose_status, 9220a701b1Robert Gordon int, status); 9230a701b1Robert Gordon DTRACE_PROBE1(krpc__i__srdma__ksend__compose_len, 9240a701b1Robert Gordon int, final_resp_len); 9250a701b1Robert Gordon 9260a701b1Robert Gordon if (status == SVC_RDMA_SUCCESS && crdp->cl_reply) { 9270a701b1Robert Gordon clist_free(crdp->cl_reply); 9280a701b1Robert Gordon crdp->cl_reply = NULL; 9297c478bdstevel@tonic-gate } 9300a701b1Robert Gordon } 9317c478bdstevel@tonic-gate 9320a701b1Robert Gordon /* 9330a701b1Robert Gordon * If the encode failed (size?) or the message really is 9340a701b1Robert Gordon * larger than what is allowed, try the response chunk list. 9350a701b1Robert Gordon */ 9360a701b1Robert Gordon if (status != SVC_RDMA_SUCCESS || msglen >= RPC_MSG_SZ) { 9377c478bdstevel@tonic-gate /* 9380a701b1Robert Gordon * attempting to use a reply chunk list when there 9390a701b1Robert Gordon * isn't one won't get very far... 9407c478bdstevel@tonic-gate */ 9410a701b1Robert Gordon if (crdp->cl_reply == NULL) { 9420a701b1Robert Gordon DTRACE_PROBE(krpc__e__svcrdma__ksend__noreplycl); 9430a701b1Robert Gordon goto out; 9447c478bdstevel@tonic-gate } 9457c478bdstevel@tonic-gate 9460a701b1Robert Gordon RSSTAT_INCR(rstotallongreplies); 9477c478bdstevel@tonic-gate 9480a701b1Robert Gordon msglen = xdr_sizeof(xdr_replymsg, msg); 9490a701b1Robert Gordon msglen += xdrrdma_sizeof(xdr_results, xdr_location, 0, 9500a701b1Robert Gordon NULL, NULL); 9517c478bdstevel@tonic-gate 9520a701b1Robert Gordon status = svc_process_long_reply(clone_xprt, xdr_results, 9530a701b1Robert Gordon xdr_location, msg, has_args, &msglen, &freelen, 9540a701b1Robert Gordon &num_wreply_segments, &final_resp_len); 9557c478bdstevel@tonic-gate 9560a701b1Robert Gordon DTRACE_PROBE1(krpc__i__svcrdma__ksend__longreplen, 9570a701b1Robert Gordon int, final_resp_len); 9580a701b1Robert Gordon 9590a701b1Robert Gordon if (status != SVC_RDMA_SUCCESS) { 9600a701b1Robert Gordon DTRACE_PROBE(krpc__e__svcrdma__ksend__compose__failed); 9617c478bdstevel@tonic-gate goto out; 9627c478bdstevel@tonic-gate } 9630a701b1Robert Gordon 9640a701b1Robert Gordon rdma_response_op = RDMA_NOMSG; 9657c478bdstevel@tonic-gate } 9667c478bdstevel@tonic-gate 9670a701b1Robert Gordon DTRACE_PROBE1(krpc__i__svcrdma__ksend__rdmamsg__len, 9680a701b1Robert Gordon int, final_resp_len); 9690a701b1Robert Gordon 9700a701b1Robert Gordon rbuf_resp.type = SEND_BUFFER; 9710a701b1Robert Gordon if (rdma_buf_alloc(conn, &rbuf_resp)) { 9720a701b1Robert Gordon rdma_buf_free(conn, &rbuf_rpc_resp); 9730a701b1Robert Gordon DTRACE_PROBE(krpc__e__svcrdma__ksend__nofreebufs); 9747c478bdstevel@tonic-gate goto out; 9757c478bdstevel@tonic-gate } 9767c478bdstevel@tonic-gate 9770a701b1Robert Gordon rdma_credit = rdma_bufs_granted; 9780a701b1Robert Gordon 9790a701b1Robert Gordon vers = RPCRDMA_VERS; 9800a701b1Robert Gordon xdrmem_create(&xdrs_rhdr, rbuf_resp.addr, rbuf_resp.len, XDR_ENCODE); 9810a701b1Robert Gordon (*(uint32_t *)rbuf_resp.addr) = msg->rm_xid; 9820a701b1Robert Gordon /* Skip xid and set the xdr position accordingly. */ 9830a701b1Robert Gordon XDR_SETPOS(&xdrs_rhdr, sizeof (uint32_t)); 9840a701b1Robert Gordon if (!xdr_u_int(&xdrs_rhdr, &vers) || 9850a701b1Robert Gordon !xdr_u_int(&xdrs_rhdr, &rdma_credit) || 9860a701b1Robert Gordon !xdr_u_int(&xdrs_rhdr, &rdma_response_op)) { 9870a701b1Robert Gordon rdma_buf_free(conn, &rbuf_rpc_resp); 9880a701b1Robert Gordon rdma_buf_free(conn, &rbuf_resp); 9890a701b1Robert Gordon DTRACE_PROBE(krpc__e__svcrdma__ksend__uint); 9900a701b1Robert Gordon goto out; 9917c478bdstevel@tonic-gate } 9927c478bdstevel@tonic-gate 9937c478bdstevel@tonic-gate /* 9940a701b1Robert Gordon * Now XDR the read chunk list, actually always NULL 9957c478bdstevel@tonic-gate */ 9960a701b1Robert Gordon (void) xdr_encode_rlist_svc(&xdrs_rhdr, cl_read); 9970a701b1Robert Gordon 9987c478bdstevel@tonic-gate /* 9990a701b1Robert Gordon * encode write list -- we already drove RDMA_WRITEs 10007c478bdstevel@tonic-gate */ 10010a701b1Robert Gordon cl_write = crdp->cl_wlist; 10020a701b1Robert Gordon if (!xdr_encode_wlist(&xdrs_rhdr, cl_write)) { 10030a701b1Robert Gordon DTRACE_PROBE(krpc__e__svcrdma__ksend__enc__wlist); 10040a701b1Robert Gordon rdma_buf_free(conn, &rbuf_rpc_resp); 10050a701b1Robert Gordon rdma_buf_free(conn, &rbuf_resp); 10067c478bdstevel@tonic-gate goto out; 10077c478bdstevel@tonic-gate } 10087c478bdstevel@tonic-gate 10097c478bdstevel@tonic-gate /* 10100a701b1Robert Gordon * XDR encode the RDMA_REPLY write chunk 10117c478bdstevel@tonic-gate */ 10120a701b1Robert Gordon if (!xdr_encode_reply_wchunk(&xdrs_rhdr, crdp->cl_reply, 10130a701b1Robert Gordon num_wreply_segments)) { 10140a701b1Robert Gordon rdma_buf_free(conn, &rbuf_rpc_resp); 10150a701b1Robert Gordon rdma_buf_free(conn, &rbuf_resp); 10160a701b1Robert Gordon goto out; 10170a701b1Robert Gordon } 10187c478bdstevel@tonic-gate 10190a701b1Robert Gordon clist_add(&cl_send, 0, XDR_GETPOS(&xdrs_rhdr), &rbuf_resp.handle, 10200a701b1Robert Gordon rbuf_resp.addr, NULL, NULL); 10217c478bdstevel@tonic-gate 10220a701b1Robert Gordon if (rdma_response_op == RDMA_MSG) { 10230a701b1Robert Gordon clist_add(&cl_send, 0, final_resp_len, &rbuf_rpc_resp.handle, 10240a701b1Robert Gordon rbuf_rpc_resp.addr, NULL, NULL); 10257c478bdstevel@tonic-gate } 10267c478bdstevel@tonic-gate 10270a701b1Robert Gordon status = RDMA_SEND(conn, cl_send, msg->rm_xid); 10287c478bdstevel@tonic-gate 10290a701b1Robert Gordon if (status == RDMA_SUCCESS) { 10300a701b1Robert Gordon retval = TRUE; 10317c478bdstevel@tonic-gate } 10327c478bdstevel@tonic-gate 10337c478bdstevel@tonic-gateout: 10347c478bdstevel@tonic-gate /* 10357c478bdstevel@tonic-gate * Free up sendlist chunks 10367c478bdstevel@tonic-gate */ 10370a701b1Robert Gordon if (cl_send != NULL) 10380a701b1Robert Gordon clist_free(cl_send); 10397c478bdstevel@tonic-gate 10407c478bdstevel@tonic-gate /* 10417c478bdstevel@tonic-gate * Destroy private data for xdr rdma 10427c478bdstevel@tonic-gate */ 10430a701b1Robert Gordon if (clone_xprt->xp_xdrout.x_ops != NULL) { 10440a701b1Robert Gordon XDR_DESTROY(&(clone_xprt->xp_xdrout)); 10450a701b1Robert Gordon } 10460a701b1Robert Gordon 10470a701b1Robert Gordon if (crdp->cl_reply) { 10480a701b1Robert Gordon clist_free(crdp->cl_reply); 10490a701b1Robert Gordon crdp->cl_reply = NULL; 10500a701b1Robert Gordon } 10517c478bdstevel@tonic-gate 10527c478bdstevel@tonic-gate /* 10537c478bdstevel@tonic-gate * This is completely disgusting. If public is set it is 10547c478bdstevel@tonic-gate * a pointer to a structure whose first field is the address 10557c478bdstevel@tonic-gate * of the function to free that structure and any related 10567c478bdstevel@tonic-gate * stuff. (see rrokfree in nfs_xdr.c). 10577c478bdstevel@tonic-gate */ 10580a701b1Robert Gordon if (xdrs_rpc->x_public) { 10597c478bdstevel@tonic-gate /* LINTED pointer alignment */ 10600a701b1Robert Gordon (**((int (**)()) xdrs_rpc->x_public)) (xdrs_rpc->x_public); 10610a701b1Robert Gordon } 10620a701b1Robert Gordon 10630a701b1Robert Gordon if (xdrs_rhdr.x_ops != NULL) { 10640a701b1Robert Gordon XDR_DESTROY(&xdrs_rhdr); 10657c478bdstevel@tonic-gate } 10667c478bdstevel@tonic-gate 10677c478bdstevel@tonic-gate return (retval); 10687c478bdstevel@tonic-gate} 10697c478bdstevel@tonic-gate 10707c478bdstevel@tonic-gate/* 10717c478bdstevel@tonic-gate * Deserialize arguments. 10727c478bdstevel@tonic-gate */ 10737c478bdstevel@tonic-gatestatic bool_t 10747c478bdstevel@tonic-gatesvc_rdma_kgetargs(SVCXPRT *clone_xprt, xdrproc_t xdr_args, caddr_t args_ptr) 10757c478bdstevel@tonic-gate{ 10767c478bdstevel@tonic-gate if ((SVCAUTH_UNWRAP(&clone_xprt->xp_auth, &clone_xprt->xp_xdrin, 10777c478bdstevel@tonic-gate xdr_args, args_ptr)) != TRUE) 10787c478bdstevel@tonic-gate return (FALSE); 10797c478bdstevel@tonic-gate return (TRUE); 10807c478bdstevel@tonic-gate} 10817c478bdstevel@tonic-gate 10827c478bdstevel@tonic-gatestatic bool_t 10837c478bdstevel@tonic-gatesvc_rdma_kfreeargs(SVCXPRT *clone_xprt, xdrproc_t xdr_args, 10847c478bdstevel@tonic-gate caddr_t args_ptr) 10857c478bdstevel@tonic-gate{ 10860a701b1Robert Gordon struct clone_rdma_data *crdp; 10877c478bdstevel@tonic-gate bool_t retval; 10887c478bdstevel@tonic-gate 108960536efKaren Rochford /* 109060536efKaren Rochford * If the cloned bit is true, then this transport specific 109160536efKaren Rochford * rmda data has been duplicated into another cloned xprt. Do 109260536efKaren Rochford * not free, or release the connection, it is still in use. The 109360536efKaren Rochford * buffers will be freed and the connection released later by 109460536efKaren Rochford * SVC_CLONE_DESTROY(). 109560536efKaren Rochford */ 10960a701b1Robert Gordon crdp = (struct clone_rdma_data *)clone_xprt->xp_p2buf; 109760536efKaren Rochford if (crdp->cloned == TRUE) { 109860536efKaren Rochford crdp->cloned = 0; 109960536efKaren Rochford return (TRUE); 110060536efKaren Rochford } 11010a701b1Robert Gordon 11020a701b1Robert Gordon /* 11030a701b1Robert Gordon * Free the args if needed then XDR_DESTROY 11040a701b1Robert Gordon */ 11057c478bdstevel@tonic-gate if (args_ptr) { 11067c478bdstevel@tonic-gate XDR *xdrs = &clone_xprt->xp_xdrin; 11077c478bdstevel@tonic-gate 11087c478bdstevel@tonic-gate xdrs->x_op = XDR_FREE; 11097c478bdstevel@tonic-gate retval = (*xdr_args)(xdrs, args_ptr); 11107c478bdstevel@tonic-gate } 11110a701b1Robert Gordon 11127c478bdstevel@tonic-gate XDR_DESTROY(&(clone_xprt->xp_xdrin)); 11130a701b1Robert Gordon rdma_buf_free(crdp->conn, &crdp->rpcbuf); 11140a701b1Robert Gordon if (crdp->cl_reply) { 11150a701b1Robert Gordon clist_free(crdp->cl_reply); 11160a701b1Robert Gordon crdp->cl_reply = NULL; 11170a701b1Robert Gordon } 11180a701b1Robert Gordon RDMA_REL_CONN(crdp->conn); 11190a701b1Robert Gordon 11207c478bdstevel@tonic-gate return (retval); 11217c478bdstevel@tonic-gate} 11227c478bdstevel@tonic-gate 11237c478bdstevel@tonic-gate/* ARGSUSED */ 11247c478bdstevel@tonic-gatestatic int32_t * 11257c478bdstevel@tonic-gatesvc_rdma_kgetres(SVCXPRT *clone_xprt, int size) 11267c478bdstevel@tonic-gate{ 11277c478bdstevel@tonic-gate return (NULL); 11287c478bdstevel@tonic-gate} 11297c478bdstevel@tonic-gate 11307c478bdstevel@tonic-gate/* ARGSUSED */ 11317c478bdstevel@tonic-gatestatic void 11327c478bdstevel@tonic-gatesvc_rdma_kfreeres(SVCXPRT *clone_xprt) 11337c478bdstevel@tonic-gate{ 11347c478bdstevel@tonic-gate} 11357c478bdstevel@tonic-gate 11367c478bdstevel@tonic-gate/* 11377c478bdstevel@tonic-gate * the dup cacheing routines below provide a cache of non-failure 11387c478bdstevel@tonic-gate * transaction id's. rpc service routines can use this to detect 11397c478bdstevel@tonic-gate * retransmissions and re-send a non-failure response. 11407c478bdstevel@tonic-gate */ 11417c478bdstevel@tonic-gate 11427c478bdstevel@tonic-gate/* 11437c478bdstevel@tonic-gate * MAXDUPREQS is the number of cached items. It should be adjusted 11447c478bdstevel@tonic-gate * to the service load so that there is likely to be a response entry 11457c478bdstevel@tonic-gate * when the first retransmission comes in. 11467c478bdstevel@tonic-gate */ 1147201f5ebSebastien Roy#define MAXDUPREQS 8192 11487c478bdstevel@tonic-gate 11497c478bdstevel@tonic-gate/* 1150b7f0713Marcel Telka * This should be appropriately scaled to MAXDUPREQS. To produce as less as 1151b7f0713Marcel Telka * possible collisions it is suggested to set this to a prime. 11527c478bdstevel@tonic-gate */ 1153201f5ebSebastien Roy#define DRHASHSZ 2053 11547c478bdstevel@tonic-gate 11557c478bdstevel@tonic-gate#define XIDHASH(xid) ((xid) % DRHASHSZ) 11567c478bdstevel@tonic-gate#define DRHASH(dr) XIDHASH((dr)->dr_xid) 11577c478bdstevel@tonic-gate#define REQTOXID(req) ((req)->rq_xprt->xp_xid) 11587c478bdstevel@tonic-gate 11597c478bdstevel@tonic-gatestatic int rdmandupreqs = 0; 116059418bdgtint rdmamaxdupreqs = MAXDUPREQS; 11617c478bdstevel@tonic-gatestatic kmutex_t rdmadupreq_lock; 11627c478bdstevel@tonic-gatestatic struct dupreq *rdmadrhashtbl[DRHASHSZ]; 11637c478bdstevel@tonic-gatestatic int rdmadrhashstat[DRHASHSZ]; 11647c478bdstevel@tonic-gate 11657c478bdstevel@tonic-gatestatic void unhash(struct dupreq *); 11667c478bdstevel@tonic-gate 11677c478bdstevel@tonic-gate/* 11687c478bdstevel@tonic-gate * rdmadrmru points to the head of a circular linked list in lru order. 11697c478bdstevel@tonic-gate * rdmadrmru->dr_next == drlru 11707c478bdstevel@tonic-gate */ 11717c478bdstevel@tonic-gatestruct dupreq *rdmadrmru; 11727c478bdstevel@tonic-gate 11737c478bdstevel@tonic-gate/* 11747c478bdstevel@tonic-gate * svc_rdma_kdup searches the request cache and returns 0 if the 11757c478bdstevel@tonic-gate * request is not found in the cache. If it is found, then it 11767c478bdstevel@tonic-gate * returns the state of the request (in progress or done) and 11777c478bdstevel@tonic-gate * the status or attributes that were part of the original reply. 11787c478bdstevel@tonic-gate */ 11797c478bdstevel@tonic-gatestatic int 11807c478bdstevel@tonic-gatesvc_rdma_kdup(struct svc_req *req, caddr_t res, int size, struct dupreq **drpp, 1181201f5ebSebastien Roy bool_t *dupcachedp) 11827c478bdstevel@tonic-gate{ 11837c478bdstevel@tonic-gate struct dupreq *dr; 11847c478bdstevel@tonic-gate uint32_t xid; 11857c478bdstevel@tonic-gate uint32_t drhash; 11867c478bdstevel@tonic-gate int status; 11877c478bdstevel@tonic-gate 11887c478bdstevel@tonic-gate xid = REQTOXID(req); 11897c478bdstevel@tonic-gate mutex_enter(&rdmadupreq_lock); 11907c478bdstevel@tonic-gate RSSTAT_INCR(rsdupchecks); 11917c478bdstevel@tonic-gate /* 11927c478bdstevel@tonic-gate * Check to see whether an entry already exists in the cache. 11937c478bdstevel@tonic-gate */ 11947c478bdstevel@tonic-gate dr = rdmadrhashtbl[XIDHASH(xid)]; 11957c478bdstevel@tonic-gate while (dr != NULL) { 11967c478bdstevel@tonic-gate if (dr->dr_xid == xid && 11977c478bdstevel@tonic-gate dr->dr_proc == req->rq_proc && 11987c478bdstevel@tonic-gate dr->dr_prog == req->rq_prog && 11997c478bdstevel@tonic-gate dr->dr_vers == req->rq_vers && 12007c478bdstevel@tonic-gate dr->dr_addr.len == req->rq_xprt->xp_rtaddr.len && 12017c478bdstevel@tonic-gate bcmp((caddr_t)dr->dr_addr.buf, 12027c478bdstevel@tonic-gate (caddr_t)req->rq_xprt->xp_rtaddr.buf, 12037c478bdstevel@tonic-gate dr->dr_addr.len) == 0) { 12047c478bdstevel@tonic-gate status = dr->dr_status; 12057c478bdstevel@tonic-gate if (status == DUP_DONE) { 12067c478bdstevel@tonic-gate bcopy(dr->dr_resp.buf, res, size); 12077c478bdstevel@tonic-gate if (dupcachedp != NULL) 12087c478bdstevel@tonic-gate *dupcachedp = (dr->dr_resfree != NULL); 12097c478bdstevel@tonic-gate } else { 12107c478bdstevel@tonic-gate dr->dr_status = DUP_INPROGRESS; 12117c478bdstevel@tonic-gate *drpp = dr; 12127c478bdstevel@tonic-gate } 12137c478bdstevel@tonic-gate RSSTAT_INCR(rsdupreqs); 12147c478bdstevel@tonic-gate mutex_exit(&rdmadupreq_lock); 12157c478bdstevel@tonic-gate return (status); 12167c478bdstevel@tonic-gate } 12177c478bdstevel@tonic-gate dr = dr->dr_chain; 12187c478bdstevel@tonic-gate } 12197c478bdstevel@tonic-gate 12207c478bdstevel@tonic-gate /* 12217c478bdstevel@tonic-gate * There wasn't an entry, either allocate a new one or recycle 12227c478bdstevel@tonic-gate * an old one. 12237c478bdstevel@tonic-gate */ 12247c478bdstevel@tonic-gate if (rdmandupreqs < rdmamaxdupreqs) { 12257c478bdstevel@tonic-gate dr = kmem_alloc(sizeof (*dr), KM_NOSLEEP); 12267c478bdstevel@tonic-gate if (dr == NULL) { 12277c478bdstevel@tonic-gate mutex_exit(&rdmadupreq_lock); 12287c478bdstevel@tonic-gate return (DUP_ERROR); 12297c478bdstevel@tonic-gate } 12307c478bdstevel@tonic-gate dr->dr_resp.buf = NULL; 12317c478bdstevel@tonic-gate dr->dr_resp.maxlen = 0; 12327c478bdstevel@tonic-gate dr->dr_addr.buf = NULL; 12337c478bdstevel@tonic-gate dr->dr_addr.maxlen = 0; 12347c478bdstevel@tonic-gate if (rdmadrmru) { 12357c478bdstevel@tonic-gate dr->dr_next = rdmadrmru->dr_next; 12367c478bdstevel@tonic-gate rdmadrmru->dr_next = dr; 12377c478bdstevel@tonic-gate } else { 12387c478bdstevel@tonic-gate dr->dr_next = dr; 12397c478bdstevel@tonic-gate } 12407c478bdstevel@tonic-gate rdmandupreqs++; 12417c478bdstevel@tonic-gate } else { 12427c478bdstevel@tonic-gate dr = rdmadrmru->dr_next; 12437c478bdstevel@tonic-gate while (dr->dr_status == DUP_INPROGRESS) { 12447c478bdstevel@tonic-gate dr = dr->dr_next; 12457c478bdstevel@tonic-gate if (dr == rdmadrmru->dr_next) { 12467c478bdstevel@tonic-gate mutex_exit(&rdmadupreq_lock); 12477c478bdstevel@tonic-gate return (DUP_ERROR); 12487c478bdstevel@tonic-gate } 12497c478bdstevel@tonic-gate } 12507c478bdstevel@tonic-gate unhash(dr); 12517c478bdstevel@tonic-gate if (dr->dr_resfree) { 12527c478bdstevel@tonic-gate (*dr->dr_resfree)(dr->dr_resp.buf); 12537c478bdstevel@tonic-gate } 12547c478bdstevel@tonic-gate } 12557c478bdstevel@tonic-gate dr->dr_resfree = NULL; 12567c478bdstevel@tonic-gate rdmadrmru = dr; 12577c478bdstevel@tonic-gate 12587c478bdstevel@tonic-gate dr->dr_xid = REQTOXID(req); 12597c478bdstevel@tonic-gate dr->dr_prog = req->rq_prog; 12607c478bdstevel@tonic-gate dr->dr_vers = req->rq_vers; 12617c478bdstevel@tonic-gate dr->dr_proc = req->rq_proc; 12627c478bdstevel@tonic-gate if (dr->dr_addr.maxlen < req->rq_xprt->xp_rtaddr.len) { 12637c478bdstevel@tonic-gate if (dr->dr_addr.buf != NULL) 12647c478bdstevel@tonic-gate kmem_free(dr->dr_addr.buf, dr->dr_addr.maxlen); 12657c478bdstevel@tonic-gate dr->dr_addr.maxlen = req->rq_xprt->xp_rtaddr.len; 12667c478bdstevel@tonic-gate dr->dr_addr.buf = kmem_alloc(dr->dr_addr.maxlen, KM_NOSLEEP); 12677c478bdstevel@tonic-gate if (dr->dr_addr.buf == NULL) { 12687c478bdstevel@tonic-gate dr->dr_addr.maxlen = 0; 12697c478bdstevel@tonic-gate dr->dr_status = DUP_DROP; 12707c478bdstevel@tonic-gate mutex_exit(&rdmadupreq_lock); 12717c478bdstevel@tonic-gate return (DUP_ERROR); 12727c478bdstevel@tonic-gate } 12737c478bdstevel@tonic-gate } 12747c478bdstevel@tonic-gate dr->dr_addr.len = req->rq_xprt->xp_rtaddr.len; 12757c478bdstevel@tonic-gate bcopy(req->rq_xprt->xp_rtaddr.buf, dr->dr_addr.buf, dr->dr_addr.len); 12767c478bdstevel@tonic-gate if (dr->dr_resp.maxlen < size) { 12777c478bdstevel@tonic-gate if (dr->dr_resp.buf != NULL) 12787c478bdstevel@tonic-gate kmem_free(dr->dr_resp.buf, dr->dr_resp.maxlen); 12797c478bdstevel@tonic-gate dr->dr_resp.maxlen = (unsigned int)size; 12807c478bdstevel@tonic-gate dr->dr_resp.buf = kmem_alloc(size, KM_NOSLEEP); 12817c478bdstevel@tonic-gate if (dr->dr_resp.buf == NULL) { 12827c478bdstevel@tonic-gate dr->dr_resp.maxlen = 0; 12837c478bdstevel@tonic-gate dr->dr_status = DUP_DROP; 12847c478bdstevel@tonic-gate mutex_exit(&rdmadupreq_lock); 12857c478bdstevel@tonic-gate return (DUP_ERROR); 12867c478bdstevel@tonic-gate } 12877c478bdstevel@tonic-gate } 12887c478bdstevel@tonic-gate dr->dr_status = DUP_INPROGRESS; 12897c478bdstevel@tonic-gate 12907c478bdstevel@tonic-gate drhash = (uint32_t)DRHASH(dr); 12917c478bdstevel@tonic-gate dr->dr_chain = rdmadrhashtbl[drhash]; 12927c478bdstevel@tonic-gate rdmadrhashtbl[drhash] = dr; 12937c478bdstevel@tonic-gate rdmadrhashstat[drhash]++; 12947c478bdstevel@tonic-gate mutex_exit(&rdmadupreq_lock); 12957c478bdstevel@tonic-gate *drpp = dr; 12967c478bdstevel@tonic-gate return (DUP_NEW); 12977c478bdstevel@tonic-gate} 12987c478bdstevel@tonic-gate 12997c478bdstevel@tonic-gate/* 13007c478bdstevel@tonic-gate * svc_rdma_kdupdone marks the request done (DUP_DONE or DUP_DROP) 13017c478bdstevel@tonic-gate * and stores the response. 13027c478bdstevel@tonic-gate */ 13037c478bdstevel@tonic-gatestatic void 13047c478bdstevel@tonic-gatesvc_rdma_kdupdone(struct dupreq *dr, caddr_t res, void (*dis_resfree)(), 1305