/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 * copyright notice, this list of conditions and the following
 * disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following
 * disclaimer in the documentation and/or other materials
 * provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
/*
 * Sun elects to include this software in Sun product
 * under the OpenIB BSD license.
 *
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
73b86efd96Sagiri */ 74b86efd96Sagiri 75b86efd96Sagiri #include <sys/types.h> 76b86efd96Sagiri #include <sys/ddi.h> 77b86efd96Sagiri #include <sys/sunddi.h> 78b86efd96Sagiri #include <sys/ib/clients/rds/rdsib_cm.h> 79b86efd96Sagiri #include <sys/ib/clients/rds/rdsib_ib.h> 80b86efd96Sagiri #include <sys/ib/clients/rds/rdsib_buf.h> 81b86efd96Sagiri #include <sys/ib/clients/rds/rdsib_ep.h> 82b86efd96Sagiri #include <sys/ib/clients/rds/rds_kstat.h> 83b86efd96Sagiri 84b86efd96Sagiri static void rds_async_handler(void *clntp, ibt_hca_hdl_t hdl, 85b86efd96Sagiri ibt_async_code_t code, ibt_async_event_t *event); 86b86efd96Sagiri 87b86efd96Sagiri static struct ibt_clnt_modinfo_s rds_ib_modinfo = { 88*03494a98SBill Taylor IBTI_V_CURR, 89b86efd96Sagiri IBT_NETWORK, 90b86efd96Sagiri rds_async_handler, 91b86efd96Sagiri NULL, 92b86efd96Sagiri "RDS" 93b86efd96Sagiri }; 94b86efd96Sagiri 95b86efd96Sagiri /* performance tunables */ 96b86efd96Sagiri uint_t rds_no_interrupts = 0; 97b86efd96Sagiri uint_t rds_poll_percent_full = 25; 98b86efd96Sagiri uint_t rds_wc_signal = IBT_NEXT_SOLICITED; 99b86efd96Sagiri uint_t rds_waittime_ms = 100; /* ms */ 100b86efd96Sagiri 101b86efd96Sagiri extern dev_info_t *rdsib_dev_info; 102b86efd96Sagiri extern void rds_close_sessions(); 103b86efd96Sagiri 104b86efd96Sagiri static void 105b86efd96Sagiri rdsib_validate_chan_sizes(ibt_hca_attr_t *hattrp) 106b86efd96Sagiri { 107b86efd96Sagiri /* The SQ size should not be more than that supported by the HCA */ 108b86efd96Sagiri if (((MaxDataSendBuffers + RDS_NUM_ACKS) > hattrp->hca_max_chan_sz) || 109b86efd96Sagiri ((MaxDataSendBuffers + RDS_NUM_ACKS) > hattrp->hca_max_cq_sz)) { 11074242422Sagiri RDS_DPRINTF2("RDSIB", "MaxDataSendBuffers + %d is greater " 111b86efd96Sagiri "than that supported by the HCA driver " 112b86efd96Sagiri "(%d + %d > %d or %d), lowering it to a supported value.", 113b86efd96Sagiri RDS_NUM_ACKS, MaxDataSendBuffers, RDS_NUM_ACKS, 114b86efd96Sagiri hattrp->hca_max_chan_sz, 
hattrp->hca_max_cq_sz); 115b86efd96Sagiri 116b86efd96Sagiri MaxDataSendBuffers = (hattrp->hca_max_chan_sz > 117b86efd96Sagiri hattrp->hca_max_cq_sz) ? 118b86efd96Sagiri hattrp->hca_max_cq_sz - RDS_NUM_ACKS : 119b86efd96Sagiri hattrp->hca_max_chan_sz - RDS_NUM_ACKS; 120b86efd96Sagiri } 121b86efd96Sagiri 122b86efd96Sagiri /* The RQ size should not be more than that supported by the HCA */ 123b86efd96Sagiri if ((MaxDataRecvBuffers > hattrp->hca_max_chan_sz) || 124b86efd96Sagiri (MaxDataRecvBuffers > hattrp->hca_max_cq_sz)) { 12574242422Sagiri RDS_DPRINTF2("RDSIB", "MaxDataRecvBuffers is greater than that " 126b86efd96Sagiri "supported by the HCA driver (%d > %d or %d), lowering it " 127b86efd96Sagiri "to a supported value.", MaxDataRecvBuffers, 128b86efd96Sagiri hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz); 129b86efd96Sagiri 130b86efd96Sagiri MaxDataRecvBuffers = (hattrp->hca_max_chan_sz > 131b86efd96Sagiri hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz : 132b86efd96Sagiri hattrp->hca_max_chan_sz; 133b86efd96Sagiri } 134b86efd96Sagiri 135b86efd96Sagiri /* The SQ size should not be more than that supported by the HCA */ 136b86efd96Sagiri if ((MaxCtrlSendBuffers > hattrp->hca_max_chan_sz) || 137b86efd96Sagiri (MaxCtrlSendBuffers > hattrp->hca_max_cq_sz)) { 13874242422Sagiri RDS_DPRINTF2("RDSIB", "MaxCtrlSendBuffers is greater than that " 139b86efd96Sagiri "supported by the HCA driver (%d > %d or %d), lowering it " 140b86efd96Sagiri "to a supported value.", MaxCtrlSendBuffers, 141b86efd96Sagiri hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz); 142b86efd96Sagiri 143b86efd96Sagiri MaxCtrlSendBuffers = (hattrp->hca_max_chan_sz > 144b86efd96Sagiri hattrp->hca_max_cq_sz) ? 
hattrp->hca_max_cq_sz : 145b86efd96Sagiri hattrp->hca_max_chan_sz; 146b86efd96Sagiri } 147b86efd96Sagiri 148b86efd96Sagiri /* The RQ size should not be more than that supported by the HCA */ 149b86efd96Sagiri if ((MaxCtrlRecvBuffers > hattrp->hca_max_chan_sz) || 150b86efd96Sagiri (MaxCtrlRecvBuffers > hattrp->hca_max_cq_sz)) { 15174242422Sagiri RDS_DPRINTF2("RDSIB", "MaxCtrlRecvBuffers is greater than that " 152b86efd96Sagiri "supported by the HCA driver (%d > %d or %d), lowering it " 153b86efd96Sagiri "to a supported value.", MaxCtrlRecvBuffers, 154b86efd96Sagiri hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz); 155b86efd96Sagiri 156b86efd96Sagiri MaxCtrlRecvBuffers = (hattrp->hca_max_chan_sz > 157b86efd96Sagiri hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz : 158b86efd96Sagiri hattrp->hca_max_chan_sz; 159b86efd96Sagiri } 160b86efd96Sagiri 161b86efd96Sagiri /* The MaxRecvMemory should be less than that supported by the HCA */ 1625763ba1eSagiri if ((NDataRX * RdsPktSize) > hattrp->hca_max_memr_len) { 16374242422Sagiri RDS_DPRINTF2("RDSIB", "MaxRecvMemory is greater than that " 164b86efd96Sagiri "supported by the HCA driver (%d > %d), lowering it to %d", 1655763ba1eSagiri NDataRX * RdsPktSize, hattrp->hca_max_memr_len, 166b86efd96Sagiri hattrp->hca_max_memr_len); 167b86efd96Sagiri 1685763ba1eSagiri NDataRX = hattrp->hca_max_memr_len/RdsPktSize; 169b86efd96Sagiri } 170b86efd96Sagiri } 171b86efd96Sagiri 17200a3eaf3SRamaswamy Tummala /* Return hcap, given the hca guid */ 17300a3eaf3SRamaswamy Tummala rds_hca_t * 17400a3eaf3SRamaswamy Tummala rds_lkup_hca(ib_guid_t hca_guid) 17500a3eaf3SRamaswamy Tummala { 17600a3eaf3SRamaswamy Tummala rds_hca_t *hcap; 17700a3eaf3SRamaswamy Tummala 17800a3eaf3SRamaswamy Tummala RDS_DPRINTF4("rds_lkup_hca", "Enter: statep: 0x%p " 17900a3eaf3SRamaswamy Tummala "guid: %llx", rdsib_statep, hca_guid); 18000a3eaf3SRamaswamy Tummala 18100a3eaf3SRamaswamy Tummala rw_enter(&rdsib_statep->rds_hca_lock, RW_READER); 18200a3eaf3SRamaswamy Tummala 
18300a3eaf3SRamaswamy Tummala hcap = rdsib_statep->rds_hcalistp; 18400a3eaf3SRamaswamy Tummala while ((hcap != NULL) && (hcap->hca_guid != hca_guid)) { 18500a3eaf3SRamaswamy Tummala hcap = hcap->hca_nextp; 18600a3eaf3SRamaswamy Tummala } 18700a3eaf3SRamaswamy Tummala 18800a3eaf3SRamaswamy Tummala rw_exit(&rdsib_statep->rds_hca_lock); 18900a3eaf3SRamaswamy Tummala 19000a3eaf3SRamaswamy Tummala RDS_DPRINTF4("rds_lkup_hca", "return"); 19100a3eaf3SRamaswamy Tummala 19200a3eaf3SRamaswamy Tummala return (hcap); 19300a3eaf3SRamaswamy Tummala } 19400a3eaf3SRamaswamy Tummala 19500a3eaf3SRamaswamy Tummala 19600a3eaf3SRamaswamy Tummala static rds_hca_t * 19700a3eaf3SRamaswamy Tummala rdsib_init_hca(ib_guid_t hca_guid) 19800a3eaf3SRamaswamy Tummala { 19900a3eaf3SRamaswamy Tummala rds_hca_t *hcap; 20000a3eaf3SRamaswamy Tummala boolean_t alloc = B_FALSE; 20100a3eaf3SRamaswamy Tummala int ret; 20200a3eaf3SRamaswamy Tummala 20300a3eaf3SRamaswamy Tummala RDS_DPRINTF2("rdsib_init_hca", "enter: HCA 0x%llx", hca_guid); 20400a3eaf3SRamaswamy Tummala 20500a3eaf3SRamaswamy Tummala /* Do a HCA lookup */ 20600a3eaf3SRamaswamy Tummala hcap = rds_lkup_hca(hca_guid); 20700a3eaf3SRamaswamy Tummala 20800a3eaf3SRamaswamy Tummala if (hcap != NULL && hcap->hca_hdl != NULL) { 20900a3eaf3SRamaswamy Tummala /* 21000a3eaf3SRamaswamy Tummala * This can happen if we get IBT_HCA_ATTACH_EVENT on an HCA 21100a3eaf3SRamaswamy Tummala * that we have already opened. Just return NULL so that 21200a3eaf3SRamaswamy Tummala * we'll not end up reinitializing the HCA again. 
21300a3eaf3SRamaswamy Tummala */ 21400a3eaf3SRamaswamy Tummala RDS_DPRINTF2("rdsib_init_hca", "HCA already initialized"); 21500a3eaf3SRamaswamy Tummala return (NULL); 21600a3eaf3SRamaswamy Tummala } 21700a3eaf3SRamaswamy Tummala 21800a3eaf3SRamaswamy Tummala if (hcap == NULL) { 21900a3eaf3SRamaswamy Tummala RDS_DPRINTF2("rdsib_init_hca", "New HCA is added"); 22000a3eaf3SRamaswamy Tummala hcap = (rds_hca_t *)kmem_zalloc(sizeof (rds_hca_t), KM_SLEEP); 22100a3eaf3SRamaswamy Tummala alloc = B_TRUE; 22200a3eaf3SRamaswamy Tummala } 22300a3eaf3SRamaswamy Tummala 22400a3eaf3SRamaswamy Tummala hcap->hca_guid = hca_guid; 22500a3eaf3SRamaswamy Tummala ret = ibt_open_hca(rdsib_statep->rds_ibhdl, hca_guid, 22600a3eaf3SRamaswamy Tummala &hcap->hca_hdl); 22700a3eaf3SRamaswamy Tummala if (ret != IBT_SUCCESS) { 22800a3eaf3SRamaswamy Tummala if (ret == IBT_HCA_IN_USE) { 22900a3eaf3SRamaswamy Tummala RDS_DPRINTF2("rdsib_init_hca", 23000a3eaf3SRamaswamy Tummala "ibt_open_hca: 0x%llx returned IBT_HCA_IN_USE", 23100a3eaf3SRamaswamy Tummala hca_guid); 23200a3eaf3SRamaswamy Tummala } else { 23300a3eaf3SRamaswamy Tummala RDS_DPRINTF2("rdsib_init_hca", 23400a3eaf3SRamaswamy Tummala "ibt_open_hca: 0x%llx failed: %d", hca_guid, ret); 23500a3eaf3SRamaswamy Tummala } 23600a3eaf3SRamaswamy Tummala if (alloc == B_TRUE) { 23700a3eaf3SRamaswamy Tummala kmem_free(hcap, sizeof (rds_hca_t)); 23800a3eaf3SRamaswamy Tummala } 23900a3eaf3SRamaswamy Tummala return (NULL); 24000a3eaf3SRamaswamy Tummala } 24100a3eaf3SRamaswamy Tummala 24200a3eaf3SRamaswamy Tummala ret = ibt_query_hca(hcap->hca_hdl, &hcap->hca_attr); 24300a3eaf3SRamaswamy Tummala if (ret != IBT_SUCCESS) { 24400a3eaf3SRamaswamy Tummala RDS_DPRINTF2("rdsib_init_hca", 24500a3eaf3SRamaswamy Tummala "Query HCA: 0x%llx failed: %d", hca_guid, ret); 24600a3eaf3SRamaswamy Tummala ret = ibt_close_hca(hcap->hca_hdl); 24700a3eaf3SRamaswamy Tummala ASSERT(ret == IBT_SUCCESS); 24800a3eaf3SRamaswamy Tummala if (alloc == B_TRUE) { 24900a3eaf3SRamaswamy 
Tummala kmem_free(hcap, sizeof (rds_hca_t)); 25000a3eaf3SRamaswamy Tummala } else { 25100a3eaf3SRamaswamy Tummala hcap->hca_hdl = NULL; 25200a3eaf3SRamaswamy Tummala } 25300a3eaf3SRamaswamy Tummala return (NULL); 25400a3eaf3SRamaswamy Tummala } 25500a3eaf3SRamaswamy Tummala 25600a3eaf3SRamaswamy Tummala ret = ibt_query_hca_ports(hcap->hca_hdl, 0, 25700a3eaf3SRamaswamy Tummala &hcap->hca_pinfop, &hcap->hca_nports, &hcap->hca_pinfo_sz); 25800a3eaf3SRamaswamy Tummala if (ret != IBT_SUCCESS) { 25900a3eaf3SRamaswamy Tummala RDS_DPRINTF2("rdsib_init_hca", 26000a3eaf3SRamaswamy Tummala "Query HCA 0x%llx ports failed: %d", hca_guid, 26100a3eaf3SRamaswamy Tummala ret); 26200a3eaf3SRamaswamy Tummala ret = ibt_close_hca(hcap->hca_hdl); 26300a3eaf3SRamaswamy Tummala hcap->hca_hdl = NULL; 26400a3eaf3SRamaswamy Tummala ASSERT(ret == IBT_SUCCESS); 26500a3eaf3SRamaswamy Tummala if (alloc == B_TRUE) { 26600a3eaf3SRamaswamy Tummala kmem_free(hcap, sizeof (rds_hca_t)); 26700a3eaf3SRamaswamy Tummala } else { 26800a3eaf3SRamaswamy Tummala hcap->hca_hdl = NULL; 26900a3eaf3SRamaswamy Tummala } 27000a3eaf3SRamaswamy Tummala return (NULL); 27100a3eaf3SRamaswamy Tummala } 27200a3eaf3SRamaswamy Tummala 27300a3eaf3SRamaswamy Tummala /* Only one PD per HCA is allocated, so do it here */ 27400a3eaf3SRamaswamy Tummala ret = ibt_alloc_pd(hcap->hca_hdl, IBT_PD_NO_FLAGS, 27500a3eaf3SRamaswamy Tummala &hcap->hca_pdhdl); 27600a3eaf3SRamaswamy Tummala if (ret != IBT_SUCCESS) { 27700a3eaf3SRamaswamy Tummala RDS_DPRINTF2("rdsib_init_hca", 27800a3eaf3SRamaswamy Tummala "ibt_alloc_pd 0x%llx failed: %d", hca_guid, ret); 27900a3eaf3SRamaswamy Tummala (void) ibt_free_portinfo(hcap->hca_pinfop, 28000a3eaf3SRamaswamy Tummala hcap->hca_pinfo_sz); 28100a3eaf3SRamaswamy Tummala ret = ibt_close_hca(hcap->hca_hdl); 28200a3eaf3SRamaswamy Tummala ASSERT(ret == IBT_SUCCESS); 28300a3eaf3SRamaswamy Tummala hcap->hca_hdl = NULL; 28400a3eaf3SRamaswamy Tummala if (alloc == B_TRUE) { 28500a3eaf3SRamaswamy Tummala 
kmem_free(hcap, sizeof (rds_hca_t)); 28600a3eaf3SRamaswamy Tummala } else { 28700a3eaf3SRamaswamy Tummala hcap->hca_hdl = NULL; 28800a3eaf3SRamaswamy Tummala } 28900a3eaf3SRamaswamy Tummala return (NULL); 29000a3eaf3SRamaswamy Tummala } 29100a3eaf3SRamaswamy Tummala 29200a3eaf3SRamaswamy Tummala rdsib_validate_chan_sizes(&hcap->hca_attr); 29300a3eaf3SRamaswamy Tummala 29400a3eaf3SRamaswamy Tummala rw_enter(&rdsib_statep->rds_hca_lock, RW_WRITER); 29500a3eaf3SRamaswamy Tummala hcap->hca_state = RDS_HCA_STATE_OPEN; 29600a3eaf3SRamaswamy Tummala if (alloc == B_TRUE) { 29700a3eaf3SRamaswamy Tummala /* this is a new HCA, add it to the list */ 29800a3eaf3SRamaswamy Tummala rdsib_statep->rds_nhcas++; 29900a3eaf3SRamaswamy Tummala hcap->hca_nextp = rdsib_statep->rds_hcalistp; 30000a3eaf3SRamaswamy Tummala rdsib_statep->rds_hcalistp = hcap; 30100a3eaf3SRamaswamy Tummala } 30200a3eaf3SRamaswamy Tummala rw_exit(&rdsib_statep->rds_hca_lock); 30300a3eaf3SRamaswamy Tummala 30400a3eaf3SRamaswamy Tummala RDS_DPRINTF2("rdsib_init_hca", "return: HCA 0x%llx", hca_guid); 30500a3eaf3SRamaswamy Tummala 30600a3eaf3SRamaswamy Tummala return (hcap); 30700a3eaf3SRamaswamy Tummala } 30800a3eaf3SRamaswamy Tummala 309b86efd96Sagiri /* 310d99cb22fSagiri * Called from attach 311b86efd96Sagiri */ 312b86efd96Sagiri int 313d99cb22fSagiri rdsib_initialize_ib() 314b86efd96Sagiri { 315b86efd96Sagiri ib_guid_t *guidp; 31600a3eaf3SRamaswamy Tummala rds_hca_t *hcap; 317b86efd96Sagiri uint_t ix, hcaix, nhcas; 318b86efd96Sagiri int ret; 319b86efd96Sagiri 320d99cb22fSagiri RDS_DPRINTF2("rdsib_initialize_ib", "enter: statep %p", rdsib_statep); 321b86efd96Sagiri 322b86efd96Sagiri ASSERT(rdsib_statep != NULL); 323b86efd96Sagiri if (rdsib_statep == NULL) { 324d99cb22fSagiri RDS_DPRINTF1("rdsib_initialize_ib", 325d99cb22fSagiri "RDS Statep not initialized"); 326b86efd96Sagiri return (-1); 327b86efd96Sagiri } 328b86efd96Sagiri 329b86efd96Sagiri /* How many hcas are there? 
*/ 330b86efd96Sagiri nhcas = ibt_get_hca_list(&guidp); 331b86efd96Sagiri if (nhcas == 0) { 332d99cb22fSagiri RDS_DPRINTF2("rdsib_initialize_ib", "No IB HCAs Available"); 333b86efd96Sagiri return (-1); 334b86efd96Sagiri } 335b86efd96Sagiri 336d99cb22fSagiri RDS_DPRINTF3("rdsib_initialize_ib", "Number of HCAs: %d", nhcas); 337b86efd96Sagiri 338b86efd96Sagiri /* Register with IBTF */ 339b86efd96Sagiri ret = ibt_attach(&rds_ib_modinfo, rdsib_dev_info, rdsib_statep, 340b86efd96Sagiri &rdsib_statep->rds_ibhdl); 341b86efd96Sagiri if (ret != IBT_SUCCESS) { 342d99cb22fSagiri RDS_DPRINTF2("rdsib_initialize_ib", "ibt_attach failed: %d", 343d99cb22fSagiri ret); 344b86efd96Sagiri (void) ibt_free_hca_list(guidp, nhcas); 345b86efd96Sagiri return (-1); 346b86efd96Sagiri } 347b86efd96Sagiri 348b86efd96Sagiri /* 349b86efd96Sagiri * Open each HCA and gather its information. Don't care about HCAs 350b86efd96Sagiri * that cannot be opened. It is OK as long as atleast one HCA can be 351b86efd96Sagiri * opened. 352b86efd96Sagiri * Initialize a HCA only if all the information is available. 
353b86efd96Sagiri */ 354b86efd96Sagiri for (ix = 0, hcaix = 0; ix < nhcas; ix++) { 355b86efd96Sagiri RDS_DPRINTF3(LABEL, "Open HCA: 0x%llx", guidp[ix]); 356b86efd96Sagiri 35700a3eaf3SRamaswamy Tummala hcap = rdsib_init_hca(guidp[ix]); 35800a3eaf3SRamaswamy Tummala if (hcap != NULL) hcaix++; 359b86efd96Sagiri } 360b86efd96Sagiri 361b86efd96Sagiri /* free the HCA list, we are done with it */ 362b86efd96Sagiri (void) ibt_free_hca_list(guidp, nhcas); 363b86efd96Sagiri 364b86efd96Sagiri if (hcaix == 0) { 365b86efd96Sagiri /* Failed to Initialize even one HCA */ 366d99cb22fSagiri RDS_DPRINTF2("rdsib_initialize_ib", "No HCAs are initialized"); 367b86efd96Sagiri (void) ibt_detach(rdsib_statep->rds_ibhdl); 368b86efd96Sagiri rdsib_statep->rds_ibhdl = NULL; 369b86efd96Sagiri return (-1); 370b86efd96Sagiri } 371b86efd96Sagiri 372b86efd96Sagiri if (hcaix < nhcas) { 373b86efd96Sagiri RDS_DPRINTF2("rdsib_open_ib", "HCAs %d/%d failed to initialize", 374b86efd96Sagiri (nhcas - hcaix), nhcas); 375b86efd96Sagiri } 376b86efd96Sagiri 377d99cb22fSagiri RDS_DPRINTF2("rdsib_initialize_ib", "return: statep %p", rdsib_statep); 378b86efd96Sagiri 379b86efd96Sagiri return (0); 380b86efd96Sagiri } 381b86efd96Sagiri 382b86efd96Sagiri /* 383d99cb22fSagiri * Called from detach 384b86efd96Sagiri */ 385b86efd96Sagiri void 386d99cb22fSagiri rdsib_deinitialize_ib() 387b86efd96Sagiri { 388c1f8b08eSagiri rds_hca_t *hcap, *nextp; 389b86efd96Sagiri int ret; 390b86efd96Sagiri 391d99cb22fSagiri RDS_DPRINTF2("rdsib_deinitialize_ib", "enter: statep %p", rdsib_statep); 392b86efd96Sagiri 393b86efd96Sagiri /* close and destroy all the sessions */ 394b86efd96Sagiri rds_close_sessions(NULL); 395b86efd96Sagiri 396c1f8b08eSagiri /* Release all HCA resources */ 397c1f8b08eSagiri rw_enter(&rdsib_statep->rds_hca_lock, RW_WRITER); 39800a3eaf3SRamaswamy Tummala RDS_DPRINTF2("rdsib_deinitialize_ib", "HCA List: %p, NHCA: %d", 39900a3eaf3SRamaswamy Tummala rdsib_statep->rds_hcalistp, rdsib_statep->rds_nhcas); 
400b86efd96Sagiri hcap = rdsib_statep->rds_hcalistp; 401c1f8b08eSagiri rdsib_statep->rds_hcalistp = NULL; 402c1f8b08eSagiri rdsib_statep->rds_nhcas = 0; 403c1f8b08eSagiri rw_exit(&rdsib_statep->rds_hca_lock); 404c1f8b08eSagiri 405b86efd96Sagiri while (hcap != NULL) { 406c1f8b08eSagiri nextp = hcap->hca_nextp; 407b86efd96Sagiri 40800a3eaf3SRamaswamy Tummala if (hcap->hca_hdl != NULL) { 40900a3eaf3SRamaswamy Tummala ret = ibt_free_pd(hcap->hca_hdl, hcap->hca_pdhdl); 41000a3eaf3SRamaswamy Tummala ASSERT(ret == IBT_SUCCESS); 411b86efd96Sagiri 41200a3eaf3SRamaswamy Tummala (void) ibt_free_portinfo(hcap->hca_pinfop, 41300a3eaf3SRamaswamy Tummala hcap->hca_pinfo_sz); 414b86efd96Sagiri 41500a3eaf3SRamaswamy Tummala ret = ibt_close_hca(hcap->hca_hdl); 41600a3eaf3SRamaswamy Tummala ASSERT(ret == IBT_SUCCESS); 41700a3eaf3SRamaswamy Tummala } 418b86efd96Sagiri 419b86efd96Sagiri kmem_free(hcap, sizeof (rds_hca_t)); 420c1f8b08eSagiri hcap = nextp; 421b86efd96Sagiri } 422b86efd96Sagiri 423b86efd96Sagiri /* Deregister with IBTF */ 424b86efd96Sagiri if (rdsib_statep->rds_ibhdl != NULL) { 425b86efd96Sagiri (void) ibt_detach(rdsib_statep->rds_ibhdl); 426b86efd96Sagiri rdsib_statep->rds_ibhdl = NULL; 427b86efd96Sagiri } 428b86efd96Sagiri 429d99cb22fSagiri RDS_DPRINTF2("rdsib_deinitialize_ib", "return: statep %p", 430d99cb22fSagiri rdsib_statep); 431d99cb22fSagiri } 432d99cb22fSagiri 433d99cb22fSagiri /* 434d99cb22fSagiri * Called on open of first RDS socket 435d99cb22fSagiri */ 436d99cb22fSagiri int 437d99cb22fSagiri rdsib_open_ib() 438d99cb22fSagiri { 439d99cb22fSagiri int ret; 440d99cb22fSagiri 441d99cb22fSagiri RDS_DPRINTF2("rdsib_open_ib", "enter: statep %p", rdsib_statep); 442d99cb22fSagiri 443d99cb22fSagiri /* Enable incoming connection requests */ 444d99cb22fSagiri if (rdsib_statep->rds_srvhdl == NULL) { 445d99cb22fSagiri rdsib_statep->rds_srvhdl = 446d99cb22fSagiri rds_register_service(rdsib_statep->rds_ibhdl); 447d99cb22fSagiri if (rdsib_statep->rds_srvhdl == NULL) { 
448d99cb22fSagiri RDS_DPRINTF2("rdsib_open_ib", 449d99cb22fSagiri "Service registration failed"); 450d99cb22fSagiri return (-1); 451d99cb22fSagiri } else { 452d99cb22fSagiri /* bind the service on all available ports */ 453d99cb22fSagiri ret = rds_bind_service(rdsib_statep); 454d99cb22fSagiri if (ret != 0) { 455d99cb22fSagiri RDS_DPRINTF2("rdsib_open_ib", 456d99cb22fSagiri "Bind service failed: %d", ret); 457d99cb22fSagiri } 458d99cb22fSagiri } 459d99cb22fSagiri } 460d99cb22fSagiri 461d99cb22fSagiri RDS_DPRINTF2("rdsib_open_ib", "return: statep %p", rdsib_statep); 462d99cb22fSagiri 463d99cb22fSagiri return (0); 464d99cb22fSagiri } 465d99cb22fSagiri 466d99cb22fSagiri /* 467d99cb22fSagiri * Called when all ports are closed. 468d99cb22fSagiri */ 469d99cb22fSagiri void 470d99cb22fSagiri rdsib_close_ib() 471d99cb22fSagiri { 472d99cb22fSagiri int ret; 473d99cb22fSagiri 474d99cb22fSagiri RDS_DPRINTF2("rdsib_close_ib", "enter: statep %p", rdsib_statep); 475d99cb22fSagiri 476d99cb22fSagiri /* Disable incoming connection requests */ 477d99cb22fSagiri if (rdsib_statep->rds_srvhdl != NULL) { 478d99cb22fSagiri ret = ibt_unbind_all_services(rdsib_statep->rds_srvhdl); 479d99cb22fSagiri if (ret != 0) { 480d99cb22fSagiri RDS_DPRINTF2("rdsib_close_ib", 481d99cb22fSagiri "ibt_unbind_all_services failed: %d\n", ret); 482d99cb22fSagiri } 483d99cb22fSagiri ret = ibt_deregister_service(rdsib_statep->rds_ibhdl, 484d99cb22fSagiri rdsib_statep->rds_srvhdl); 485d99cb22fSagiri if (ret != 0) { 486d99cb22fSagiri RDS_DPRINTF2("rdsib_close_ib", 487d99cb22fSagiri "ibt_deregister_service failed: %d\n", ret); 488d99cb22fSagiri } else { 489d99cb22fSagiri rdsib_statep->rds_srvhdl = NULL; 490d99cb22fSagiri } 491d99cb22fSagiri } 492d99cb22fSagiri 493d99cb22fSagiri RDS_DPRINTF2("rdsib_close_ib", "return: statep %p", rdsib_statep); 494b86efd96Sagiri } 495b86efd96Sagiri 496b86efd96Sagiri /* Return hcap, given the hca guid */ 497b86efd96Sagiri rds_hca_t * 498b86efd96Sagiri rds_get_hcap(rds_state_t *statep, 
ib_guid_t hca_guid) 499b86efd96Sagiri { 500b86efd96Sagiri rds_hca_t *hcap; 501b86efd96Sagiri 502b86efd96Sagiri RDS_DPRINTF4("rds_get_hcap", "rds_get_hcap: Enter: statep: 0x%p " 503b86efd96Sagiri "guid: %llx", statep, hca_guid); 504b86efd96Sagiri 505b86efd96Sagiri rw_enter(&statep->rds_hca_lock, RW_READER); 506b86efd96Sagiri 507b86efd96Sagiri hcap = statep->rds_hcalistp; 508b86efd96Sagiri while ((hcap != NULL) && (hcap->hca_guid != hca_guid)) { 509b86efd96Sagiri hcap = hcap->hca_nextp; 510b86efd96Sagiri } 511b86efd96Sagiri 51200a3eaf3SRamaswamy Tummala /* 51300a3eaf3SRamaswamy Tummala * don't let anyone use this HCA until the RECV memory 51400a3eaf3SRamaswamy Tummala * is registered with this HCA 51500a3eaf3SRamaswamy Tummala */ 51600a3eaf3SRamaswamy Tummala if ((hcap != NULL) && 51700a3eaf3SRamaswamy Tummala (hcap->hca_state == RDS_HCA_STATE_MEM_REGISTERED)) { 51800a3eaf3SRamaswamy Tummala ASSERT(hcap->hca_mrhdl != NULL); 51900a3eaf3SRamaswamy Tummala rw_exit(&statep->rds_hca_lock); 52000a3eaf3SRamaswamy Tummala return (hcap); 52100a3eaf3SRamaswamy Tummala } 52200a3eaf3SRamaswamy Tummala 52300a3eaf3SRamaswamy Tummala RDS_DPRINTF2("rds_get_hcap", 52400a3eaf3SRamaswamy Tummala "HCA (0x%p, 0x%llx) is not initialized", hcap, hca_guid); 525b86efd96Sagiri rw_exit(&statep->rds_hca_lock); 526b86efd96Sagiri 527b86efd96Sagiri RDS_DPRINTF4("rds_get_hcap", "rds_get_hcap: return"); 528b86efd96Sagiri 52900a3eaf3SRamaswamy Tummala return (NULL); 530b86efd96Sagiri } 531b86efd96Sagiri 532b86efd96Sagiri /* Return hcap, given a gid */ 533b86efd96Sagiri rds_hca_t * 534b86efd96Sagiri rds_gid_to_hcap(rds_state_t *statep, ib_gid_t gid) 535b86efd96Sagiri { 536c1f8b08eSagiri rds_hca_t *hcap; 537c1f8b08eSagiri uint_t ix; 538b86efd96Sagiri 539b86efd96Sagiri RDS_DPRINTF4("rds_gid_to_hcap", "Enter: statep: 0x%p gid: %llx:%llx", 540b86efd96Sagiri statep, gid.gid_prefix, gid.gid_guid); 541b86efd96Sagiri 542c1f8b08eSagiri rw_enter(&statep->rds_hca_lock, RW_READER); 543c1f8b08eSagiri 
544c1f8b08eSagiri hcap = statep->rds_hcalistp; 545c1f8b08eSagiri while (hcap != NULL) { 54600a3eaf3SRamaswamy Tummala 54700a3eaf3SRamaswamy Tummala /* 54800a3eaf3SRamaswamy Tummala * don't let anyone use this HCA until the RECV memory 54900a3eaf3SRamaswamy Tummala * is registered with this HCA 55000a3eaf3SRamaswamy Tummala */ 55100a3eaf3SRamaswamy Tummala if (hcap->hca_state != RDS_HCA_STATE_MEM_REGISTERED) { 55200a3eaf3SRamaswamy Tummala RDS_DPRINTF3("rds_gid_to_hcap", 55300a3eaf3SRamaswamy Tummala "HCA (0x%p, 0x%llx) is not initialized", 55400a3eaf3SRamaswamy Tummala hcap, gid.gid_guid); 55500a3eaf3SRamaswamy Tummala hcap = hcap->hca_nextp; 55600a3eaf3SRamaswamy Tummala continue; 55700a3eaf3SRamaswamy Tummala } 55800a3eaf3SRamaswamy Tummala 559c1f8b08eSagiri for (ix = 0; ix < hcap->hca_nports; ix++) { 560c1f8b08eSagiri if ((hcap->hca_pinfop[ix].p_sgid_tbl[0].gid_prefix == 561c1f8b08eSagiri gid.gid_prefix) && 562c1f8b08eSagiri (hcap->hca_pinfop[ix].p_sgid_tbl[0].gid_guid == 563c1f8b08eSagiri gid.gid_guid)) { 564c1f8b08eSagiri RDS_DPRINTF4("rds_gid_to_hcap", 565c1f8b08eSagiri "gid found in hcap: 0x%p", hcap); 566c1f8b08eSagiri rw_exit(&statep->rds_hca_lock); 567c1f8b08eSagiri return (hcap); 568c1f8b08eSagiri } 569c1f8b08eSagiri } 570c1f8b08eSagiri hcap = hcap->hca_nextp; 571b86efd96Sagiri } 572b86efd96Sagiri 573c1f8b08eSagiri rw_exit(&statep->rds_hca_lock); 574c1f8b08eSagiri 575c1f8b08eSagiri return (NULL); 576b86efd96Sagiri } 577b86efd96Sagiri 578b86efd96Sagiri /* This is called from the send CQ handler */ 579b86efd96Sagiri void 580b86efd96Sagiri rds_send_acknowledgement(rds_ep_t *ep) 581b86efd96Sagiri { 582b86efd96Sagiri int ret; 583b86efd96Sagiri uint_t ix; 584b86efd96Sagiri 585b86efd96Sagiri RDS_DPRINTF4("rds_send_acknowledgement", "Enter EP(%p)", ep); 586b86efd96Sagiri 587b86efd96Sagiri mutex_enter(&ep->ep_lock); 588b86efd96Sagiri 589b86efd96Sagiri ASSERT(ep->ep_rdmacnt != 0); 590b86efd96Sagiri 591b86efd96Sagiri /* 592b86efd96Sagiri * The previous ACK 
completed successfully, send the next one 593b86efd96Sagiri * if more messages were received after sending the last ACK 594b86efd96Sagiri */ 595b86efd96Sagiri if (ep->ep_rbufid != *(uintptr_t *)(uintptr_t)ep->ep_ackds.ds_va) { 596b86efd96Sagiri *(uintptr_t *)(uintptr_t)ep->ep_ackds.ds_va = ep->ep_rbufid; 597b86efd96Sagiri mutex_exit(&ep->ep_lock); 598b86efd96Sagiri 599b86efd96Sagiri /* send acknowledgement */ 600b86efd96Sagiri RDS_INCR_TXACKS(); 601b86efd96Sagiri ret = ibt_post_send(ep->ep_chanhdl, &ep->ep_ackwr, 1, &ix); 602b86efd96Sagiri if (ret != IBT_SUCCESS) { 60374242422Sagiri RDS_DPRINTF2("rds_send_acknowledgement", 604b86efd96Sagiri "EP(%p): ibt_post_send for acknowledgement " 605b86efd96Sagiri "failed: %d, SQ depth: %d", 606b86efd96Sagiri ep, ret, ep->ep_sndpool.pool_nbusy); 607b86efd96Sagiri mutex_enter(&ep->ep_lock); 608b86efd96Sagiri ep->ep_rdmacnt--; 609b86efd96Sagiri mutex_exit(&ep->ep_lock); 610b86efd96Sagiri } 611b86efd96Sagiri } else { 612b86efd96Sagiri /* ACKed all messages, no more to ACK */ 613b86efd96Sagiri ep->ep_rdmacnt--; 614b86efd96Sagiri mutex_exit(&ep->ep_lock); 615b86efd96Sagiri return; 616b86efd96Sagiri } 617b86efd96Sagiri 618b86efd96Sagiri RDS_DPRINTF4("rds_send_acknowledgement", "Return EP(%p)", ep); 619b86efd96Sagiri } 620b86efd96Sagiri 621b86efd96Sagiri static int 622b86efd96Sagiri rds_poll_ctrl_completions(ibt_cq_hdl_t cq, rds_ep_t *ep) 623b86efd96Sagiri { 624b86efd96Sagiri ibt_wc_t wc; 625b86efd96Sagiri uint_t npolled; 626b86efd96Sagiri rds_buf_t *bp; 627b86efd96Sagiri rds_ctrl_pkt_t *cpkt; 628b86efd96Sagiri rds_qp_t *recvqp; 629b86efd96Sagiri int ret = IBT_SUCCESS; 630b86efd96Sagiri 631b86efd96Sagiri RDS_DPRINTF4("rds_poll_ctrl_completions", "Enter: EP(%p)", ep); 632b86efd96Sagiri 633b86efd96Sagiri bzero(&wc, sizeof (ibt_wc_t)); 634b86efd96Sagiri ret = ibt_poll_cq(cq, &wc, 1, &npolled); 635b86efd96Sagiri if (ret != IBT_SUCCESS) { 636b86efd96Sagiri if (ret != IBT_CQ_EMPTY) { 637b86efd96Sagiri RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): 
ibt_poll_cq " 638b86efd96Sagiri "returned: %d", ep, cq, ret); 639b86efd96Sagiri } else { 640b86efd96Sagiri RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): ibt_poll_cq " 641b86efd96Sagiri "returned: IBT_CQ_EMPTY", ep, cq); 642b86efd96Sagiri } 643b86efd96Sagiri return (ret); 644b86efd96Sagiri } 645b86efd96Sagiri 646b86efd96Sagiri bp = (rds_buf_t *)(uintptr_t)wc.wc_id; 647b86efd96Sagiri 648b86efd96Sagiri if (wc.wc_status != IBT_WC_SUCCESS) { 649b86efd96Sagiri mutex_enter(&ep->ep_recvqp.qp_lock); 650b86efd96Sagiri ep->ep_recvqp.qp_level--; 651b86efd96Sagiri mutex_exit(&ep->ep_recvqp.qp_lock); 652b86efd96Sagiri 653b86efd96Sagiri /* Free the buffer */ 654b86efd96Sagiri bp->buf_state = RDS_RCVBUF_FREE; 655b86efd96Sagiri rds_free_recv_buf(bp, 1); 656b86efd96Sagiri 657b86efd96Sagiri /* Receive completion failure */ 658b86efd96Sagiri if (wc.wc_status != IBT_WC_WR_FLUSHED_ERR) { 659b86efd96Sagiri RDS_DPRINTF2("rds_poll_ctrl_completions", 660b86efd96Sagiri "EP(%p) CQ(%p) BP(%p): WC Error Status: %d", 661b86efd96Sagiri ep, cq, wc.wc_id, wc.wc_status); 662b86efd96Sagiri } 663b86efd96Sagiri return (ret); 664b86efd96Sagiri } 665b86efd96Sagiri 666b86efd96Sagiri /* there is one less in the RQ */ 667b86efd96Sagiri recvqp = &ep->ep_recvqp; 668b86efd96Sagiri mutex_enter(&recvqp->qp_lock); 669b86efd96Sagiri recvqp->qp_level--; 670b86efd96Sagiri if ((recvqp->qp_taskqpending == B_FALSE) && 671b86efd96Sagiri (recvqp->qp_level <= recvqp->qp_lwm)) { 672b86efd96Sagiri /* Time to post more buffers into the RQ */ 673b86efd96Sagiri recvqp->qp_taskqpending = B_TRUE; 674b86efd96Sagiri mutex_exit(&recvqp->qp_lock); 675b86efd96Sagiri 676b86efd96Sagiri ret = ddi_taskq_dispatch(rds_taskq, 677b86efd96Sagiri rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP); 678b86efd96Sagiri if (ret != DDI_SUCCESS) { 67974242422Sagiri RDS_DPRINTF2(LABEL, "ddi_taskq_dispatch failed: %d", 680b86efd96Sagiri ret); 681b86efd96Sagiri mutex_enter(&recvqp->qp_lock); 682b86efd96Sagiri recvqp->qp_taskqpending = B_FALSE; 
683b86efd96Sagiri mutex_exit(&recvqp->qp_lock); 684b86efd96Sagiri } 685b86efd96Sagiri } else { 686b86efd96Sagiri mutex_exit(&recvqp->qp_lock); 687b86efd96Sagiri } 688b86efd96Sagiri 689b86efd96Sagiri cpkt = (rds_ctrl_pkt_t *)(uintptr_t)bp->buf_ds.ds_va; 690b86efd96Sagiri rds_handle_control_message(ep->ep_sp, cpkt); 691b86efd96Sagiri 692b86efd96Sagiri bp->buf_state = RDS_RCVBUF_FREE; 693b86efd96Sagiri rds_free_recv_buf(bp, 1); 694b86efd96Sagiri 695b86efd96Sagiri RDS_DPRINTF4("rds_poll_ctrl_completions", "Return: EP(%p)", ep); 696b86efd96Sagiri 697b86efd96Sagiri return (ret); 698b86efd96Sagiri } 699b86efd96Sagiri 700b86efd96Sagiri #define RDS_POST_FEW_ATATIME 100 701b86efd96Sagiri /* Post recv WRs into the RQ. Assumes the ep->refcnt is already incremented */ 702b86efd96Sagiri void 703b86efd96Sagiri rds_post_recv_buf(void *arg) 704b86efd96Sagiri { 705b86efd96Sagiri ibt_channel_hdl_t chanhdl; 706b86efd96Sagiri rds_ep_t *ep; 707b86efd96Sagiri rds_session_t *sp; 708b86efd96Sagiri rds_qp_t *recvqp; 709b86efd96Sagiri rds_bufpool_t *gp; 710b86efd96Sagiri rds_buf_t *bp, *bp1; 711b86efd96Sagiri ibt_recv_wr_t *wrp, wr[RDS_POST_FEW_ATATIME]; 712b86efd96Sagiri rds_hca_t *hcap; 713b86efd96Sagiri uint_t npost, nspace, rcv_len; 714b86efd96Sagiri uint_t ix, jx, kx; 715b86efd96Sagiri int ret; 716b86efd96Sagiri 717b86efd96Sagiri chanhdl = (ibt_channel_hdl_t)arg; 718b86efd96Sagiri RDS_DPRINTF4("rds_post_recv_buf", "Enter: CHAN(%p)", chanhdl); 719b86efd96Sagiri RDS_INCR_POST_RCV_BUF_CALLS(); 720b86efd96Sagiri 721b86efd96Sagiri ep = (rds_ep_t *)ibt_get_chan_private(chanhdl); 722b86efd96Sagiri ASSERT(ep != NULL); 723b86efd96Sagiri sp = ep->ep_sp; 724b86efd96Sagiri recvqp = &ep->ep_recvqp; 725b86efd96Sagiri 726b86efd96Sagiri RDS_DPRINTF5("rds_post_recv_buf", "EP(%p)", ep); 727b86efd96Sagiri 728b86efd96Sagiri /* get the hcap for the HCA hosting this channel */ 72900a3eaf3SRamaswamy Tummala hcap = rds_lkup_hca(ep->ep_hca_guid); 730b86efd96Sagiri if (hcap == NULL) { 731b86efd96Sagiri 
RDS_DPRINTF2("rds_post_recv_buf", "HCA (0x%llx) not found", 732b86efd96Sagiri ep->ep_hca_guid); 733b86efd96Sagiri return; 734b86efd96Sagiri } 735b86efd96Sagiri 736b86efd96Sagiri /* Make sure the session is still connected */ 737b86efd96Sagiri rw_enter(&sp->session_lock, RW_READER); 738b86efd96Sagiri if ((sp->session_state != RDS_SESSION_STATE_INIT) && 73900a3eaf3SRamaswamy Tummala (sp->session_state != RDS_SESSION_STATE_CONNECTED) && 74000a3eaf3SRamaswamy Tummala (sp->session_state != RDS_SESSION_STATE_HCA_CLOSING)) { 741b86efd96Sagiri RDS_DPRINTF2("rds_post_recv_buf", "EP(%p): Session is not " 742b86efd96Sagiri "in active state (%d)", ep, sp->session_state); 743b86efd96Sagiri rw_exit(&sp->session_lock); 744b86efd96Sagiri return; 745b86efd96Sagiri } 746b86efd96Sagiri rw_exit(&sp->session_lock); 747b86efd96Sagiri 748b86efd96Sagiri /* how many can be posted */ 749b86efd96Sagiri mutex_enter(&recvqp->qp_lock); 750b86efd96Sagiri nspace = recvqp->qp_depth - recvqp->qp_level; 751b86efd96Sagiri if (nspace == 0) { 752b86efd96Sagiri RDS_DPRINTF2("rds_post_recv_buf", "RQ is FULL"); 753b86efd96Sagiri recvqp->qp_taskqpending = B_FALSE; 754b86efd96Sagiri mutex_exit(&recvqp->qp_lock); 755b86efd96Sagiri return; 756b86efd96Sagiri } 757b86efd96Sagiri mutex_exit(&recvqp->qp_lock); 758b86efd96Sagiri 759b86efd96Sagiri if (ep->ep_type == RDS_EP_TYPE_DATA) { 760b86efd96Sagiri gp = &rds_dpool; 761b86efd96Sagiri rcv_len = RdsPktSize; 762b86efd96Sagiri } else { 763b86efd96Sagiri gp = &rds_cpool; 764b86efd96Sagiri rcv_len = RDS_CTRLPKT_SIZE; 765b86efd96Sagiri } 766b86efd96Sagiri 767b86efd96Sagiri bp = rds_get_buf(gp, nspace, &jx); 768b86efd96Sagiri if (bp == NULL) { 769b86efd96Sagiri RDS_DPRINTF2(LABEL, "EP(%p): No Recv buffers available", ep); 770b86efd96Sagiri /* try again later */ 771b86efd96Sagiri ret = ddi_taskq_dispatch(rds_taskq, rds_post_recv_buf, 772b86efd96Sagiri (void *)ep->ep_chanhdl, DDI_NOSLEEP); 773b86efd96Sagiri if (ret != DDI_SUCCESS) { 77474242422Sagiri RDS_DPRINTF2(LABEL, 
"ddi_taskq_dispatch failed: %d", 775b86efd96Sagiri ret); 776b86efd96Sagiri mutex_enter(&recvqp->qp_lock); 777b86efd96Sagiri recvqp->qp_taskqpending = B_FALSE; 778b86efd96Sagiri mutex_exit(&recvqp->qp_lock); 779b86efd96Sagiri } 780b86efd96Sagiri return; 781b86efd96Sagiri } 782b86efd96Sagiri 783b86efd96Sagiri if (jx != nspace) { 784b86efd96Sagiri RDS_DPRINTF2(LABEL, "EP(%p): Recv buffers " 785b86efd96Sagiri "needed: %d available: %d", ep, nspace, jx); 786b86efd96Sagiri nspace = jx; 787b86efd96Sagiri } 788b86efd96Sagiri 789b86efd96Sagiri bp1 = bp; 790b86efd96Sagiri for (ix = 0; ix < nspace; ix++) { 791b86efd96Sagiri bp1->buf_ep = ep; 792b86efd96Sagiri ASSERT(bp1->buf_state == RDS_RCVBUF_FREE); 793b86efd96Sagiri bp1->buf_state = RDS_RCVBUF_POSTED; 794b86efd96Sagiri bp1->buf_ds.ds_key = hcap->hca_lkey; 795b86efd96Sagiri bp1->buf_ds.ds_len = rcv_len; 796b86efd96Sagiri bp1 = bp1->buf_nextp; 797b86efd96Sagiri } 798b86efd96Sagiri 799b86efd96Sagiri #if 0 800b86efd96Sagiri wrp = kmem_zalloc(RDS_POST_FEW_ATATIME * sizeof (ibt_recv_wr_t), 801b86efd96Sagiri KM_SLEEP); 802b86efd96Sagiri #else 803b86efd96Sagiri wrp = &wr[0]; 804b86efd96Sagiri #endif 805b86efd96Sagiri 806b86efd96Sagiri npost = nspace; 807b86efd96Sagiri while (npost) { 808b86efd96Sagiri jx = (npost > RDS_POST_FEW_ATATIME) ? 
809b86efd96Sagiri RDS_POST_FEW_ATATIME : npost; 810b86efd96Sagiri for (ix = 0; ix < jx; ix++) { 811b86efd96Sagiri wrp[ix].wr_id = (uintptr_t)bp; 812b86efd96Sagiri wrp[ix].wr_nds = 1; 813b86efd96Sagiri wrp[ix].wr_sgl = &bp->buf_ds; 814b86efd96Sagiri bp = bp->buf_nextp; 815b86efd96Sagiri } 816b86efd96Sagiri 817b86efd96Sagiri ret = ibt_post_recv(chanhdl, wrp, jx, &kx); 818b86efd96Sagiri if ((ret != IBT_SUCCESS) || (kx != jx)) { 81974242422Sagiri RDS_DPRINTF2(LABEL, "ibt_post_recv for %d WRs failed: " 820b86efd96Sagiri "%d", npost, ret); 821b86efd96Sagiri npost -= kx; 822b86efd96Sagiri break; 823b86efd96Sagiri } 824b86efd96Sagiri 825b86efd96Sagiri npost -= jx; 826b86efd96Sagiri } 827b86efd96Sagiri 828b86efd96Sagiri mutex_enter(&recvqp->qp_lock); 829b86efd96Sagiri if (npost != 0) { 830b86efd96Sagiri RDS_DPRINTF2("rds_post_recv_buf", 831b86efd96Sagiri "EP(%p) Failed to post %d WRs", ep, npost); 832b86efd96Sagiri recvqp->qp_level += (nspace - npost); 833b86efd96Sagiri } else { 834b86efd96Sagiri recvqp->qp_level += nspace; 835b86efd96Sagiri } 836b86efd96Sagiri 837b86efd96Sagiri /* 838b86efd96Sagiri * sometimes, the recv WRs can get consumed as soon as they are 839b86efd96Sagiri * posted. In that case, taskq thread to post more WRs to the RQ will 840b86efd96Sagiri * not be scheduled as the taskqpending flag is still set. 
841b86efd96Sagiri */ 842b86efd96Sagiri if (recvqp->qp_level == 0) { 843b86efd96Sagiri mutex_exit(&recvqp->qp_lock); 844b86efd96Sagiri ret = ddi_taskq_dispatch(rds_taskq, 845b86efd96Sagiri rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP); 846b86efd96Sagiri if (ret != DDI_SUCCESS) { 84774242422Sagiri RDS_DPRINTF2("rds_post_recv_buf", 848b86efd96Sagiri "ddi_taskq_dispatch failed: %d", ret); 849b86efd96Sagiri mutex_enter(&recvqp->qp_lock); 850b86efd96Sagiri recvqp->qp_taskqpending = B_FALSE; 851b86efd96Sagiri mutex_exit(&recvqp->qp_lock); 852b86efd96Sagiri } 853b86efd96Sagiri } else { 854b86efd96Sagiri recvqp->qp_taskqpending = B_FALSE; 855b86efd96Sagiri mutex_exit(&recvqp->qp_lock); 856b86efd96Sagiri } 857b86efd96Sagiri 858b86efd96Sagiri #if 0 859b86efd96Sagiri kmem_free(wrp, RDS_POST_FEW_ATATIME * sizeof (ibt_recv_wr_t)); 860b86efd96Sagiri #endif 861b86efd96Sagiri 862b86efd96Sagiri RDS_DPRINTF4("rds_post_recv_buf", "Return: EP(%p)", ep); 863b86efd96Sagiri } 864b86efd96Sagiri 865b86efd96Sagiri static int 866b86efd96Sagiri rds_poll_data_completions(ibt_cq_hdl_t cq, rds_ep_t *ep) 867b86efd96Sagiri { 868b86efd96Sagiri ibt_wc_t wc; 869b86efd96Sagiri rds_buf_t *bp; 870b86efd96Sagiri rds_data_hdr_t *pktp; 871b86efd96Sagiri rds_qp_t *recvqp; 872b86efd96Sagiri uint_t npolled; 873b86efd96Sagiri int ret = IBT_SUCCESS; 874b86efd96Sagiri 875b86efd96Sagiri 876b86efd96Sagiri RDS_DPRINTF4("rds_poll_data_completions", "Enter: EP(%p)", ep); 877b86efd96Sagiri 878b86efd96Sagiri bzero(&wc, sizeof (ibt_wc_t)); 879b86efd96Sagiri ret = ibt_poll_cq(cq, &wc, 1, &npolled); 880b86efd96Sagiri if (ret != IBT_SUCCESS) { 881b86efd96Sagiri if (ret != IBT_CQ_EMPTY) { 882b86efd96Sagiri RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_poll_cq " 883b86efd96Sagiri "returned: %d", ep, cq, ret); 884b86efd96Sagiri } else { 885b86efd96Sagiri RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): ibt_poll_cq " 886b86efd96Sagiri "returned: IBT_CQ_EMPTY", ep, cq); 887b86efd96Sagiri } 888b86efd96Sagiri return (ret); 
889b86efd96Sagiri } 890b86efd96Sagiri 891b86efd96Sagiri bp = (rds_buf_t *)(uintptr_t)wc.wc_id; 892b86efd96Sagiri ASSERT(bp->buf_state == RDS_RCVBUF_POSTED); 893b86efd96Sagiri bp->buf_state = RDS_RCVBUF_ONSOCKQ; 894b86efd96Sagiri bp->buf_nextp = NULL; 895b86efd96Sagiri 896b86efd96Sagiri if (wc.wc_status != IBT_WC_SUCCESS) { 897b86efd96Sagiri mutex_enter(&ep->ep_recvqp.qp_lock); 898b86efd96Sagiri ep->ep_recvqp.qp_level--; 899b86efd96Sagiri mutex_exit(&ep->ep_recvqp.qp_lock); 900b86efd96Sagiri 901b86efd96Sagiri /* free the buffer */ 902b86efd96Sagiri bp->buf_state = RDS_RCVBUF_FREE; 903b86efd96Sagiri rds_free_recv_buf(bp, 1); 904b86efd96Sagiri 905b86efd96Sagiri /* Receive completion failure */ 906b86efd96Sagiri if (wc.wc_status != IBT_WC_WR_FLUSHED_ERR) { 907b86efd96Sagiri RDS_DPRINTF2("rds_poll_data_completions", 908b86efd96Sagiri "EP(%p) CQ(%p) BP(%p): WC Error Status: %d", 909b86efd96Sagiri ep, cq, wc.wc_id, wc.wc_status); 910b86efd96Sagiri RDS_INCR_RXERRS(); 911b86efd96Sagiri } 912b86efd96Sagiri return (ret); 913b86efd96Sagiri } 914b86efd96Sagiri 915b86efd96Sagiri /* there is one less in the RQ */ 916b86efd96Sagiri recvqp = &ep->ep_recvqp; 917b86efd96Sagiri mutex_enter(&recvqp->qp_lock); 918b86efd96Sagiri recvqp->qp_level--; 919b86efd96Sagiri if ((recvqp->qp_taskqpending == B_FALSE) && 920b86efd96Sagiri (recvqp->qp_level <= recvqp->qp_lwm)) { 921b86efd96Sagiri /* Time to post more buffers into the RQ */ 922b86efd96Sagiri recvqp->qp_taskqpending = B_TRUE; 923b86efd96Sagiri mutex_exit(&recvqp->qp_lock); 924b86efd96Sagiri 925b86efd96Sagiri ret = ddi_taskq_dispatch(rds_taskq, 926b86efd96Sagiri rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP); 927b86efd96Sagiri if (ret != DDI_SUCCESS) { 92874242422Sagiri RDS_DPRINTF2(LABEL, "ddi_taskq_dispatch failed: %d", 929b86efd96Sagiri ret); 930b86efd96Sagiri mutex_enter(&recvqp->qp_lock); 931b86efd96Sagiri recvqp->qp_taskqpending = B_FALSE; 932b86efd96Sagiri mutex_exit(&recvqp->qp_lock); 933b86efd96Sagiri } 
934b86efd96Sagiri } else { 935b86efd96Sagiri mutex_exit(&recvqp->qp_lock); 936b86efd96Sagiri } 937b86efd96Sagiri 938b86efd96Sagiri pktp = (rds_data_hdr_t *)(uintptr_t)bp->buf_ds.ds_va; 939b86efd96Sagiri ASSERT(pktp->dh_datalen != 0); 940b86efd96Sagiri 941b86efd96Sagiri RDS_DPRINTF5(LABEL, "Message Received: sendIP: 0x%x recvIP: 0x%x " 942b86efd96Sagiri "sendport: %d recvport: %d npkts: %d pktno: %d", ep->ep_remip, 943b86efd96Sagiri ep->ep_myip, pktp->dh_sendport, pktp->dh_recvport, 944b86efd96Sagiri pktp->dh_npkts, pktp->dh_psn); 945b86efd96Sagiri 946b86efd96Sagiri RDS_DPRINTF3(LABEL, "BP(%p): npkts: %d psn: %d", bp, 947b86efd96Sagiri pktp->dh_npkts, pktp->dh_psn); 948b86efd96Sagiri 949b86efd96Sagiri if (pktp->dh_npkts == 1) { 950b86efd96Sagiri /* single pkt or last packet */ 951b86efd96Sagiri if (pktp->dh_psn != 0) { 952b86efd96Sagiri /* last packet of a segmented message */ 953b86efd96Sagiri ASSERT(ep->ep_seglbp != NULL); 954b86efd96Sagiri ep->ep_seglbp->buf_nextp = bp; 955b86efd96Sagiri ep->ep_seglbp = bp; 956b86efd96Sagiri rds_received_msg(ep, ep->ep_segfbp); 957b86efd96Sagiri ep->ep_segfbp = NULL; 958b86efd96Sagiri ep->ep_seglbp = NULL; 959b86efd96Sagiri } else { 960b86efd96Sagiri /* single packet */ 961b86efd96Sagiri rds_received_msg(ep, bp); 962b86efd96Sagiri } 963b86efd96Sagiri } else { 964b86efd96Sagiri /* multi-pkt msg */ 965b86efd96Sagiri if (pktp->dh_psn == 0) { 966b86efd96Sagiri /* first packet */ 967b86efd96Sagiri ASSERT(ep->ep_segfbp == NULL); 968b86efd96Sagiri ep->ep_segfbp = bp; 969b86efd96Sagiri ep->ep_seglbp = bp; 970b86efd96Sagiri } else { 971b86efd96Sagiri /* intermediate packet */ 972b86efd96Sagiri ASSERT(ep->ep_segfbp != NULL); 973b86efd96Sagiri ep->ep_seglbp->buf_nextp = bp; 974b86efd96Sagiri ep->ep_seglbp = bp; 975b86efd96Sagiri } 976b86efd96Sagiri } 977b86efd96Sagiri 978b86efd96Sagiri RDS_DPRINTF4("rds_poll_data_completions", "Return: EP(%p)", ep); 979b86efd96Sagiri 980b86efd96Sagiri return (ret); 981b86efd96Sagiri } 982b86efd96Sagiri 
983b86efd96Sagiri void 984b86efd96Sagiri rds_recvcq_handler(ibt_cq_hdl_t cq, void *arg) 985b86efd96Sagiri { 986b86efd96Sagiri rds_ep_t *ep; 987b86efd96Sagiri int ret = IBT_SUCCESS; 988b86efd96Sagiri int (*func)(ibt_cq_hdl_t, rds_ep_t *); 989b86efd96Sagiri 990b86efd96Sagiri ep = (rds_ep_t *)arg; 991b86efd96Sagiri 992b86efd96Sagiri RDS_DPRINTF4("rds_recvcq_handler", "enter: EP(%p)", ep); 993b86efd96Sagiri 994b86efd96Sagiri if (ep->ep_type == RDS_EP_TYPE_DATA) { 995b86efd96Sagiri func = rds_poll_data_completions; 996b86efd96Sagiri } else { 997b86efd96Sagiri func = rds_poll_ctrl_completions; 998b86efd96Sagiri } 999b86efd96Sagiri 1000b86efd96Sagiri do { 1001b86efd96Sagiri ret = func(cq, ep); 1002b86efd96Sagiri } while (ret != IBT_CQ_EMPTY); 1003b86efd96Sagiri 1004b86efd96Sagiri /* enable the CQ */ 1005b86efd96Sagiri ret = ibt_enable_cq_notify(cq, rds_wc_signal); 1006b86efd96Sagiri if (ret != IBT_SUCCESS) { 1007b86efd96Sagiri RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_enable_cq_notify " 1008b86efd96Sagiri "failed: %d", ep, cq, ret); 1009b86efd96Sagiri return; 1010b86efd96Sagiri } 1011b86efd96Sagiri 1012b86efd96Sagiri do { 1013b86efd96Sagiri ret = func(cq, ep); 1014b86efd96Sagiri } while (ret != IBT_CQ_EMPTY); 1015b86efd96Sagiri 1016b86efd96Sagiri RDS_DPRINTF4("rds_recvcq_handler", "Return: EP(%p)", ep); 1017b86efd96Sagiri } 1018b86efd96Sagiri 1019b86efd96Sagiri void 1020b86efd96Sagiri rds_poll_send_completions(ibt_cq_hdl_t cq, rds_ep_t *ep, boolean_t lock) 1021b86efd96Sagiri { 1022b86efd96Sagiri ibt_wc_t wc[RDS_NUM_DATA_SEND_WCS]; 1023b86efd96Sagiri uint_t npolled, nret, send_error = 0; 1024b86efd96Sagiri rds_buf_t *headp, *tailp, *bp; 1025b86efd96Sagiri int ret, ix; 1026b86efd96Sagiri 1027b86efd96Sagiri RDS_DPRINTF4("rds_poll_send_completions", "Enter EP(%p)", ep); 1028b86efd96Sagiri 1029b86efd96Sagiri headp = NULL; 1030b86efd96Sagiri tailp = NULL; 1031b86efd96Sagiri npolled = 0; 1032b86efd96Sagiri do { 1033b86efd96Sagiri ret = ibt_poll_cq(cq, wc, RDS_NUM_DATA_SEND_WCS, 
&nret); 1034b86efd96Sagiri if (ret != IBT_SUCCESS) { 1035b86efd96Sagiri if (ret != IBT_CQ_EMPTY) { 1036b86efd96Sagiri RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): " 1037b86efd96Sagiri "ibt_poll_cq returned: %d", ep, cq, ret); 1038b86efd96Sagiri } else { 1039b86efd96Sagiri RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): " 1040b86efd96Sagiri "ibt_poll_cq returned: IBT_CQ_EMPTY", 1041b86efd96Sagiri ep, cq); 1042b86efd96Sagiri } 1043b86efd96Sagiri 1044b86efd96Sagiri break; 1045b86efd96Sagiri } 1046b86efd96Sagiri 1047b86efd96Sagiri for (ix = 0; ix < nret; ix++) { 1048b86efd96Sagiri if (wc[ix].wc_status == IBT_WC_SUCCESS) { 1049b86efd96Sagiri if (wc[ix].wc_type == IBT_WRC_RDMAW) { 1050b86efd96Sagiri rds_send_acknowledgement(ep); 1051b86efd96Sagiri continue; 1052b86efd96Sagiri } 1053b86efd96Sagiri 1054b86efd96Sagiri bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id; 1055b86efd96Sagiri ASSERT(bp->buf_state == RDS_SNDBUF_PENDING); 1056b86efd96Sagiri bp->buf_state = RDS_SNDBUF_FREE; 1057b86efd96Sagiri } else if (wc[ix].wc_status == IBT_WC_WR_FLUSHED_ERR) { 1058b86efd96Sagiri RDS_INCR_TXERRS(); 1059b86efd96Sagiri RDS_DPRINTF5("rds_poll_send_completions", 1060b86efd96Sagiri "EP(%p): WC ID: %p ERROR: %d", ep, 1061b86efd96Sagiri wc[ix].wc_id, wc[ix].wc_status); 1062b86efd96Sagiri 1063b86efd96Sagiri if (wc[ix].wc_id == RDS_RDMAW_WRID) { 1064b86efd96Sagiri mutex_enter(&ep->ep_lock); 1065b86efd96Sagiri ep->ep_rdmacnt--; 1066b86efd96Sagiri mutex_exit(&ep->ep_lock); 1067b86efd96Sagiri continue; 1068b86efd96Sagiri } 1069b86efd96Sagiri 1070b86efd96Sagiri bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id; 1071d99cb22fSagiri ASSERT(bp->buf_state == RDS_SNDBUF_PENDING); 1072d99cb22fSagiri bp->buf_state = RDS_SNDBUF_FREE; 1073b86efd96Sagiri } else { 1074b86efd96Sagiri RDS_INCR_TXERRS(); 1075b86efd96Sagiri RDS_DPRINTF2("rds_poll_send_completions", 1076b86efd96Sagiri "EP(%p): WC ID: %p ERROR: %d", ep, 1077b86efd96Sagiri wc[ix].wc_id, wc[ix].wc_status); 1078b86efd96Sagiri if (send_error == 0) { 1079b86efd96Sagiri rds_session_t 
*sp = ep->ep_sp; 1080b86efd96Sagiri 1081b86efd96Sagiri /* don't let anyone send anymore */ 1082b86efd96Sagiri rw_enter(&sp->session_lock, RW_WRITER); 1083b86efd96Sagiri if (sp->session_state != 1084b86efd96Sagiri RDS_SESSION_STATE_ERROR) { 1085b86efd96Sagiri sp->session_state = 1086b86efd96Sagiri RDS_SESSION_STATE_ERROR; 1087b86efd96Sagiri /* Make this the active end */ 1088b86efd96Sagiri sp->session_type = 1089b86efd96Sagiri RDS_SESSION_ACTIVE; 1090b86efd96Sagiri } 1091b86efd96Sagiri rw_exit(&sp->session_lock); 1092b86efd96Sagiri } 1093b86efd96Sagiri 1094b86efd96Sagiri send_error++; 1095b86efd96Sagiri 1096b86efd96Sagiri if (wc[ix].wc_id == RDS_RDMAW_WRID) { 1097b86efd96Sagiri mutex_enter(&ep->ep_lock); 1098b86efd96Sagiri ep->ep_rdmacnt--; 1099b86efd96Sagiri mutex_exit(&ep->ep_lock); 1100b86efd96Sagiri continue; 1101b86efd96Sagiri } 1102b86efd96Sagiri 1103b86efd96Sagiri bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id; 1104d99cb22fSagiri ASSERT(bp->buf_state == RDS_SNDBUF_PENDING); 1105d99cb22fSagiri bp->buf_state = RDS_SNDBUF_FREE; 1106b86efd96Sagiri } 1107b86efd96Sagiri 1108b86efd96Sagiri bp->buf_nextp = NULL; 1109b86efd96Sagiri if (headp) { 1110b86efd96Sagiri tailp->buf_nextp = bp; 1111b86efd96Sagiri tailp = bp; 1112b86efd96Sagiri } else { 1113b86efd96Sagiri headp = bp; 1114b86efd96Sagiri tailp = bp; 1115b86efd96Sagiri } 1116b86efd96Sagiri 1117b86efd96Sagiri npolled++; 1118b86efd96Sagiri } 1119b86efd96Sagiri 1120b86efd96Sagiri if (rds_no_interrupts && (npolled > 100)) { 1121b86efd96Sagiri break; 1122b86efd96Sagiri } 1123b86efd96Sagiri 1124b86efd96Sagiri if (rds_no_interrupts == 1) { 1125b86efd96Sagiri break; 1126b86efd96Sagiri } 1127b86efd96Sagiri } while (ret != IBT_CQ_EMPTY); 1128b86efd96Sagiri 1129b86efd96Sagiri RDS_DPRINTF5("rds_poll_send_completions", "Npolled: %d send_error: %d", 1130b86efd96Sagiri npolled, send_error); 1131b86efd96Sagiri 1132b86efd96Sagiri /* put the buffers to the pool */ 1133b86efd96Sagiri if (npolled != 0) { 1134b86efd96Sagiri 
rds_free_send_buf(ep, headp, tailp, npolled, lock); 1135b86efd96Sagiri } 1136b86efd96Sagiri 1137b86efd96Sagiri if (send_error != 0) { 1138b86efd96Sagiri rds_handle_send_error(ep); 1139b86efd96Sagiri } 1140b86efd96Sagiri 1141b86efd96Sagiri RDS_DPRINTF4("rds_poll_send_completions", "Return EP(%p)", ep); 1142b86efd96Sagiri } 1143b86efd96Sagiri 1144b86efd96Sagiri void 1145b86efd96Sagiri rds_sendcq_handler(ibt_cq_hdl_t cq, void *arg) 1146b86efd96Sagiri { 1147b86efd96Sagiri rds_ep_t *ep; 1148b86efd96Sagiri int ret; 1149b86efd96Sagiri 1150b86efd96Sagiri ep = (rds_ep_t *)arg; 1151b86efd96Sagiri 1152b86efd96Sagiri RDS_DPRINTF4("rds_sendcq_handler", "Enter: EP(%p)", ep); 1153b86efd96Sagiri 1154b86efd96Sagiri /* enable the CQ */ 1155b86efd96Sagiri ret = ibt_enable_cq_notify(cq, IBT_NEXT_COMPLETION); 1156b86efd96Sagiri if (ret != IBT_SUCCESS) { 1157b86efd96Sagiri RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_enable_cq_notify " 1158b86efd96Sagiri "failed: %d", ep, cq, ret); 1159b86efd96Sagiri return; 1160b86efd96Sagiri } 1161b86efd96Sagiri 1162b86efd96Sagiri rds_poll_send_completions(cq, ep, B_FALSE); 1163b86efd96Sagiri 1164b86efd96Sagiri RDS_DPRINTF4("rds_sendcq_handler", "Return: EP(%p)", ep); 1165b86efd96Sagiri } 1166b86efd96Sagiri 1167b86efd96Sagiri void 1168b86efd96Sagiri rds_ep_free_rc_channel(rds_ep_t *ep) 1169b86efd96Sagiri { 1170b86efd96Sagiri int ret; 1171b86efd96Sagiri 1172b86efd96Sagiri RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) - Enter", ep); 1173b86efd96Sagiri 1174b86efd96Sagiri ASSERT(mutex_owned(&ep->ep_lock)); 1175b86efd96Sagiri 1176b86efd96Sagiri /* free the QP */ 1177b86efd96Sagiri if (ep->ep_chanhdl != NULL) { 1178b86efd96Sagiri /* wait until the RQ is empty */ 1179b86efd96Sagiri (void) ibt_flush_channel(ep->ep_chanhdl); 1180b86efd96Sagiri (void) rds_is_recvq_empty(ep, B_TRUE); 1181b86efd96Sagiri ret = ibt_free_channel(ep->ep_chanhdl); 1182b86efd96Sagiri if (ret != IBT_SUCCESS) { 118374242422Sagiri RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) " 
1184b86efd96Sagiri "ibt_free_channel returned: %d", ep, ret); 1185b86efd96Sagiri } 1186b86efd96Sagiri ep->ep_chanhdl = NULL; 1187b86efd96Sagiri } else { 1188b86efd96Sagiri RDS_DPRINTF2("rds_ep_free_rc_channel", 1189b86efd96Sagiri "EP(%p) Channel is ALREADY FREE", ep); 1190b86efd96Sagiri } 1191b86efd96Sagiri 1192b86efd96Sagiri /* free the Send CQ */ 1193b86efd96Sagiri if (ep->ep_sendcq != NULL) { 1194b86efd96Sagiri ret = ibt_free_cq(ep->ep_sendcq); 1195b86efd96Sagiri if (ret != IBT_SUCCESS) { 119674242422Sagiri RDS_DPRINTF2("rds_ep_free_rc_channel", 1197b86efd96Sagiri "EP(%p) - for sendcq, ibt_free_cq returned %d", 1198b86efd96Sagiri ep, ret); 1199b86efd96Sagiri } 1200b86efd96Sagiri ep->ep_sendcq = NULL; 1201b86efd96Sagiri } else { 1202b86efd96Sagiri RDS_DPRINTF2("rds_ep_free_rc_channel", 1203b86efd96Sagiri "EP(%p) SendCQ is ALREADY FREE", ep); 1204b86efd96Sagiri } 1205b86efd96Sagiri 1206b86efd96Sagiri /* free the Recv CQ */ 1207b86efd96Sagiri if (ep->ep_recvcq != NULL) { 1208b86efd96Sagiri ret = ibt_free_cq(ep->ep_recvcq); 1209b86efd96Sagiri if (ret != IBT_SUCCESS) { 121074242422Sagiri RDS_DPRINTF2("rds_ep_free_rc_channel", 1211b86efd96Sagiri "EP(%p) - for recvcq, ibt_free_cq returned %d", 1212b86efd96Sagiri ep, ret); 1213b86efd96Sagiri } 1214b86efd96Sagiri ep->ep_recvcq = NULL; 1215b86efd96Sagiri } else { 1216b86efd96Sagiri RDS_DPRINTF2("rds_ep_free_rc_channel", 1217b86efd96Sagiri "EP(%p) RecvCQ is ALREADY FREE", ep); 1218b86efd96Sagiri } 1219b86efd96Sagiri 1220b86efd96Sagiri RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) - Return", ep); 1221b86efd96Sagiri } 1222b86efd96Sagiri 1223b86efd96Sagiri /* Allocate resources for RC channel */ 1224b86efd96Sagiri ibt_channel_hdl_t 1225b86efd96Sagiri rds_ep_alloc_rc_channel(rds_ep_t *ep, uint8_t hca_port) 1226b86efd96Sagiri { 1227b86efd96Sagiri int ret = IBT_SUCCESS; 1228b86efd96Sagiri ibt_cq_attr_t scqattr, rcqattr; 1229b86efd96Sagiri ibt_rc_chan_alloc_args_t chanargs; 1230b86efd96Sagiri ibt_channel_hdl_t chanhdl; 
1231c1f8b08eSagiri rds_session_t *sp; 1232b86efd96Sagiri rds_hca_t *hcap; 1233b86efd96Sagiri 1234b86efd96Sagiri RDS_DPRINTF4("rds_ep_alloc_rc_channel", "Enter: 0x%p port: %d", 1235b86efd96Sagiri ep, hca_port); 1236b86efd96Sagiri 1237c1f8b08eSagiri /* Update the EP with the right IP address and HCA guid */ 1238c1f8b08eSagiri sp = ep->ep_sp; 1239c1f8b08eSagiri ASSERT(sp != NULL); 1240c1f8b08eSagiri rw_enter(&sp->session_lock, RW_READER); 1241c1f8b08eSagiri mutex_enter(&ep->ep_lock); 1242c1f8b08eSagiri ep->ep_myip = sp->session_myip; 1243c1f8b08eSagiri ep->ep_remip = sp->session_remip; 1244c1f8b08eSagiri hcap = rds_gid_to_hcap(rdsib_statep, sp->session_lgid); 1245c1f8b08eSagiri ep->ep_hca_guid = hcap->hca_guid; 1246c1f8b08eSagiri mutex_exit(&ep->ep_lock); 1247c1f8b08eSagiri rw_exit(&sp->session_lock); 1248b86efd96Sagiri 1249b86efd96Sagiri /* reset taskqpending flag here */ 1250b86efd96Sagiri ep->ep_recvqp.qp_taskqpending = B_FALSE; 1251b86efd96Sagiri 1252b86efd96Sagiri if (ep->ep_type == RDS_EP_TYPE_CTRL) { 1253b86efd96Sagiri scqattr.cq_size = MaxCtrlSendBuffers; 1254b86efd96Sagiri scqattr.cq_sched = NULL; 1255b86efd96Sagiri scqattr.cq_flags = IBT_CQ_NO_FLAGS; 1256b86efd96Sagiri 1257b86efd96Sagiri rcqattr.cq_size = MaxCtrlRecvBuffers; 1258b86efd96Sagiri rcqattr.cq_sched = NULL; 1259b86efd96Sagiri rcqattr.cq_flags = IBT_CQ_NO_FLAGS; 1260b86efd96Sagiri 1261b86efd96Sagiri chanargs.rc_sizes.cs_sq = MaxCtrlSendBuffers; 1262b86efd96Sagiri chanargs.rc_sizes.cs_rq = MaxCtrlRecvBuffers; 1263b86efd96Sagiri chanargs.rc_sizes.cs_sq_sgl = 1; 1264b86efd96Sagiri chanargs.rc_sizes.cs_rq_sgl = 1; 1265b86efd96Sagiri } else { 1266b86efd96Sagiri scqattr.cq_size = MaxDataSendBuffers + RDS_NUM_ACKS; 1267b86efd96Sagiri scqattr.cq_sched = NULL; 1268b86efd96Sagiri scqattr.cq_flags = IBT_CQ_NO_FLAGS; 1269b86efd96Sagiri 1270b86efd96Sagiri rcqattr.cq_size = MaxDataRecvBuffers; 1271b86efd96Sagiri rcqattr.cq_sched = NULL; 1272b86efd96Sagiri rcqattr.cq_flags = IBT_CQ_NO_FLAGS; 1273b86efd96Sagiri 
1274b86efd96Sagiri chanargs.rc_sizes.cs_sq = MaxDataSendBuffers + RDS_NUM_ACKS; 1275b86efd96Sagiri chanargs.rc_sizes.cs_rq = MaxDataRecvBuffers; 1276b86efd96Sagiri chanargs.rc_sizes.cs_sq_sgl = 1; 1277b86efd96Sagiri chanargs.rc_sizes.cs_rq_sgl = 1; 1278b86efd96Sagiri } 1279b86efd96Sagiri 12805763ba1eSagiri mutex_enter(&ep->ep_lock); 1281b86efd96Sagiri if (ep->ep_sendcq == NULL) { 1282b86efd96Sagiri /* returned size is always greater than the requested size */ 1283b86efd96Sagiri ret = ibt_alloc_cq(hcap->hca_hdl, &scqattr, 1284b86efd96Sagiri &ep->ep_sendcq, NULL); 1285b86efd96Sagiri if (ret != IBT_SUCCESS) { 1286b86efd96Sagiri RDS_DPRINTF2(LABEL, "ibt_alloc_cq for sendCQ " 1287b86efd96Sagiri "failed, size = %d: %d", scqattr.cq_size, ret); 12885763ba1eSagiri mutex_exit(&ep->ep_lock); 1289b86efd96Sagiri return (NULL); 1290b86efd96Sagiri } 1291b86efd96Sagiri 1292b86efd96Sagiri (void) ibt_set_cq_handler(ep->ep_sendcq, rds_sendcq_handler, 1293b86efd96Sagiri ep); 1294b86efd96Sagiri 1295b86efd96Sagiri if (rds_no_interrupts == 0) { 1296b86efd96Sagiri ret = ibt_enable_cq_notify(ep->ep_sendcq, 1297b86efd96Sagiri IBT_NEXT_COMPLETION); 1298b86efd96Sagiri if (ret != IBT_SUCCESS) { 1299b86efd96Sagiri RDS_DPRINTF2(LABEL, 1300b86efd96Sagiri "ibt_enable_cq_notify failed: %d", ret); 1301b86efd96Sagiri (void) ibt_free_cq(ep->ep_sendcq); 1302b86efd96Sagiri ep->ep_sendcq = NULL; 13035763ba1eSagiri mutex_exit(&ep->ep_lock); 1304b86efd96Sagiri return (NULL); 1305b86efd96Sagiri } 1306b86efd96Sagiri } 1307b86efd96Sagiri } 1308b86efd96Sagiri 1309b86efd96Sagiri if (ep->ep_recvcq == NULL) { 1310b86efd96Sagiri /* returned size is always greater than the requested size */ 1311b86efd96Sagiri ret = ibt_alloc_cq(hcap->hca_hdl, &rcqattr, 1312b86efd96Sagiri &ep->ep_recvcq, NULL); 1313b86efd96Sagiri if (ret != IBT_SUCCESS) { 1314b86efd96Sagiri RDS_DPRINTF2(LABEL, "ibt_alloc_cq for recvCQ " 1315b86efd96Sagiri "failed, size = %d: %d", rcqattr.cq_size, ret); 1316b86efd96Sagiri (void) 
ibt_free_cq(ep->ep_sendcq); 1317b86efd96Sagiri ep->ep_sendcq = NULL; 13185763ba1eSagiri mutex_exit(&ep->ep_lock); 1319b86efd96Sagiri return (NULL); 1320b86efd96Sagiri } 1321b86efd96Sagiri 1322b86efd96Sagiri (void) ibt_set_cq_handler(ep->ep_recvcq, rds_recvcq_handler, 1323b86efd96Sagiri ep); 1324b86efd96Sagiri 1325b86efd96Sagiri ret = ibt_enable_cq_notify(ep->ep_recvcq, rds_wc_signal); 1326b86efd96Sagiri if (ret != IBT_SUCCESS) { 1327b86efd96Sagiri RDS_DPRINTF2(LABEL, 1328b86efd96Sagiri "ibt_enable_cq_notify failed: %d", ret); 1329b86efd96Sagiri (void) ibt_free_cq(ep->ep_recvcq); 1330b86efd96Sagiri ep->ep_recvcq = NULL; 1331b86efd96Sagiri (void) ibt_free_cq(ep->ep_sendcq); 1332b86efd96Sagiri ep->ep_sendcq = NULL; 13335763ba1eSagiri mutex_exit(&ep->ep_lock); 1334b86efd96Sagiri return (NULL); 1335b86efd96Sagiri } 1336b86efd96Sagiri } 1337b86efd96Sagiri 1338b86efd96Sagiri chanargs.rc_flags = IBT_ALL_SIGNALED; 1339b86efd96Sagiri chanargs.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR | 1340b86efd96Sagiri IBT_CEP_ATOMIC; 1341b86efd96Sagiri chanargs.rc_hca_port_num = hca_port; 1342b86efd96Sagiri chanargs.rc_scq = ep->ep_sendcq; 1343b86efd96Sagiri chanargs.rc_rcq = ep->ep_recvcq; 1344b86efd96Sagiri chanargs.rc_pd = hcap->hca_pdhdl; 1345b86efd96Sagiri chanargs.rc_srq = NULL; 1346b86efd96Sagiri 1347b86efd96Sagiri ret = ibt_alloc_rc_channel(hcap->hca_hdl, 1348b86efd96Sagiri IBT_ACHAN_NO_FLAGS, &chanargs, &chanhdl, NULL); 1349b86efd96Sagiri if (ret != IBT_SUCCESS) { 1350b86efd96Sagiri RDS_DPRINTF2(LABEL, "ibt_alloc_rc_channel fail: %d", 1351b86efd96Sagiri ret); 1352b86efd96Sagiri (void) ibt_free_cq(ep->ep_recvcq); 1353b86efd96Sagiri ep->ep_recvcq = NULL; 1354b86efd96Sagiri (void) ibt_free_cq(ep->ep_sendcq); 1355b86efd96Sagiri ep->ep_sendcq = NULL; 13565763ba1eSagiri mutex_exit(&ep->ep_lock); 1357b86efd96Sagiri return (NULL); 1358b86efd96Sagiri } 13595763ba1eSagiri mutex_exit(&ep->ep_lock); 1360b86efd96Sagiri 1361b86efd96Sagiri /* Chan private should contain the ep */ 
1362b86efd96Sagiri (void) ibt_set_chan_private(chanhdl, ep); 1363b86efd96Sagiri 1364b86efd96Sagiri RDS_DPRINTF4("rds_ep_alloc_rc_channel", "Return: 0x%p", chanhdl); 1365b86efd96Sagiri 1366b86efd96Sagiri return (chanhdl); 1367b86efd96Sagiri } 1368b86efd96Sagiri 1369b86efd96Sagiri 1370b86efd96Sagiri #if 0 1371b86efd96Sagiri 1372b86efd96Sagiri /* Return node guid given a port gid */ 1373b86efd96Sagiri ib_guid_t 1374b86efd96Sagiri rds_gid_to_node_guid(ib_gid_t gid) 1375b86efd96Sagiri { 1376b86efd96Sagiri ibt_node_info_t nodeinfo; 1377b86efd96Sagiri int ret; 1378b86efd96Sagiri 1379b86efd96Sagiri RDS_DPRINTF4("rds_gid_to_node_guid", "Enter: gid: %llx:%llx", 1380b86efd96Sagiri gid.gid_prefix, gid.gid_guid); 1381b86efd96Sagiri 1382b86efd96Sagiri ret = ibt_gid_to_node_info(gid, &nodeinfo); 1383b86efd96Sagiri if (ret != IBT_SUCCESS) { 1384b86efd96Sagiri RDS_DPRINTF2(LABEL, "ibt_gid_node_info for gid: %llx:%llx " 1385b86efd96Sagiri "failed", gid.gid_prefix, gid.gid_guid); 1386b86efd96Sagiri return (0LL); 1387b86efd96Sagiri } 1388b86efd96Sagiri 1389b86efd96Sagiri RDS_DPRINTF4("rds_gid_to_node_guid", "Return: Node guid: %llx", 1390b86efd96Sagiri nodeinfo.n_node_guid); 1391b86efd96Sagiri 1392b86efd96Sagiri return (nodeinfo.n_node_guid); 1393b86efd96Sagiri } 1394b86efd96Sagiri 1395b86efd96Sagiri #endif 1396b86efd96Sagiri 1397b86efd96Sagiri static void 1398b86efd96Sagiri rds_handle_portup_event(rds_state_t *statep, ibt_hca_hdl_t hdl, 1399b86efd96Sagiri ibt_async_event_t *event) 1400b86efd96Sagiri { 1401b86efd96Sagiri rds_hca_t *hcap; 1402b86efd96Sagiri ibt_hca_portinfo_t *newpinfop, *oldpinfop; 1403b86efd96Sagiri uint_t newsize, oldsize, nport; 1404b86efd96Sagiri ib_gid_t gid; 1405b86efd96Sagiri int ret; 1406b86efd96Sagiri 1407c1f8b08eSagiri RDS_DPRINTF2("rds_handle_portup_event", 1408c1f8b08eSagiri "Enter: GUID: 0x%llx Statep: %p", event->ev_hca_guid, statep); 1409b86efd96Sagiri 141000a3eaf3SRamaswamy Tummala rw_enter(&statep->rds_hca_lock, RW_WRITER); 141100a3eaf3SRamaswamy 
Tummala 141200a3eaf3SRamaswamy Tummala hcap = statep->rds_hcalistp; 141300a3eaf3SRamaswamy Tummala while ((hcap != NULL) && (hcap->hca_guid != event->ev_hca_guid)) { 141400a3eaf3SRamaswamy Tummala hcap = hcap->hca_nextp; 1415d99cb22fSagiri } 1416d99cb22fSagiri 1417c1f8b08eSagiri if (hcap == NULL) { 1418c1f8b08eSagiri RDS_DPRINTF2("rds_handle_portup_event", "HCA: 0x%llx is " 1419c1f8b08eSagiri "not in our list", event->ev_hca_guid); 142000a3eaf3SRamaswamy Tummala rw_exit(&statep->rds_hca_lock); 1421c1f8b08eSagiri return; 1422c1f8b08eSagiri } 1423b86efd96Sagiri 1424b86efd96Sagiri ret = ibt_query_hca_ports(hdl, 0, &newpinfop, &nport, &newsize); 1425b86efd96Sagiri if (ret != IBT_SUCCESS) { 1426b86efd96Sagiri RDS_DPRINTF2(LABEL, "ibt_query_hca_ports failed: %d", ret); 142700a3eaf3SRamaswamy Tummala rw_exit(&statep->rds_hca_lock); 1428b86efd96Sagiri return; 1429b86efd96Sagiri } 1430b86efd96Sagiri 1431b86efd96Sagiri oldpinfop = hcap->hca_pinfop; 1432b86efd96Sagiri oldsize = hcap->hca_pinfo_sz; 1433b86efd96Sagiri hcap->hca_pinfop = newpinfop; 1434b86efd96Sagiri hcap->hca_pinfo_sz = newsize; 1435b86efd96Sagiri 143600a3eaf3SRamaswamy Tummala (void) ibt_free_portinfo(oldpinfop, oldsize); 1437b86efd96Sagiri 143800a3eaf3SRamaswamy Tummala /* If RDS service is not registered then no bind is needed */ 143900a3eaf3SRamaswamy Tummala if (statep->rds_srvhdl == NULL) { 144000a3eaf3SRamaswamy Tummala RDS_DPRINTF2("rds_handle_portup_event", 144100a3eaf3SRamaswamy Tummala "RDS Service is not registered, so no action needed"); 144200a3eaf3SRamaswamy Tummala rw_exit(&statep->rds_hca_lock); 144300a3eaf3SRamaswamy Tummala return; 1444b86efd96Sagiri } 1445b86efd96Sagiri 144600a3eaf3SRamaswamy Tummala /* 144700a3eaf3SRamaswamy Tummala * If the service was previously bound on this port and 144800a3eaf3SRamaswamy Tummala * if this port has changed state down and now up, we do not 144900a3eaf3SRamaswamy Tummala * need to bind the service again. 
The bind is expected to 145000a3eaf3SRamaswamy Tummala * persist across state changes. If the service was never bound 145100a3eaf3SRamaswamy Tummala * before then we bind it this time. 145200a3eaf3SRamaswamy Tummala */ 145300a3eaf3SRamaswamy Tummala if (hcap->hca_bindhdl[event->ev_port - 1] == NULL) { 145400a3eaf3SRamaswamy Tummala 145500a3eaf3SRamaswamy Tummala /* structure copy */ 145600a3eaf3SRamaswamy Tummala gid = newpinfop[event->ev_port - 1].p_sgid_tbl[0]; 145700a3eaf3SRamaswamy Tummala 145800a3eaf3SRamaswamy Tummala /* bind RDS service on the port, pass statep as cm_private */ 145900a3eaf3SRamaswamy Tummala ret = ibt_bind_service(statep->rds_srvhdl, gid, NULL, statep, 146000a3eaf3SRamaswamy Tummala &hcap->hca_bindhdl[event->ev_port - 1]); 146100a3eaf3SRamaswamy Tummala if (ret != IBT_SUCCESS) { 146200a3eaf3SRamaswamy Tummala RDS_DPRINTF2("rds_handle_portup_event", 146300a3eaf3SRamaswamy Tummala "Bind service for HCA: 0x%llx Port: %d " 146400a3eaf3SRamaswamy Tummala "gid %llx:%llx returned: %d", event->ev_hca_guid, 146500a3eaf3SRamaswamy Tummala event->ev_port, gid.gid_prefix, gid.gid_guid, ret); 146600a3eaf3SRamaswamy Tummala } 146700a3eaf3SRamaswamy Tummala } 146800a3eaf3SRamaswamy Tummala 146900a3eaf3SRamaswamy Tummala rw_exit(&statep->rds_hca_lock); 1470b86efd96Sagiri 1471b86efd96Sagiri RDS_DPRINTF2("rds_handle_portup_event", "Return: GUID: 0x%llx", 1472b86efd96Sagiri event->ev_hca_guid); 1473b86efd96Sagiri } 1474b86efd96Sagiri 147500a3eaf3SRamaswamy Tummala static void 147600a3eaf3SRamaswamy Tummala rdsib_add_hca(ib_guid_t hca_guid) 147700a3eaf3SRamaswamy Tummala { 147800a3eaf3SRamaswamy Tummala rds_hca_t *hcap; 147900a3eaf3SRamaswamy Tummala ibt_mr_attr_t mem_attr; 148000a3eaf3SRamaswamy Tummala ibt_mr_desc_t mem_desc; 148100a3eaf3SRamaswamy Tummala int ret; 148200a3eaf3SRamaswamy Tummala 148300a3eaf3SRamaswamy Tummala RDS_DPRINTF2("rdsib_add_hca", "Enter: GUID: 0x%llx", hca_guid); 148400a3eaf3SRamaswamy Tummala 148500a3eaf3SRamaswamy Tummala hcap = 
rdsib_init_hca(hca_guid); 148600a3eaf3SRamaswamy Tummala if (hcap == NULL) 148700a3eaf3SRamaswamy Tummala return; 148800a3eaf3SRamaswamy Tummala 148900a3eaf3SRamaswamy Tummala /* register the recv memory with this hca */ 149000a3eaf3SRamaswamy Tummala mutex_enter(&rds_dpool.pool_lock); 149100a3eaf3SRamaswamy Tummala if (rds_dpool.pool_memp == NULL) { 149200a3eaf3SRamaswamy Tummala /* no memory to register */ 149300a3eaf3SRamaswamy Tummala RDS_DPRINTF2("rdsib_add_hca", "No memory to register"); 149400a3eaf3SRamaswamy Tummala mutex_exit(&rds_dpool.pool_lock); 149500a3eaf3SRamaswamy Tummala return; 149600a3eaf3SRamaswamy Tummala } 149700a3eaf3SRamaswamy Tummala 149800a3eaf3SRamaswamy Tummala mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)rds_dpool.pool_memp; 149900a3eaf3SRamaswamy Tummala mem_attr.mr_len = rds_dpool.pool_memsize; 150000a3eaf3SRamaswamy Tummala mem_attr.mr_as = NULL; 150100a3eaf3SRamaswamy Tummala mem_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE; 150200a3eaf3SRamaswamy Tummala 150300a3eaf3SRamaswamy Tummala ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, &mem_attr, 150400a3eaf3SRamaswamy Tummala &hcap->hca_mrhdl, &mem_desc); 150500a3eaf3SRamaswamy Tummala 150600a3eaf3SRamaswamy Tummala mutex_exit(&rds_dpool.pool_lock); 150700a3eaf3SRamaswamy Tummala 150800a3eaf3SRamaswamy Tummala if (ret != IBT_SUCCESS) { 150900a3eaf3SRamaswamy Tummala RDS_DPRINTF2("rdsib_add_hca", "ibt_register_mr failed: %d", 151000a3eaf3SRamaswamy Tummala ret); 151100a3eaf3SRamaswamy Tummala } else { 151200a3eaf3SRamaswamy Tummala rw_enter(&rdsib_statep->rds_hca_lock, RW_WRITER); 151300a3eaf3SRamaswamy Tummala hcap->hca_state = RDS_HCA_STATE_MEM_REGISTERED; 151400a3eaf3SRamaswamy Tummala hcap->hca_lkey = mem_desc.md_lkey; 151500a3eaf3SRamaswamy Tummala hcap->hca_rkey = mem_desc.md_rkey; 151600a3eaf3SRamaswamy Tummala rw_exit(&rdsib_statep->rds_hca_lock); 151700a3eaf3SRamaswamy Tummala } 151800a3eaf3SRamaswamy Tummala 151900a3eaf3SRamaswamy Tummala RDS_DPRINTF2("rdsib_add_hca", 
"Retrun: GUID: 0x%llx", hca_guid); 152000a3eaf3SRamaswamy Tummala } 152100a3eaf3SRamaswamy Tummala 152200a3eaf3SRamaswamy Tummala void rds_close_this_session(rds_session_t *sp, uint8_t wait); 152300a3eaf3SRamaswamy Tummala int rds_post_control_message(rds_session_t *sp, uint8_t code, in_port_t port); 152400a3eaf3SRamaswamy Tummala 152500a3eaf3SRamaswamy Tummala static void 152600a3eaf3SRamaswamy Tummala rdsib_del_hca(rds_state_t *statep, ib_guid_t hca_guid) 152700a3eaf3SRamaswamy Tummala { 152800a3eaf3SRamaswamy Tummala rds_session_t *sp; 152900a3eaf3SRamaswamy Tummala rds_hca_t *hcap; 153000a3eaf3SRamaswamy Tummala rds_hca_state_t saved_state; 153100a3eaf3SRamaswamy Tummala int ret, ix; 153200a3eaf3SRamaswamy Tummala 153300a3eaf3SRamaswamy Tummala RDS_DPRINTF2("rdsib_del_hca", "Enter: GUID: 0x%llx", hca_guid); 153400a3eaf3SRamaswamy Tummala 153500a3eaf3SRamaswamy Tummala /* 153600a3eaf3SRamaswamy Tummala * This should be a write lock as we don't want anyone to get access 153700a3eaf3SRamaswamy Tummala * to the hcap while we are modifing its contents 153800a3eaf3SRamaswamy Tummala */ 153900a3eaf3SRamaswamy Tummala rw_enter(&statep->rds_hca_lock, RW_WRITER); 154000a3eaf3SRamaswamy Tummala 154100a3eaf3SRamaswamy Tummala hcap = statep->rds_hcalistp; 154200a3eaf3SRamaswamy Tummala while ((hcap != NULL) && (hcap->hca_guid != hca_guid)) { 154300a3eaf3SRamaswamy Tummala hcap = hcap->hca_nextp; 154400a3eaf3SRamaswamy Tummala } 154500a3eaf3SRamaswamy Tummala 154600a3eaf3SRamaswamy Tummala /* Prevent initiating any new activity on this HCA */ 154700a3eaf3SRamaswamy Tummala ASSERT(hcap != NULL); 154800a3eaf3SRamaswamy Tummala saved_state = hcap->hca_state; 154900a3eaf3SRamaswamy Tummala hcap->hca_state = RDS_HCA_STATE_STOPPING; 155000a3eaf3SRamaswamy Tummala 155100a3eaf3SRamaswamy Tummala rw_exit(&statep->rds_hca_lock); 155200a3eaf3SRamaswamy Tummala 155300a3eaf3SRamaswamy Tummala /* 155400a3eaf3SRamaswamy Tummala * stop the outgoing traffic and close any active sessions on 
this hca. 155500a3eaf3SRamaswamy Tummala * Any pending messages in the SQ will be allowed to complete. 155600a3eaf3SRamaswamy Tummala */ 155700a3eaf3SRamaswamy Tummala rw_enter(&statep->rds_sessionlock, RW_READER); 155800a3eaf3SRamaswamy Tummala sp = statep->rds_sessionlistp; 155900a3eaf3SRamaswamy Tummala while (sp) { 156000a3eaf3SRamaswamy Tummala if (sp->session_hca_guid != hca_guid) { 156100a3eaf3SRamaswamy Tummala sp = sp->session_nextp; 156200a3eaf3SRamaswamy Tummala continue; 156300a3eaf3SRamaswamy Tummala } 156400a3eaf3SRamaswamy Tummala 156500a3eaf3SRamaswamy Tummala rw_enter(&sp->session_lock, RW_WRITER); 156600a3eaf3SRamaswamy Tummala RDS_DPRINTF2("rdsib_del_hca", "SP(%p) State: %d", sp, 156700a3eaf3SRamaswamy Tummala sp->session_state); 156800a3eaf3SRamaswamy Tummala /* 156900a3eaf3SRamaswamy Tummala * We are changing the session state in advance. This prevents 157000a3eaf3SRamaswamy Tummala * further messages to be posted to the SQ. We then 157100a3eaf3SRamaswamy Tummala * send a control message to the remote and tell it close 157200a3eaf3SRamaswamy Tummala * the session. 157300a3eaf3SRamaswamy Tummala */ 157400a3eaf3SRamaswamy Tummala sp->session_state = RDS_SESSION_STATE_HCA_CLOSING; 157500a3eaf3SRamaswamy Tummala RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 157600a3eaf3SRamaswamy Tummala "RDS_SESSION_STATE_PASSIVE_CLOSING", sp); 157700a3eaf3SRamaswamy Tummala rw_exit(&sp->session_lock); 157800a3eaf3SRamaswamy Tummala 157900a3eaf3SRamaswamy Tummala /* 158000a3eaf3SRamaswamy Tummala * wait until the sendq is empty then tell the remote to 158100a3eaf3SRamaswamy Tummala * close this session. 
This enables for graceful shutdown of 158200a3eaf3SRamaswamy Tummala * the session 158300a3eaf3SRamaswamy Tummala */ 158400a3eaf3SRamaswamy Tummala rds_is_sendq_empty(&sp->session_dataep, 2); 158500a3eaf3SRamaswamy Tummala (void) rds_post_control_message(sp, 158600a3eaf3SRamaswamy Tummala RDS_CTRL_CODE_CLOSE_SESSION, 0); 158700a3eaf3SRamaswamy Tummala 158800a3eaf3SRamaswamy Tummala sp = sp->session_nextp; 158900a3eaf3SRamaswamy Tummala } 159000a3eaf3SRamaswamy Tummala 159100a3eaf3SRamaswamy Tummala /* wait until all the sessions are off this HCA */ 159200a3eaf3SRamaswamy Tummala sp = statep->rds_sessionlistp; 159300a3eaf3SRamaswamy Tummala while (sp) { 159400a3eaf3SRamaswamy Tummala if (sp->session_hca_guid != hca_guid) { 159500a3eaf3SRamaswamy Tummala sp = sp->session_nextp; 159600a3eaf3SRamaswamy Tummala continue; 159700a3eaf3SRamaswamy Tummala } 159800a3eaf3SRamaswamy Tummala 159900a3eaf3SRamaswamy Tummala rw_enter(&sp->session_lock, RW_READER); 160000a3eaf3SRamaswamy Tummala RDS_DPRINTF2("rdsib_del_hca", "SP(%p) State: %d", sp, 160100a3eaf3SRamaswamy Tummala sp->session_state); 160200a3eaf3SRamaswamy Tummala 160300a3eaf3SRamaswamy Tummala while ((sp->session_state == RDS_SESSION_STATE_HCA_CLOSING) || 160400a3eaf3SRamaswamy Tummala (sp->session_state == RDS_SESSION_STATE_ERROR) || 160500a3eaf3SRamaswamy Tummala (sp->session_state == RDS_SESSION_STATE_PASSIVE_CLOSING) || 160600a3eaf3SRamaswamy Tummala (sp->session_state == RDS_SESSION_STATE_CLOSED)) { 160700a3eaf3SRamaswamy Tummala rw_exit(&sp->session_lock); 160800a3eaf3SRamaswamy Tummala delay(drv_usectohz(1000000)); 160900a3eaf3SRamaswamy Tummala rw_enter(&sp->session_lock, RW_READER); 161000a3eaf3SRamaswamy Tummala RDS_DPRINTF2("rdsib_del_hca", "SP(%p) State: %d", sp, 161100a3eaf3SRamaswamy Tummala sp->session_state); 161200a3eaf3SRamaswamy Tummala } 161300a3eaf3SRamaswamy Tummala 161400a3eaf3SRamaswamy Tummala rw_exit(&sp->session_lock); 161500a3eaf3SRamaswamy Tummala 161600a3eaf3SRamaswamy Tummala sp = 
sp->session_nextp; 161700a3eaf3SRamaswamy Tummala } 161800a3eaf3SRamaswamy Tummala rw_exit(&statep->rds_sessionlock); 161900a3eaf3SRamaswamy Tummala 162000a3eaf3SRamaswamy Tummala /* 162100a3eaf3SRamaswamy Tummala * if rdsib_close_ib was called before this, then that would have 162200a3eaf3SRamaswamy Tummala * unbound the service on all ports. In that case, the HCA structs 162300a3eaf3SRamaswamy Tummala * will contain stale bindhdls. Hence, we do not call unbind unless 162400a3eaf3SRamaswamy Tummala * the service is still registered. 162500a3eaf3SRamaswamy Tummala */ 162600a3eaf3SRamaswamy Tummala if (statep->rds_srvhdl != NULL) { 162700a3eaf3SRamaswamy Tummala /* unbind RDS service on all ports on this HCA */ 162800a3eaf3SRamaswamy Tummala for (ix = 0; ix < hcap->hca_nports; ix++) { 162900a3eaf3SRamaswamy Tummala if (hcap->hca_bindhdl[ix] == NULL) { 163000a3eaf3SRamaswamy Tummala continue; 163100a3eaf3SRamaswamy Tummala } 163200a3eaf3SRamaswamy Tummala 163300a3eaf3SRamaswamy Tummala RDS_DPRINTF2("rdsib_del_hca", 163400a3eaf3SRamaswamy Tummala "Unbinding Service: port: %d, bindhdl: %p", 163500a3eaf3SRamaswamy Tummala ix + 1, hcap->hca_bindhdl[ix]); 163600a3eaf3SRamaswamy Tummala (void) ibt_unbind_service(rdsib_statep->rds_srvhdl, 163700a3eaf3SRamaswamy Tummala hcap->hca_bindhdl[ix]); 163800a3eaf3SRamaswamy Tummala hcap->hca_bindhdl[ix] = NULL; 163900a3eaf3SRamaswamy Tummala } 164000a3eaf3SRamaswamy Tummala } 164100a3eaf3SRamaswamy Tummala 164200a3eaf3SRamaswamy Tummala RDS_DPRINTF2("rdsib_del_hca", "HCA(%p) State: %d", hcap, 164300a3eaf3SRamaswamy Tummala hcap->hca_state); 164400a3eaf3SRamaswamy Tummala 164500a3eaf3SRamaswamy Tummala switch (saved_state) { 164600a3eaf3SRamaswamy Tummala case RDS_HCA_STATE_MEM_REGISTERED: 164700a3eaf3SRamaswamy Tummala ASSERT(hcap->hca_mrhdl != NULL); 164800a3eaf3SRamaswamy Tummala ret = ibt_deregister_mr(hcap->hca_hdl, hcap->hca_mrhdl); 164900a3eaf3SRamaswamy Tummala if (ret != IBT_SUCCESS) { 165000a3eaf3SRamaswamy Tummala 
RDS_DPRINTF2("rdsib_del_hca", 165100a3eaf3SRamaswamy Tummala "ibt_deregister_mr failed: %d", ret); 165200a3eaf3SRamaswamy Tummala return; 165300a3eaf3SRamaswamy Tummala } 165400a3eaf3SRamaswamy Tummala hcap->hca_mrhdl = NULL; 165500a3eaf3SRamaswamy Tummala /* FALLTHRU */ 165600a3eaf3SRamaswamy Tummala case RDS_HCA_STATE_OPEN: 165700a3eaf3SRamaswamy Tummala ASSERT(hcap->hca_hdl != NULL); 165800a3eaf3SRamaswamy Tummala ASSERT(hcap->hca_pdhdl != NULL); 165900a3eaf3SRamaswamy Tummala 166000a3eaf3SRamaswamy Tummala 166100a3eaf3SRamaswamy Tummala ret = ibt_free_pd(hcap->hca_hdl, hcap->hca_pdhdl); 166200a3eaf3SRamaswamy Tummala if (ret != IBT_SUCCESS) { 166300a3eaf3SRamaswamy Tummala RDS_DPRINTF2("rdsib_del_hca", 166400a3eaf3SRamaswamy Tummala "ibt_free_pd failed: %d", ret); 166500a3eaf3SRamaswamy Tummala } 166600a3eaf3SRamaswamy Tummala 166700a3eaf3SRamaswamy Tummala (void) ibt_free_portinfo(hcap->hca_pinfop, hcap->hca_pinfo_sz); 166800a3eaf3SRamaswamy Tummala 166900a3eaf3SRamaswamy Tummala ret = ibt_close_hca(hcap->hca_hdl); 167000a3eaf3SRamaswamy Tummala if (ret != IBT_SUCCESS) { 167100a3eaf3SRamaswamy Tummala RDS_DPRINTF2("rdsib_del_hca", 167200a3eaf3SRamaswamy Tummala "ibt_close_hca failed: %d", ret); 167300a3eaf3SRamaswamy Tummala } 167400a3eaf3SRamaswamy Tummala 167500a3eaf3SRamaswamy Tummala hcap->hca_hdl = NULL; 167600a3eaf3SRamaswamy Tummala hcap->hca_pdhdl = NULL; 167700a3eaf3SRamaswamy Tummala hcap->hca_lkey = 0; 167800a3eaf3SRamaswamy Tummala hcap->hca_rkey = 0; 167900a3eaf3SRamaswamy Tummala } 168000a3eaf3SRamaswamy Tummala 168100a3eaf3SRamaswamy Tummala /* 168200a3eaf3SRamaswamy Tummala * This should be a write lock as we don't want anyone to get access 168300a3eaf3SRamaswamy Tummala * to the hcap while we are modifing its contents 168400a3eaf3SRamaswamy Tummala */ 168500a3eaf3SRamaswamy Tummala rw_enter(&statep->rds_hca_lock, RW_WRITER); 168600a3eaf3SRamaswamy Tummala hcap->hca_state = RDS_HCA_STATE_REMOVED; 168700a3eaf3SRamaswamy Tummala 
rw_exit(&statep->rds_hca_lock); 168800a3eaf3SRamaswamy Tummala 168900a3eaf3SRamaswamy Tummala RDS_DPRINTF2("rdsib_del_hca", "Return: GUID: 0x%llx", hca_guid); 169000a3eaf3SRamaswamy Tummala } 169100a3eaf3SRamaswamy Tummala 1692b86efd96Sagiri static void 1693b86efd96Sagiri rds_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code, 1694b86efd96Sagiri ibt_async_event_t *event) 1695b86efd96Sagiri { 169600a3eaf3SRamaswamy Tummala rds_state_t *statep = (rds_state_t *)clntp; 1697b86efd96Sagiri 1698b86efd96Sagiri RDS_DPRINTF2("rds_async_handler", "Async code: %d", code); 1699b86efd96Sagiri 1700b86efd96Sagiri switch (code) { 1701b86efd96Sagiri case IBT_EVENT_PORT_UP: 1702b86efd96Sagiri rds_handle_portup_event(statep, hdl, event); 1703b86efd96Sagiri break; 170400a3eaf3SRamaswamy Tummala case IBT_HCA_ATTACH_EVENT: 170500a3eaf3SRamaswamy Tummala /* 170600a3eaf3SRamaswamy Tummala * NOTE: In some error recovery paths, it is possible to 170700a3eaf3SRamaswamy Tummala * receive IBT_HCA_ATTACH_EVENTs on already known HCAs. 170800a3eaf3SRamaswamy Tummala */ 170900a3eaf3SRamaswamy Tummala (void) rdsib_add_hca(event->ev_hca_guid); 171000a3eaf3SRamaswamy Tummala break; 171100a3eaf3SRamaswamy Tummala case IBT_HCA_DETACH_EVENT: 171200a3eaf3SRamaswamy Tummala (void) rdsib_del_hca(statep, event->ev_hca_guid); 171300a3eaf3SRamaswamy Tummala break; 1714b86efd96Sagiri 1715b86efd96Sagiri default: 1716b86efd96Sagiri RDS_DPRINTF2(LABEL, "Async event: %d not handled", code); 1717b86efd96Sagiri } 1718b86efd96Sagiri 1719b86efd96Sagiri RDS_DPRINTF2("rds_async_handler", "Return: code: %d", code); 1720b86efd96Sagiri } 1721