/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * hermon_qp.c
 *    Hermon Queue Pair Processing Routines
 *
 *    Implements all the routines necessary for allocating, freeing, and
 *    querying the Hermon queue pairs.
 */

#include
#include
#include
#include
#include
#include
#include
#include
#include

static int hermon_qp_create_qpn(hermon_state_t *state, hermon_qphdl_t qp,
    hermon_rsrc_t *qpc);
static int hermon_qpn_avl_compare(const void *q, const void *e);
static int hermon_special_qp_rsrc_alloc(hermon_state_t *state,
    ibt_sqp_type_t type, uint_t port, hermon_rsrc_t **qp_rsrc);
static int hermon_special_qp_rsrc_free(hermon_state_t *state,
    ibt_sqp_type_t type, uint_t port);
static void hermon_qp_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl,
    uint_t real_max_sgl, hermon_qp_wq_type_t wq_type, uint_t *logwqesz,
    uint_t *max_sgl);

/*
 * hermon_qp_alloc()
 *    Context: Can be called only from user or kernel context.
 */
int
hermon_qp_alloc(hermon_state_t *state, hermon_qp_info_t *qpinfo,
    uint_t sleepflag)
{
	hermon_rsrc_t *qpc, *rsrc;
	hermon_rsrc_type_t rsrc_type;
	hermon_umap_db_entry_t *umapdb;
	hermon_qphdl_t qp;
	ibt_qp_alloc_attr_t *attr_p;
	ibt_qp_alloc_flags_t alloc_flags;
	ibt_qp_type_t type;
	hermon_qp_wq_type_t swq_type;
	ibtl_qp_hdl_t ibt_qphdl;
	ibt_chan_sizes_t *queuesz_p;
	ib_qpn_t *qpn;
	hermon_qphdl_t *qphdl;
	ibt_mr_attr_t mr_attr;
	hermon_mr_options_t mr_op;
	hermon_srqhdl_t srq;
	hermon_pdhdl_t pd;
	hermon_cqhdl_t sq_cq, rq_cq;
	hermon_mrhdl_t mr;
	uint64_t value, qp_desc_off;
	uint64_t *thewqe, thewqesz;
	uint32_t *sq_buf, *rq_buf;
	uint32_t log_qp_sq_size, log_qp_rq_size;
	uint32_t sq_size, rq_size;
	uint32_t sq_depth, rq_depth;
	uint32_t sq_wqe_size, rq_wqe_size, wqesz_shift;
	uint32_t max_sgl, max_recv_sgl, uarpg;
	uint_t qp_is_umap;
	uint_t qp_srq_en, i, j;
	int status, flag;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p, *queuesz_p))

	/*
	 * Extract the necessary info from the hermon_qp_info_t structure
	 */
	attr_p = qpinfo->qpi_attrp;
	type = qpinfo->qpi_type;
	ibt_qphdl = qpinfo->qpi_ibt_qphdl;
	queuesz_p = qpinfo->qpi_queueszp;
	qpn = qpinfo->qpi_qpn;
	qphdl = &qpinfo->qpi_qphdl;
	alloc_flags = attr_p->qp_alloc_flags;

	/*
	 * Verify correctness of alloc_flags.
	 *
	 * 1. FEXCH and RSS are only allocated via qp_range.
	 */
	if (alloc_flags & (IBT_QP_USES_FEXCH | IBT_QP_USES_RSS)) {
		return (IBT_INVALID_PARAM);
	}
	rsrc_type = HERMON_QPC;
	qp_is_umap = 0;

	/*
	 * 2. Make sure only one of these flags is set.
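	 * (Note that because the switch below tests the masked flags as a
	 * single value, a request that sets two or more of IBT_QP_USER_MAP,
	 * IBT_QP_USES_RFCI and IBT_QP_USES_FCMD matches none of the cases
	 * and is rejected through the default case with IBT_INVALID_PARAM.)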
*/ switch (alloc_flags & (IBT_QP_USER_MAP | IBT_QP_USES_RFCI | IBT_QP_USES_FCMD)) { case IBT_QP_USER_MAP: qp_is_umap = 1; break; case IBT_QP_USES_RFCI: if (type != IBT_UD_RQP) return (IBT_INVALID_PARAM); switch (attr_p->qp_fc.fc_hca_port) { case 1: rsrc_type = HERMON_QPC_RFCI_PORT1; break; case 2: rsrc_type = HERMON_QPC_RFCI_PORT2; break; default: return (IBT_INVALID_PARAM); } break; case IBT_QP_USES_FCMD: if (type != IBT_UD_RQP) return (IBT_INVALID_PARAM); break; case 0: break; default: return (IBT_INVALID_PARAM); /* conflicting flags set */ } /* * Determine whether QP is being allocated for userland access or * whether it is being allocated for kernel access. If the QP is * being allocated for userland access, then lookup the UAR * page number for the current process. Note: If this is not found * (e.g. if the process has not previously open()'d the Hermon driver), * then an error is returned. */ if (qp_is_umap) { status = hermon_umap_db_find(state->hs_instance, ddi_get_pid(), MLNX_UMAP_UARPG_RSRC, &value, 0, NULL); if (status != DDI_SUCCESS) { return (IBT_INVALID_PARAM); } uarpg = ((hermon_rsrc_t *)(uintptr_t)value)->hr_indx; } else { uarpg = state->hs_kernel_uar_index; } /* * Determine whether QP is being associated with an SRQ */ qp_srq_en = (alloc_flags & IBT_QP_USES_SRQ) ? 1 : 0; if (qp_srq_en) { /* * Check for valid SRQ handle pointers */ if (attr_p->qp_ibc_srq_hdl == NULL) { status = IBT_SRQ_HDL_INVALID; goto qpalloc_fail; } srq = (hermon_srqhdl_t)attr_p->qp_ibc_srq_hdl; } /* * Check for valid QP service type (only UD/RC/UC supported) */ if (((type != IBT_UD_RQP) && (type != IBT_RC_RQP) && (type != IBT_UC_RQP))) { status = IBT_QP_SRV_TYPE_INVALID; goto qpalloc_fail; } /* * Check for valid PD handle pointer */ if (attr_p->qp_pd_hdl == NULL) { status = IBT_PD_HDL_INVALID; goto qpalloc_fail; } pd = (hermon_pdhdl_t)attr_p->qp_pd_hdl; /* * If on an SRQ, check to make sure the PD is the same */ if (qp_srq_en && (pd->pd_pdnum != srq->srq_pdhdl->pd_pdnum)) { status = IBT_PD_HDL_INVALID; goto qpalloc_fail; } /* Increment the reference count on the protection domain (PD) */ hermon_pd_refcnt_inc(pd); /* * Check for valid CQ handle pointers * * FCMD QPs do not require a receive cq handle. */ if (attr_p->qp_ibc_scq_hdl == NULL) { status = IBT_CQ_HDL_INVALID; goto qpalloc_fail1; } sq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_scq_hdl; if ((attr_p->qp_ibc_rcq_hdl == NULL)) { if ((alloc_flags & IBT_QP_USES_FCMD) == 0) { status = IBT_CQ_HDL_INVALID; goto qpalloc_fail1; } rq_cq = sq_cq; /* just use the send cq */ } else rq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_rcq_hdl; /* * Increment the reference count on the CQs. One or both of these * could return error if we determine that the given CQ is already * being used with a special (SMI/GSI) QP. */ status = hermon_cq_refcnt_inc(sq_cq, HERMON_CQ_IS_NORMAL); if (status != DDI_SUCCESS) { status = IBT_CQ_HDL_INVALID; goto qpalloc_fail1; } status = hermon_cq_refcnt_inc(rq_cq, HERMON_CQ_IS_NORMAL); if (status != DDI_SUCCESS) { status = IBT_CQ_HDL_INVALID; goto qpalloc_fail2; } /* * Allocate an QP context entry. This will be filled in with all * the necessary parameters to define the Queue Pair. Unlike * other Hermon hardware resources, ownership is not immediately * given to hardware in the final step here. Instead, we must * wait until the QP is later transitioned to the "Init" state before * passing the QP to hardware. If we fail here, we must undo all * the reference count (CQ and PD). 
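 * (That cleanup is what the qpalloc_fail3/fail2/fail1 labels at the bottom
 * of this routine perform: they drop the two CQ reference counts and then
 * the PD reference count, in the reverse of the order they were taken.)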
*/ status = hermon_rsrc_alloc(state, rsrc_type, 1, sleepflag, &qpc); if (status != DDI_SUCCESS) { status = IBT_INSUFF_RESOURCE; goto qpalloc_fail3; } /* * Allocate the software structure for tracking the queue pair * (i.e. the Hermon Queue Pair handle). If we fail here, we must * undo the reference counts and the previous resource allocation. */ status = hermon_rsrc_alloc(state, HERMON_QPHDL, 1, sleepflag, &rsrc); if (status != DDI_SUCCESS) { status = IBT_INSUFF_RESOURCE; goto qpalloc_fail4; } qp = (hermon_qphdl_t)rsrc->hr_addr; bzero(qp, sizeof (struct hermon_sw_qp_s)); _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp)) qp->qp_alloc_flags = alloc_flags; /* * Calculate the QP number from QPC index. This routine handles * all of the operations necessary to keep track of used, unused, * and released QP numbers. */ if (type == IBT_UD_RQP) { qp->qp_qpnum = qpc->hr_indx; qp->qp_ring = qp->qp_qpnum << 8; qp->qp_qpn_hdl = NULL; } else { status = hermon_qp_create_qpn(state, qp, qpc); if (status != DDI_SUCCESS) { status = IBT_INSUFF_RESOURCE; goto qpalloc_fail5; } } /* * If this will be a user-mappable QP, then allocate an entry for * the "userland resources database". This will later be added to * the database (after all further QP operations are successful). * If we fail here, we must undo the reference counts and the * previous resource allocation. */ if (qp_is_umap) { umapdb = hermon_umap_db_alloc(state->hs_instance, qp->qp_qpnum, MLNX_UMAP_QPMEM_RSRC, (uint64_t)(uintptr_t)rsrc); if (umapdb == NULL) { status = IBT_INSUFF_RESOURCE; goto qpalloc_fail6; } } /* * Allocate the doorbell record. Hermon just needs one for the RQ, * if the QP is not associated with an SRQ, and use uarpg (above) as * the uar index */ if (!qp_srq_en) { status = hermon_dbr_alloc(state, uarpg, &qp->qp_rq_dbr_acchdl, &qp->qp_rq_vdbr, &qp->qp_rq_pdbr, &qp->qp_rdbr_mapoffset); if (status != DDI_SUCCESS) { status = IBT_INSUFF_RESOURCE; goto qpalloc_fail6; } } qp->qp_uses_lso = (attr_p->qp_flags & IBT_USES_LSO); /* * We verify that the requested number of SGL is valid (i.e. * consistent with the device limits and/or software-configured * limits). If not, then obviously the same cleanup needs to be done. */ if (type == IBT_UD_RQP) { max_sgl = state->hs_ibtfinfo.hca_attr->hca_ud_send_sgl_sz; swq_type = HERMON_QP_WQ_TYPE_SENDQ_UD; } else { max_sgl = state->hs_ibtfinfo.hca_attr->hca_conn_send_sgl_sz; swq_type = HERMON_QP_WQ_TYPE_SENDQ_CONN; } max_recv_sgl = state->hs_ibtfinfo.hca_attr->hca_recv_sgl_sz; if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) || (!qp_srq_en && (attr_p->qp_sizes.cs_rq_sgl > max_recv_sgl))) { status = IBT_HCA_SGL_EXCEEDED; goto qpalloc_fail7; } /* * Determine this QP's WQE stride (for both the Send and Recv WQEs). * This will depend on the requested number of SGLs. Note: this * has the side-effect of also calculating the real number of SGLs * (for the calculated WQE size). * * For QP's on an SRQ, we set these to 0. */ if (qp_srq_en) { qp->qp_rq_log_wqesz = 0; qp->qp_rq_sgl = 0; } else { hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl, max_recv_sgl, HERMON_QP_WQ_TYPE_RECVQ, &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl); } hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl, max_sgl, swq_type, &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl); sq_wqe_size = 1 << qp->qp_sq_log_wqesz; /* NOTE: currently policy in driver, later maybe IBTF interface */ qp->qp_no_prefetch = 0; /* * for prefetching, we need to add the number of wqes in * the 2k area plus one to the number requested, but * ONLY for send queue. 
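 * For example (illustrative numbers only): with a 128-byte send WQE stride,
 * the prefetch headroom added below works out to (2048 / 128) + 1 = 17
 * extra WQEs, which is what sq_wqe_size + HERMON_QP_OH_SIZE yields when
 * HERMON_QP_OH_SIZE is the 2k area mentioned above.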
If no_prefetch == 1 (prefetch off) * it's exactly TWO wqes for the headroom */ if (qp->qp_no_prefetch) qp->qp_sq_headroom = 2 * sq_wqe_size; else qp->qp_sq_headroom = sq_wqe_size + HERMON_QP_OH_SIZE; /* * hdrm wqes must be integral since both sq_wqe_size & * HERMON_QP_OH_SIZE are power of 2 */ qp->qp_sq_hdrmwqes = (qp->qp_sq_headroom / sq_wqe_size); /* * Calculate the appropriate size for the work queues. * For send queue, add in the headroom wqes to the calculation. * Note: All Hermon QP work queues must be a power-of-2 in size. Also * they may not be any smaller than HERMON_QP_MIN_SIZE. This step is * to round the requested size up to the next highest power-of-2 */ /* first, adjust to a minimum and tell the caller the change */ attr_p->qp_sizes.cs_sq = max(attr_p->qp_sizes.cs_sq, HERMON_QP_MIN_SIZE); attr_p->qp_sizes.cs_rq = max(attr_p->qp_sizes.cs_rq, HERMON_QP_MIN_SIZE); /* * now, calculate the alloc size, taking into account * the headroom for the sq */ log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes); /* if the total is a power of two, reduce it */ if (ISP2(attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes)) { log_qp_sq_size = log_qp_sq_size - 1; } log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq); if (ISP2(attr_p->qp_sizes.cs_rq)) { log_qp_rq_size = log_qp_rq_size - 1; } /* * Next we verify that the rounded-up size is valid (i.e. consistent * with the device limits and/or software-configured limits). If not, * then obviously we have a lot of cleanup to do before returning. * * NOTE: the first condition deals with the (test) case of cs_sq * being just less than 2^32. In this case, the headroom addition * to the requested cs_sq will pass the test when it should not. * This test no longer lets that case slip through the check. */ if ((attr_p->qp_sizes.cs_sq > (1 << state->hs_cfg_profile->cp_log_max_qp_sz)) || (log_qp_sq_size > state->hs_cfg_profile->cp_log_max_qp_sz) || (!qp_srq_en && (log_qp_rq_size > state->hs_cfg_profile->cp_log_max_qp_sz))) { status = IBT_HCA_WR_EXCEEDED; goto qpalloc_fail7; } /* * Allocate the memory for QP work queues. Since Hermon work queues * are not allowed to cross a 32-bit (4GB) boundary, the alignment of * the work queue memory is very important. We used to allocate * work queues (the combined receive and send queues) so that they * would be aligned on their combined size. That alignment guaranteed * that they would never cross the 4GB boundary (Hermon work queues * are on the order of MBs at maximum). Now we are able to relax * this alignment constraint by ensuring that the IB address assigned * to the queue memory (as a result of the hermon_mr_register() call) * is offset from zero. * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to * guarantee the alignment, but when attempting to use IOMMU bypass * mode we found that we were not allowed to specify any alignment * that was more restrictive than the system page size. * So we avoided this constraint by passing two alignment values, * one for the memory allocation itself and the other for the DMA * handle (for later bind). This used to cause more memory than * necessary to be allocated (in order to guarantee the more * restrictive alignment contraint). But by guaranteeing the * zero-based IB virtual address for the queue, we are able to * conserve this memory. 
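 * As a purely illustrative example of the sizing below: a send queue of
 * 2^10 entries with a 64-byte stride needs 64KB, and a receive queue of
 * 2^9 entries with a 32-byte stride adds another 16KB, so qa_size would be
 * 80KB; a QP on an SRQ contributes no receive queue memory at all.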
*/ sq_wqe_size = 1 << qp->qp_sq_log_wqesz; sq_depth = 1 << log_qp_sq_size; sq_size = sq_depth * sq_wqe_size; /* QP on SRQ sets these to 0 */ if (qp_srq_en) { rq_wqe_size = 0; rq_size = 0; } else { rq_wqe_size = 1 << qp->qp_rq_log_wqesz; rq_depth = 1 << log_qp_rq_size; rq_size = rq_depth * rq_wqe_size; } qp->qp_wqinfo.qa_size = sq_size + rq_size; qp->qp_wqinfo.qa_alloc_align = PAGESIZE; qp->qp_wqinfo.qa_bind_align = PAGESIZE; if (qp_is_umap) { qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND; } else { qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL; } status = hermon_queue_alloc(state, &qp->qp_wqinfo, sleepflag); if (status != DDI_SUCCESS) { status = IBT_INSUFF_RESOURCE; goto qpalloc_fail7; } /* * Sort WQs in memory according to stride (*q_wqe_size), largest first * If they are equal, still put the SQ first */ qp->qp_sq_baseaddr = 0; qp->qp_rq_baseaddr = 0; if ((sq_wqe_size > rq_wqe_size) || (sq_wqe_size == rq_wqe_size)) { sq_buf = qp->qp_wqinfo.qa_buf_aligned; /* if this QP is on an SRQ, set the rq_buf to NULL */ if (qp_srq_en) { rq_buf = NULL; } else { rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size); qp->qp_rq_baseaddr = sq_size; } } else { rq_buf = qp->qp_wqinfo.qa_buf_aligned; sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size); qp->qp_sq_baseaddr = rq_size; } if (qp_is_umap == 0) { qp->qp_sq_wqhdr = hermon_wrid_wqhdr_create(sq_depth); if (qp->qp_sq_wqhdr == NULL) { status = IBT_INSUFF_RESOURCE; goto qpalloc_fail8; } if (qp_srq_en) { qp->qp_rq_wqavl.wqa_wq = srq->srq_wq_wqhdr; qp->qp_rq_wqavl.wqa_srq_en = 1; qp->qp_rq_wqavl.wqa_srq = srq; } else { qp->qp_rq_wqhdr = hermon_wrid_wqhdr_create(rq_depth); if (qp->qp_rq_wqhdr == NULL) { status = IBT_INSUFF_RESOURCE; goto qpalloc_fail8; } qp->qp_rq_wqavl.wqa_wq = qp->qp_rq_wqhdr; } qp->qp_sq_wqavl.wqa_qpn = qp->qp_qpnum; qp->qp_sq_wqavl.wqa_type = HERMON_WR_SEND; qp->qp_sq_wqavl.wqa_wq = qp->qp_sq_wqhdr; qp->qp_rq_wqavl.wqa_qpn = qp->qp_qpnum; qp->qp_rq_wqavl.wqa_type = HERMON_WR_RECV; } /* * Register the memory for the QP work queues. The memory for the * QP must be registered in the Hermon cMPT tables. This gives us the * LKey to specify in the QP context later. Note: The memory for * Hermon work queues (both Send and Recv) must be contiguous and * registered as a single memory region. Note: If the QP memory is * user-mappable, force DDI_DMA_CONSISTENT mapping. Also, in order to * meet the alignment restriction, we pass the "mro_bind_override_addr" * flag in the call to hermon_mr_register(). This guarantees that the * resulting IB vaddr will be zero-based (modulo the offset into the * first page). If we fail here, we still have the bunch of resource * and reference count cleanup to do. */ flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP; mr_attr.mr_vaddr = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned; mr_attr.mr_len = qp->qp_wqinfo.qa_size; mr_attr.mr_as = NULL; mr_attr.mr_flags = flag; if (qp_is_umap) { mr_op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass; } else { /* HERMON_QUEUE_LOCATION_NORMAL */ mr_op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass; } mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl; mr_op.mro_bind_override_addr = 1; status = hermon_mr_register(state, pd, &mr_attr, &mr, &mr_op, HERMON_QP_CMPT); if (status != DDI_SUCCESS) { status = IBT_INSUFF_RESOURCE; goto qpalloc_fail9; } /* * Calculate the offset between the kernel virtual address space * and the IB virtual address space. This will be used when * posting work requests to properly initialize each WQE. 
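 * Because mro_bind_override_addr forces a zero-based IB virtual address
 * for the queue memory, qp_desc_off is simply the kernel virtual address
 * of the queue buffer minus its (near-zero) bound IB address, so
 * subtracting qp_desc_off from any WQE's kernel virtual address later
 * recovers that WQE's address in the IB virtual address space.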
*/ qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned - (uint64_t)mr->mr_bindinfo.bi_addr; /* * Fill in all the return arguments (if necessary). This includes * real work queue sizes (in wqes), real SGLs, and QP number */ if (queuesz_p != NULL) { queuesz_p->cs_sq = (1 << log_qp_sq_size) - qp->qp_sq_hdrmwqes; queuesz_p->cs_sq_sgl = qp->qp_sq_sgl; /* if this QP is on an SRQ, set these to 0 */ if (qp_srq_en) { queuesz_p->cs_rq = 0; queuesz_p->cs_rq_sgl = 0; } else { queuesz_p->cs_rq = (1 << log_qp_rq_size); queuesz_p->cs_rq_sgl = qp->qp_rq_sgl; } } if (qpn != NULL) { *qpn = (ib_qpn_t)qp->qp_qpnum; } /* * Fill in the rest of the Hermon Queue Pair handle. */ qp->qp_qpcrsrcp = qpc; qp->qp_rsrcp = rsrc; qp->qp_state = HERMON_QP_RESET; HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET); qp->qp_pdhdl = pd; qp->qp_mrhdl = mr; qp->qp_sq_sigtype = (attr_p->qp_flags & IBT_WR_SIGNALED) ? HERMON_QP_SQ_WR_SIGNALED : HERMON_QP_SQ_ALL_SIGNALED; qp->qp_is_special = 0; qp->qp_uarpg = uarpg; qp->qp_umap_dhp = (devmap_cookie_t)NULL; qp->qp_sq_cqhdl = sq_cq; qp->qp_sq_bufsz = (1 << log_qp_sq_size); qp->qp_sq_logqsz = log_qp_sq_size; qp->qp_sq_buf = sq_buf; qp->qp_desc_off = qp_desc_off; qp->qp_rq_cqhdl = rq_cq; qp->qp_rq_buf = rq_buf; qp->qp_rlky = (attr_p->qp_flags & IBT_FAST_REG_RES_LKEY) != 0; /* if this QP is on an SRQ, set rq_bufsz to 0 */ if (qp_srq_en) { qp->qp_rq_bufsz = 0; qp->qp_rq_logqsz = 0; } else { qp->qp_rq_bufsz = (1 << log_qp_rq_size); qp->qp_rq_logqsz = log_qp_rq_size; } qp->qp_forward_sqd_event = 0; qp->qp_sqd_still_draining = 0; qp->qp_hdlrarg = (void *)ibt_qphdl; qp->qp_mcg_refcnt = 0; /* * If this QP is to be associated with an SRQ, set the SRQ handle */ if (qp_srq_en) { qp->qp_srqhdl = srq; hermon_srq_refcnt_inc(qp->qp_srqhdl); } else { qp->qp_srqhdl = NULL; } /* Determine the QP service type */ qp->qp_type = type; if (type == IBT_RC_RQP) { qp->qp_serv_type = HERMON_QP_RC; } else if (type == IBT_UD_RQP) { if (alloc_flags & IBT_QP_USES_RFCI) qp->qp_serv_type = HERMON_QP_RFCI; else if (alloc_flags & IBT_QP_USES_FCMD) qp->qp_serv_type = HERMON_QP_FCMND; else qp->qp_serv_type = HERMON_QP_UD; } else { qp->qp_serv_type = HERMON_QP_UC; } /* * Initialize the RQ WQEs - unlike Arbel, no Rcv init is needed */ /* * Initialize the SQ WQEs - all that needs to be done is every 64 bytes * set the quadword to all F's - high-order bit is owner (init to one) * and the rest for the headroom definition of prefetching * */ wqesz_shift = qp->qp_sq_log_wqesz; thewqesz = 1 << wqesz_shift; thewqe = (uint64_t *)(void *)(qp->qp_sq_buf); if (qp_is_umap == 0) { for (i = 0; i < sq_depth; i++) { /* * for each stride, go through and every 64 bytes * write the init value - having set the address * once, just keep incrementing it */ for (j = 0; j < thewqesz; j += 64, thewqe += 8) { *(uint32_t *)thewqe = 0xFFFFFFFF; } } } /* Zero out the QP context */ bzero(&qp->qpc, sizeof (hermon_hw_qpc_t)); /* * Put QP handle in Hermon QPNum-to-QPHdl list. Then fill in the * "qphdl" and return success */ hermon_icm_set_num_to_hdl(state, HERMON_QPC, qpc->hr_indx, qp); /* * If this is a user-mappable QP, then we need to insert the previously * allocated entry into the "userland resources database". This will * allow for later lookup during devmap() (i.e. mmap()) calls. 
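 * (The entry was allocated earlier, keyed by this driver instance, the QP
 * number and MLNX_UMAP_QPMEM_RSRC; adding it only now, after every other
 * step has succeeded, means a failed allocation never leaves a stale entry
 * behind for a later devmap() to find.)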
*/ if (qp_is_umap) { hermon_umap_db_add(umapdb); } mutex_init(&qp->qp_sq_lock, NULL, MUTEX_DRIVER, DDI_INTR_PRI(state->hs_intrmsi_pri)); *qphdl = qp; return (DDI_SUCCESS); /* * The following is cleanup for all possible failure cases in this routine */ qpalloc_fail9: hermon_queue_free(&qp->qp_wqinfo); qpalloc_fail8: if (qp->qp_sq_wqhdr) hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr); if (qp->qp_rq_wqhdr) hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr); qpalloc_fail7: if (qp_is_umap) { hermon_umap_db_free(umapdb); } if (!qp_srq_en) { hermon_dbr_free(state, uarpg, qp->qp_rq_vdbr); } qpalloc_fail6: /* * Releasing the QPN will also free up the QPC context. Update * the QPC context pointer to indicate this. */ if (qp->qp_qpn_hdl) { hermon_qp_release_qpn(state, qp->qp_qpn_hdl, HERMON_QPN_RELEASE); } else { hermon_rsrc_free(state, &qpc); } qpc = NULL; qpalloc_fail5: hermon_rsrc_free(state, &rsrc); qpalloc_fail4: if (qpc) { hermon_rsrc_free(state, &qpc); } qpalloc_fail3: hermon_cq_refcnt_dec(rq_cq); qpalloc_fail2: hermon_cq_refcnt_dec(sq_cq); qpalloc_fail1: hermon_pd_refcnt_dec(pd); qpalloc_fail: return (status); } /* * hermon_special_qp_alloc() * Context: Can be called only from user or kernel context. */ int hermon_special_qp_alloc(hermon_state_t *state, hermon_qp_info_t *qpinfo, uint_t sleepflag) { hermon_rsrc_t *qpc, *rsrc; hermon_qphdl_t qp; ibt_qp_alloc_attr_t *attr_p; ibt_sqp_type_t type; uint8_t port; ibtl_qp_hdl_t ibt_qphdl; ibt_chan_sizes_t *queuesz_p; hermon_qphdl_t *qphdl; ibt_mr_attr_t mr_attr; hermon_mr_options_t mr_op; hermon_pdhdl_t pd; hermon_cqhdl_t sq_cq, rq_cq; hermon_mrhdl_t mr; uint64_t qp_desc_off; uint64_t *thewqe, thewqesz; uint32_t *sq_buf, *rq_buf; uint32_t log_qp_sq_size, log_qp_rq_size; uint32_t sq_size, rq_size, max_sgl; uint32_t uarpg; uint32_t sq_depth; uint32_t sq_wqe_size, rq_wqe_size, wqesz_shift; int status, flag, i, j; /* * Extract the necessary info from the hermon_qp_info_t structure */ attr_p = qpinfo->qpi_attrp; type = qpinfo->qpi_type; port = qpinfo->qpi_port; ibt_qphdl = qpinfo->qpi_ibt_qphdl; queuesz_p = qpinfo->qpi_queueszp; qphdl = &qpinfo->qpi_qphdl; /* * Check for valid special QP type (only SMI & GSI supported) */ if ((type != IBT_SMI_SQP) && (type != IBT_GSI_SQP)) { status = IBT_QP_SPECIAL_TYPE_INVALID; goto spec_qpalloc_fail; } /* * Check for valid port number */ if (!hermon_portnum_is_valid(state, port)) { status = IBT_HCA_PORT_INVALID; goto spec_qpalloc_fail; } port = port - 1; /* * Check for valid PD handle pointer */ if (attr_p->qp_pd_hdl == NULL) { status = IBT_PD_HDL_INVALID; goto spec_qpalloc_fail; } pd = (hermon_pdhdl_t)attr_p->qp_pd_hdl; /* Increment the reference count on the PD */ hermon_pd_refcnt_inc(pd); /* * Check for valid CQ handle pointers */ if ((attr_p->qp_ibc_scq_hdl == NULL) || (attr_p->qp_ibc_rcq_hdl == NULL)) { status = IBT_CQ_HDL_INVALID; goto spec_qpalloc_fail1; } sq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_scq_hdl; rq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_rcq_hdl; /* * Increment the reference count on the CQs. One or both of these * could return error if we determine that the given CQ is already * being used with a non-special QP (i.e. a normal QP). */ status = hermon_cq_refcnt_inc(sq_cq, HERMON_CQ_IS_SPECIAL); if (status != DDI_SUCCESS) { status = IBT_CQ_HDL_INVALID; goto spec_qpalloc_fail1; } status = hermon_cq_refcnt_inc(rq_cq, HERMON_CQ_IS_SPECIAL); if (status != DDI_SUCCESS) { status = IBT_CQ_HDL_INVALID; goto spec_qpalloc_fail2; } /* * Allocate the special QP resources. 
Essentially, this allocation * amounts to checking if the request special QP has already been * allocated. If successful, the QP context return is an actual * QP context that has been "aliased" to act as a special QP of the * appropriate type (and for the appropriate port). Just as in * hermon_qp_alloc() above, ownership for this QP context is not * immediately given to hardware in the final step here. Instead, we * wait until the QP is later transitioned to the "Init" state before * passing the QP to hardware. If we fail here, we must undo all * the reference count (CQ and PD). */ status = hermon_special_qp_rsrc_alloc(state, type, port, &qpc); if (status != DDI_SUCCESS) { goto spec_qpalloc_fail3; } /* * Allocate the software structure for tracking the special queue * pair (i.e. the Hermon Queue Pair handle). If we fail here, we * must undo the reference counts and the previous resource allocation. */ status = hermon_rsrc_alloc(state, HERMON_QPHDL, 1, sleepflag, &rsrc); if (status != DDI_SUCCESS) { status = IBT_INSUFF_RESOURCE; goto spec_qpalloc_fail4; } qp = (hermon_qphdl_t)rsrc->hr_addr; bzero(qp, sizeof (struct hermon_sw_qp_s)); _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp)) qp->qp_alloc_flags = attr_p->qp_alloc_flags; /* * Actual QP number is a combination of the index of the QPC and * the port number. This is because the special QP contexts must * be allocated two-at-a-time. */ qp->qp_qpnum = qpc->hr_indx + port; qp->qp_ring = qp->qp_qpnum << 8; uarpg = state->hs_kernel_uar_index; /* must be for spec qp */ /* * Allocate the doorbell record. Hermon uses only one for the RQ so * alloc a qp doorbell, using uarpg (above) as the uar index */ status = hermon_dbr_alloc(state, uarpg, &qp->qp_rq_dbr_acchdl, &qp->qp_rq_vdbr, &qp->qp_rq_pdbr, &qp->qp_rdbr_mapoffset); if (status != DDI_SUCCESS) { status = IBT_INSUFF_RESOURCE; goto spec_qpalloc_fail5; } /* * Calculate the appropriate size for the work queues. * Note: All Hermon QP work queues must be a power-of-2 in size. Also * they may not be any smaller than HERMON_QP_MIN_SIZE. This step is * to round the requested size up to the next highest power-of-2 */ attr_p->qp_sizes.cs_sq = max(attr_p->qp_sizes.cs_sq, HERMON_QP_MIN_SIZE); attr_p->qp_sizes.cs_rq = max(attr_p->qp_sizes.cs_rq, HERMON_QP_MIN_SIZE); log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq); if (ISP2(attr_p->qp_sizes.cs_sq)) { log_qp_sq_size = log_qp_sq_size - 1; } log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq); if (ISP2(attr_p->qp_sizes.cs_rq)) { log_qp_rq_size = log_qp_rq_size - 1; } /* * Next we verify that the rounded-up size is valid (i.e. consistent * with the device limits and/or software-configured limits). If not, * then obviously we have a bit of cleanup to do before returning. */ if ((log_qp_sq_size > state->hs_cfg_profile->cp_log_max_qp_sz) || (log_qp_rq_size > state->hs_cfg_profile->cp_log_max_qp_sz)) { status = IBT_HCA_WR_EXCEEDED; goto spec_qpalloc_fail5a; } /* * Next we verify that the requested number of SGL is valid (i.e. * consistent with the device limits and/or software-configured * limits). If not, then obviously the same cleanup needs to be done. */ max_sgl = state->hs_cfg_profile->cp_wqe_real_max_sgl; if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) || (attr_p->qp_sizes.cs_rq_sgl > max_sgl)) { status = IBT_HCA_SGL_EXCEEDED; goto spec_qpalloc_fail5a; } /* * Determine this QP's WQE stride (for both the Send and Recv WQEs). * This will depend on the requested number of SGLs. 
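 * For the special QPs the send work queue uses the MLX WQE format
 * (HERMON_QP_WQ_TYPE_SENDMLX_QP0 for an SMI QP, HERMON_QP_WQ_TYPE_SENDMLX_QP1
 * for a GSI QP), so the send stride computed here can differ from that of
 * an ordinary UD send queue.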
Note: this * has the side-effect of also calculating the real number of SGLs * (for the calculated WQE size). */ hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl, max_sgl, HERMON_QP_WQ_TYPE_RECVQ, &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl); if (type == IBT_SMI_SQP) { hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl, max_sgl, HERMON_QP_WQ_TYPE_SENDMLX_QP0, &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl); } else { hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl, max_sgl, HERMON_QP_WQ_TYPE_SENDMLX_QP1, &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl); } /* * Allocate the memory for QP work queues. Since Hermon work queues * are not allowed to cross a 32-bit (4GB) boundary, the alignment of * the work queue memory is very important. We used to allocate * work queues (the combined receive and send queues) so that they * would be aligned on their combined size. That alignment guaranteed * that they would never cross the 4GB boundary (Hermon work queues * are on the order of MBs at maximum). Now we are able to relax * this alignment constraint by ensuring that the IB address assigned * to the queue memory (as a result of the hermon_mr_register() call) * is offset from zero. * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to * guarantee the alignment, but when attempting to use IOMMU bypass * mode we found that we were not allowed to specify any alignment * that was more restrictive than the system page size. * So we avoided this constraint by passing two alignment values, * one for the memory allocation itself and the other for the DMA * handle (for later bind). This used to cause more memory than * necessary to be allocated (in order to guarantee the more * restrictive alignment contraint). But by guaranteeing the * zero-based IB virtual address for the queue, we are able to * conserve this memory. */ sq_wqe_size = 1 << qp->qp_sq_log_wqesz; sq_depth = 1 << log_qp_sq_size; sq_size = (1 << log_qp_sq_size) * sq_wqe_size; rq_wqe_size = 1 << qp->qp_rq_log_wqesz; rq_size = (1 << log_qp_rq_size) * rq_wqe_size; qp->qp_wqinfo.qa_size = sq_size + rq_size; qp->qp_wqinfo.qa_alloc_align = PAGESIZE; qp->qp_wqinfo.qa_bind_align = PAGESIZE; qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL; status = hermon_queue_alloc(state, &qp->qp_wqinfo, sleepflag); if (status != 0) { status = IBT_INSUFF_RESOURCE; goto spec_qpalloc_fail5a; } /* * Sort WQs in memory according to depth, stride (*q_wqe_size), * biggest first. If equal, the Send Queue still goes first */ qp->qp_sq_baseaddr = 0; qp->qp_rq_baseaddr = 0; if ((sq_wqe_size > rq_wqe_size) || (sq_wqe_size == rq_wqe_size)) { sq_buf = qp->qp_wqinfo.qa_buf_aligned; rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size); qp->qp_rq_baseaddr = sq_size; } else { rq_buf = qp->qp_wqinfo.qa_buf_aligned; sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size); qp->qp_sq_baseaddr = rq_size; } qp->qp_sq_wqhdr = hermon_wrid_wqhdr_create(sq_depth); if (qp->qp_sq_wqhdr == NULL) { status = IBT_INSUFF_RESOURCE; goto spec_qpalloc_fail6; } qp->qp_rq_wqhdr = hermon_wrid_wqhdr_create(1 << log_qp_rq_size); if (qp->qp_rq_wqhdr == NULL) { status = IBT_INSUFF_RESOURCE; goto spec_qpalloc_fail6; } qp->qp_sq_wqavl.wqa_qpn = qp->qp_qpnum; qp->qp_sq_wqavl.wqa_type = HERMON_WR_SEND; qp->qp_sq_wqavl.wqa_wq = qp->qp_sq_wqhdr; qp->qp_rq_wqavl.wqa_qpn = qp->qp_qpnum; qp->qp_rq_wqavl.wqa_type = HERMON_WR_RECV; qp->qp_rq_wqavl.wqa_wq = qp->qp_rq_wqhdr; /* * Register the memory for the special QP work queues. 
The memory for * the special QP must be registered in the Hermon cMPT tables. This * gives us the LKey to specify in the QP context later. Note: The * memory for Hermon work queues (both Send and Recv) must be contiguous * and registered as a single memory region. Also, in order to meet the * alignment restriction, we pass the "mro_bind_override_addr" flag in * the call to hermon_mr_register(). This guarantees that the resulting * IB vaddr will be zero-based (modulo the offset into the first page). * If we fail here, we have a bunch of resource and reference count * cleanup to do. */ flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP; mr_attr.mr_vaddr = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned; mr_attr.mr_len = qp->qp_wqinfo.qa_size; mr_attr.mr_as = NULL; mr_attr.mr_flags = flag; mr_op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass; mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl; mr_op.mro_bind_override_addr = 1; status = hermon_mr_register(state, pd, &mr_attr, &mr, &mr_op, HERMON_QP_CMPT); if (status != DDI_SUCCESS) { status = IBT_INSUFF_RESOURCE; goto spec_qpalloc_fail6; } /* * Calculate the offset between the kernel virtual address space * and the IB virtual address space. This will be used when * posting work requests to properly initialize each WQE. */ qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned - (uint64_t)mr->mr_bindinfo.bi_addr; /* set the prefetch - initially, not prefetching */ qp->qp_no_prefetch = 1; if (qp->qp_no_prefetch) qp->qp_sq_headroom = 2 * sq_wqe_size; else qp->qp_sq_headroom = sq_wqe_size + HERMON_QP_OH_SIZE; /* * hdrm wqes must be integral since both sq_wqe_size & * HERMON_QP_OH_SIZE are power of 2 */ qp->qp_sq_hdrmwqes = (qp->qp_sq_headroom / sq_wqe_size); /* * Fill in all the return arguments (if necessary). This includes * real work queue sizes, real SGLs, and QP number (which will be * either zero or one, depending on the special QP type) */ if (queuesz_p != NULL) { queuesz_p->cs_sq = (1 << log_qp_sq_size) - qp->qp_sq_hdrmwqes; queuesz_p->cs_sq_sgl = qp->qp_sq_sgl; queuesz_p->cs_rq = (1 << log_qp_rq_size); queuesz_p->cs_rq_sgl = qp->qp_rq_sgl; } /* * Fill in the rest of the Hermon Queue Pair handle. We can update * the following fields for use in further operations on the QP. */ qp->qp_qpcrsrcp = qpc; qp->qp_rsrcp = rsrc; qp->qp_state = HERMON_QP_RESET; HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET); qp->qp_pdhdl = pd; qp->qp_mrhdl = mr; qp->qp_sq_sigtype = (attr_p->qp_flags & IBT_WR_SIGNALED) ? HERMON_QP_SQ_WR_SIGNALED : HERMON_QP_SQ_ALL_SIGNALED; qp->qp_is_special = (type == IBT_SMI_SQP) ? 
HERMON_QP_SMI : HERMON_QP_GSI; qp->qp_uarpg = uarpg; qp->qp_umap_dhp = (devmap_cookie_t)NULL; qp->qp_sq_cqhdl = sq_cq; qp->qp_sq_bufsz = (1 << log_qp_sq_size); qp->qp_sq_buf = sq_buf; qp->qp_sq_logqsz = log_qp_sq_size; qp->qp_desc_off = qp_desc_off; qp->qp_rq_cqhdl = rq_cq; qp->qp_rq_bufsz = (1 << log_qp_rq_size); qp->qp_rq_buf = rq_buf; qp->qp_rq_logqsz = log_qp_rq_size; qp->qp_portnum = port; qp->qp_pkeyindx = 0; qp->qp_forward_sqd_event = 0; qp->qp_sqd_still_draining = 0; qp->qp_hdlrarg = (void *)ibt_qphdl; qp->qp_mcg_refcnt = 0; qp->qp_srqhdl = NULL; /* All special QPs are UD QP service type */ qp->qp_type = IBT_UD_RQP; qp->qp_serv_type = HERMON_QP_UD; /* * Initialize the RQ WQEs - unlike Arbel, no Rcv init is needed */ /* * Initialize the SQ WQEs - all that needs to be done is every 64 bytes * set the quadword to all F's - high-order bit is owner (init to one) * and the rest for the headroom definition of prefetching * */ wqesz_shift = qp->qp_sq_log_wqesz; thewqesz = 1 << wqesz_shift; thewqe = (uint64_t *)(void *)(qp->qp_sq_buf); for (i = 0; i < sq_depth; i++) { /* * for each stride, go through and every 64 bytes write the * init value - having set the address once, just keep * incrementing it */ for (j = 0; j < thewqesz; j += 64, thewqe += 8) { *(uint32_t *)thewqe = 0xFFFFFFFF; } } /* Zero out the QP context */ bzero(&qp->qpc, sizeof (hermon_hw_qpc_t)); /* * Put QP handle in Hermon QPNum-to-QPHdl list. Then fill in the * "qphdl" and return success */ hermon_icm_set_num_to_hdl(state, HERMON_QPC, qpc->hr_indx + port, qp); mutex_init(&qp->qp_sq_lock, NULL, MUTEX_DRIVER, DDI_INTR_PRI(state->hs_intrmsi_pri)); *qphdl = qp; return (DDI_SUCCESS); /* * The following is cleanup for all possible failure cases in this routine */ spec_qpalloc_fail6: hermon_queue_free(&qp->qp_wqinfo); if (qp->qp_sq_wqhdr) hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr); if (qp->qp_rq_wqhdr) hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr); spec_qpalloc_fail5a: hermon_dbr_free(state, uarpg, qp->qp_rq_vdbr); spec_qpalloc_fail5: hermon_rsrc_free(state, &rsrc); spec_qpalloc_fail4: if (hermon_special_qp_rsrc_free(state, type, port) != DDI_SUCCESS) { HERMON_WARNING(state, "failed to free special QP rsrc"); } spec_qpalloc_fail3: hermon_cq_refcnt_dec(rq_cq); spec_qpalloc_fail2: hermon_cq_refcnt_dec(sq_cq); spec_qpalloc_fail1: hermon_pd_refcnt_dec(pd); spec_qpalloc_fail: return (status); } /* * hermon_qp_alloc_range() * Context: Can be called only from user or kernel context. 
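 *
 *    Allocates 2^log2 QPs with consecutive QP numbers out of one contiguous
 *    block of QP contexts; this is the path used for the RSS and FEXCH
 *    flavors of UD QP, which hermon_qp_alloc() above deliberately rejects.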
*/ int hermon_qp_alloc_range(hermon_state_t *state, uint_t log2, hermon_qp_info_t *qpinfo, ibtl_qp_hdl_t *ibt_qphdl, ibc_cq_hdl_t *send_cq, ibc_cq_hdl_t *recv_cq, hermon_qphdl_t *qphdl, uint_t sleepflag) { hermon_rsrc_t *qpc, *rsrc; hermon_rsrc_type_t rsrc_type; hermon_qphdl_t qp; hermon_qp_range_t *qp_range_p; ibt_qp_alloc_attr_t *attr_p; ibt_qp_type_t type; hermon_qp_wq_type_t swq_type; ibt_chan_sizes_t *queuesz_p; ibt_mr_attr_t mr_attr; hermon_mr_options_t mr_op; hermon_srqhdl_t srq; hermon_pdhdl_t pd; hermon_cqhdl_t sq_cq, rq_cq; hermon_mrhdl_t mr; uint64_t qp_desc_off; uint64_t *thewqe, thewqesz; uint32_t *sq_buf, *rq_buf; uint32_t log_qp_sq_size, log_qp_rq_size; uint32_t sq_size, rq_size; uint32_t sq_depth, rq_depth; uint32_t sq_wqe_size, rq_wqe_size, wqesz_shift; uint32_t max_sgl, max_recv_sgl, uarpg; uint_t qp_srq_en, i, j; int ii; /* loop counter for range */ int status, flag; uint_t serv_type; _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p, *queuesz_p)) /* * Extract the necessary info from the hermon_qp_info_t structure */ attr_p = qpinfo->qpi_attrp; type = qpinfo->qpi_type; queuesz_p = qpinfo->qpi_queueszp; if (attr_p->qp_alloc_flags & IBT_QP_USES_RSS) { if (log2 > state->hs_ibtfinfo.hca_attr->hca_rss_max_log2_table) return (IBT_INSUFF_RESOURCE); rsrc_type = HERMON_QPC; serv_type = HERMON_QP_UD; } else if (attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH) { if (log2 > state->hs_ibtfinfo.hca_attr->hca_fexch_max_log2_qp) return (IBT_INSUFF_RESOURCE); switch (attr_p->qp_fc.fc_hca_port) { case 1: rsrc_type = HERMON_QPC_FEXCH_PORT1; break; case 2: rsrc_type = HERMON_QPC_FEXCH_PORT2; break; default: return (IBT_INVALID_PARAM); } serv_type = HERMON_QP_FEXCH; } else return (IBT_INVALID_PARAM); /* * Determine whether QP is being allocated for userland access or * whether it is being allocated for kernel access. If the QP is * being allocated for userland access, fail (too complex for now). */ if (attr_p->qp_alloc_flags & IBT_QP_USER_MAP) { return (IBT_NOT_SUPPORTED); } else { uarpg = state->hs_kernel_uar_index; } /* * Determine whether QP is being associated with an SRQ */ qp_srq_en = (attr_p->qp_alloc_flags & IBT_QP_USES_SRQ) ? 1 : 0; if (qp_srq_en) { /* * Check for valid SRQ handle pointers */ if (attr_p->qp_ibc_srq_hdl == NULL) { return (IBT_SRQ_HDL_INVALID); } srq = (hermon_srqhdl_t)attr_p->qp_ibc_srq_hdl; } /* * Check for valid QP service type (only UD supported) */ if (type != IBT_UD_RQP) { return (IBT_QP_SRV_TYPE_INVALID); } /* * Check for valid PD handle pointer */ if (attr_p->qp_pd_hdl == NULL) { return (IBT_PD_HDL_INVALID); } pd = (hermon_pdhdl_t)attr_p->qp_pd_hdl; /* * If on an SRQ, check to make sure the PD is the same */ if (qp_srq_en && (pd->pd_pdnum != srq->srq_pdhdl->pd_pdnum)) { return (IBT_PD_HDL_INVALID); } /* set loop variable here, for freeing resources on error */ ii = 0; /* * Allocate 2^log2 contiguous/aligned QP context entries. This will * be filled in with all the necessary parameters to define the * Queue Pairs. Unlike other Hermon hardware resources, ownership * is not immediately given to hardware in the final step here. * Instead, we must wait until the QP is later transitioned to the * "Init" state before passing the QP to hardware. If we fail here, * we must undo all the reference count (CQ and PD). */ status = hermon_rsrc_alloc(state, rsrc_type, 1 << log2, sleepflag, &qpc); if (status != DDI_SUCCESS) { return (IBT_INSUFF_RESOURCE); } if (attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH) /* * Need to init the MKEYs for the FEXCH QPs. 
* * For FEXCH QP subranges, we return the QPN base as * "relative" to the full FEXCH QP range for the port. */ *(qpinfo->qpi_qpn) = hermon_fcoib_fexch_relative_qpn(state, attr_p->qp_fc.fc_hca_port, qpc->hr_indx); else *(qpinfo->qpi_qpn) = (ib_qpn_t)qpc->hr_indx; qp_range_p = kmem_alloc(sizeof (*qp_range_p), (sleepflag == HERMON_SLEEP) ? KM_SLEEP : KM_NOSLEEP); if (qp_range_p == NULL) { status = IBT_INSUFF_RESOURCE; goto qpalloc_fail0; } mutex_init(&qp_range_p->hqpr_lock, NULL, MUTEX_DRIVER, DDI_INTR_PRI(state->hs_intrmsi_pri)); mutex_enter(&qp_range_p->hqpr_lock); qp_range_p->hqpr_refcnt = 1 << log2; qp_range_p->hqpr_qpcrsrc = qpc; mutex_exit(&qp_range_p->hqpr_lock); for_each_qp: /* Increment the reference count on the protection domain (PD) */ hermon_pd_refcnt_inc(pd); rq_cq = (hermon_cqhdl_t)recv_cq[ii]; sq_cq = (hermon_cqhdl_t)send_cq[ii]; if (sq_cq == NULL) { if (attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH) { /* if no send completions, just use rq_cq */ sq_cq = rq_cq; } else { status = IBT_CQ_HDL_INVALID; goto qpalloc_fail1; } } /* * Increment the reference count on the CQs. One or both of these * could return error if we determine that the given CQ is already * being used with a special (SMI/GSI) QP. */ status = hermon_cq_refcnt_inc(sq_cq, HERMON_CQ_IS_NORMAL); if (status != DDI_SUCCESS) { status = IBT_CQ_HDL_INVALID; goto qpalloc_fail1; } status = hermon_cq_refcnt_inc(rq_cq, HERMON_CQ_IS_NORMAL); if (status != DDI_SUCCESS) { status = IBT_CQ_HDL_INVALID; goto qpalloc_fail2; } /* * Allocate the software structure for tracking the queue pair * (i.e. the Hermon Queue Pair handle). If we fail here, we must * undo the reference counts and the previous resource allocation. */ status = hermon_rsrc_alloc(state, HERMON_QPHDL, 1, sleepflag, &rsrc); if (status != DDI_SUCCESS) { status = IBT_INSUFF_RESOURCE; goto qpalloc_fail4; } qp = (hermon_qphdl_t)rsrc->hr_addr; bzero(qp, sizeof (struct hermon_sw_qp_s)); _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp)) qp->qp_alloc_flags = attr_p->qp_alloc_flags; /* * Calculate the QP number from QPC index. This routine handles * all of the operations necessary to keep track of used, unused, * and released QP numbers. */ qp->qp_qpnum = qpc->hr_indx + ii; qp->qp_ring = qp->qp_qpnum << 8; qp->qp_qpn_hdl = NULL; /* * Allocate the doorbell record. Hermon just needs one for the RQ, * if the QP is not associated with an SRQ, and use uarpg (above) as * the uar index */ if (!qp_srq_en) { status = hermon_dbr_alloc(state, uarpg, &qp->qp_rq_dbr_acchdl, &qp->qp_rq_vdbr, &qp->qp_rq_pdbr, &qp->qp_rdbr_mapoffset); if (status != DDI_SUCCESS) { status = IBT_INSUFF_RESOURCE; goto qpalloc_fail6; } } qp->qp_uses_lso = (attr_p->qp_flags & IBT_USES_LSO); /* * We verify that the requested number of SGL is valid (i.e. * consistent with the device limits and/or software-configured * limits). If not, then obviously the same cleanup needs to be done. */ max_sgl = state->hs_ibtfinfo.hca_attr->hca_ud_send_sgl_sz; swq_type = HERMON_QP_WQ_TYPE_SENDQ_UD; max_recv_sgl = state->hs_ibtfinfo.hca_attr->hca_recv_sgl_sz; if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) || (!qp_srq_en && (attr_p->qp_sizes.cs_rq_sgl > max_recv_sgl))) { status = IBT_HCA_SGL_EXCEEDED; goto qpalloc_fail7; } /* * Determine this QP's WQE stride (for both the Send and Recv WQEs). * This will depend on the requested number of SGLs. Note: this * has the side-effect of also calculating the real number of SGLs * (for the calculated WQE size). * * For QP's on an SRQ, we set these to 0. 
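 * (Illustrative example, assuming 16-byte scatter entries: a request for 4
 * receive SGLs would need 64 bytes per WQE, so qp_rq_log_wqesz would come
 * back as 6 and qp_rq_sgl as the real number of SGLs that fit in that
 * power-of-two stride.)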
*/ if (qp_srq_en) { qp->qp_rq_log_wqesz = 0; qp->qp_rq_sgl = 0; } else { hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl, max_recv_sgl, HERMON_QP_WQ_TYPE_RECVQ, &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl); } hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl, max_sgl, swq_type, &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl); sq_wqe_size = 1 << qp->qp_sq_log_wqesz; /* NOTE: currently policy in driver, later maybe IBTF interface */ qp->qp_no_prefetch = 0; /* * for prefetching, we need to add the number of wqes in * the 2k area plus one to the number requested, but * ONLY for send queue. If no_prefetch == 1 (prefetch off) * it's exactly TWO wqes for the headroom */ if (qp->qp_no_prefetch) qp->qp_sq_headroom = 2 * sq_wqe_size; else qp->qp_sq_headroom = sq_wqe_size + HERMON_QP_OH_SIZE; /* * hdrm wqes must be integral since both sq_wqe_size & * HERMON_QP_OH_SIZE are power of 2 */ qp->qp_sq_hdrmwqes = (qp->qp_sq_headroom / sq_wqe_size); /* * Calculate the appropriate size for the work queues. * For send queue, add in the headroom wqes to the calculation. * Note: All Hermon QP work queues must be a power-of-2 in size. Also * they may not be any smaller than HERMON_QP_MIN_SIZE. This step is * to round the requested size up to the next highest power-of-2 */ /* first, adjust to a minimum and tell the caller the change */ attr_p->qp_sizes.cs_sq = max(attr_p->qp_sizes.cs_sq, HERMON_QP_MIN_SIZE); attr_p->qp_sizes.cs_rq = max(attr_p->qp_sizes.cs_rq, HERMON_QP_MIN_SIZE); /* * now, calculate the alloc size, taking into account * the headroom for the sq */ log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes); /* if the total is a power of two, reduce it */ if (ISP2(attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes)) { log_qp_sq_size = log_qp_sq_size - 1; } log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq); if (ISP2(attr_p->qp_sizes.cs_rq)) { log_qp_rq_size = log_qp_rq_size - 1; } /* * Next we verify that the rounded-up size is valid (i.e. consistent * with the device limits and/or software-configured limits). If not, * then obviously we have a lot of cleanup to do before returning. * * NOTE: the first condition deals with the (test) case of cs_sq * being just less than 2^32. In this case, the headroom addition * to the requested cs_sq will pass the test when it should not. * This test no longer lets that case slip through the check. */ if ((attr_p->qp_sizes.cs_sq > (1 << state->hs_cfg_profile->cp_log_max_qp_sz)) || (log_qp_sq_size > state->hs_cfg_profile->cp_log_max_qp_sz) || (!qp_srq_en && (log_qp_rq_size > state->hs_cfg_profile->cp_log_max_qp_sz))) { status = IBT_HCA_WR_EXCEEDED; goto qpalloc_fail7; } /* * Allocate the memory for QP work queues. Since Hermon work queues * are not allowed to cross a 32-bit (4GB) boundary, the alignment of * the work queue memory is very important. We used to allocate * work queues (the combined receive and send queues) so that they * would be aligned on their combined size. That alignment guaranteed * that they would never cross the 4GB boundary (Hermon work queues * are on the order of MBs at maximum). Now we are able to relax * this alignment constraint by ensuring that the IB address assigned * to the queue memory (as a result of the hermon_mr_register() call) * is offset from zero. * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to * guarantee the alignment, but when attempting to use IOMMU bypass * mode we found that we were not allowed to specify any alignment * that was more restrictive than the system page size. 
* So we avoided this constraint by passing two alignment values, * one for the memory allocation itself and the other for the DMA * handle (for later bind). This used to cause more memory than * necessary to be allocated (in order to guarantee the more * restrictive alignment contraint). But by guaranteeing the * zero-based IB virtual address for the queue, we are able to * conserve this memory. */ sq_wqe_size = 1 << qp->qp_sq_log_wqesz; sq_depth = 1 << log_qp_sq_size; sq_size = sq_depth * sq_wqe_size; /* QP on SRQ sets these to 0 */ if (qp_srq_en) { rq_wqe_size = 0; rq_size = 0; } else { rq_wqe_size = 1 << qp->qp_rq_log_wqesz; rq_depth = 1 << log_qp_rq_size; rq_size = rq_depth * rq_wqe_size; } qp->qp_wqinfo.qa_size = sq_size + rq_size; qp->qp_wqinfo.qa_alloc_align = PAGESIZE; qp->qp_wqinfo.qa_bind_align = PAGESIZE; qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL; status = hermon_queue_alloc(state, &qp->qp_wqinfo, sleepflag); if (status != DDI_SUCCESS) { status = IBT_INSUFF_RESOURCE; goto qpalloc_fail7; } /* * Sort WQs in memory according to stride (*q_wqe_size), largest first * If they are equal, still put the SQ first */ qp->qp_sq_baseaddr = 0; qp->qp_rq_baseaddr = 0; if ((sq_wqe_size > rq_wqe_size) || (sq_wqe_size == rq_wqe_size)) { sq_buf = qp->qp_wqinfo.qa_buf_aligned; /* if this QP is on an SRQ, set the rq_buf to NULL */ if (qp_srq_en) { rq_buf = NULL; } else { rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size); qp->qp_rq_baseaddr = sq_size; } } else { rq_buf = qp->qp_wqinfo.qa_buf_aligned; sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size); qp->qp_sq_baseaddr = rq_size; } qp->qp_sq_wqhdr = hermon_wrid_wqhdr_create(sq_depth); if (qp->qp_sq_wqhdr == NULL) { status = IBT_INSUFF_RESOURCE; goto qpalloc_fail8; } if (qp_srq_en) { qp->qp_rq_wqavl.wqa_wq = srq->srq_wq_wqhdr; qp->qp_rq_wqavl.wqa_srq_en = 1; qp->qp_rq_wqavl.wqa_srq = srq; } else { qp->qp_rq_wqhdr = hermon_wrid_wqhdr_create(rq_depth); if (qp->qp_rq_wqhdr == NULL) { status = IBT_INSUFF_RESOURCE; goto qpalloc_fail8; } qp->qp_rq_wqavl.wqa_wq = qp->qp_rq_wqhdr; } qp->qp_sq_wqavl.wqa_qpn = qp->qp_qpnum; qp->qp_sq_wqavl.wqa_type = HERMON_WR_SEND; qp->qp_sq_wqavl.wqa_wq = qp->qp_sq_wqhdr; qp->qp_rq_wqavl.wqa_qpn = qp->qp_qpnum; qp->qp_rq_wqavl.wqa_type = HERMON_WR_RECV; /* * Register the memory for the QP work queues. The memory for the * QP must be registered in the Hermon cMPT tables. This gives us the * LKey to specify in the QP context later. Note: The memory for * Hermon work queues (both Send and Recv) must be contiguous and * registered as a single memory region. Note: If the QP memory is * user-mappable, force DDI_DMA_CONSISTENT mapping. Also, in order to * meet the alignment restriction, we pass the "mro_bind_override_addr" * flag in the call to hermon_mr_register(). This guarantees that the * resulting IB vaddr will be zero-based (modulo the offset into the * first page). If we fail here, we still have the bunch of resource * and reference count cleanup to do. */ flag = (sleepflag == HERMON_SLEEP) ? 
IBT_MR_SLEEP : IBT_MR_NOSLEEP; mr_attr.mr_vaddr = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned; mr_attr.mr_len = qp->qp_wqinfo.qa_size; mr_attr.mr_as = NULL; mr_attr.mr_flags = flag; /* HERMON_QUEUE_LOCATION_NORMAL */ mr_op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass; mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl; mr_op.mro_bind_override_addr = 1; status = hermon_mr_register(state, pd, &mr_attr, &mr, &mr_op, HERMON_QP_CMPT); if (status != DDI_SUCCESS) { status = IBT_INSUFF_RESOURCE; goto qpalloc_fail9; } /* * Calculate the offset between the kernel virtual address space * and the IB virtual address space. This will be used when * posting work requests to properly initialize each WQE. */ qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned - (uint64_t)mr->mr_bindinfo.bi_addr; /* * Fill in all the return arguments (if necessary). This includes * real work queue sizes (in wqes), real SGLs, and QP number */ if (queuesz_p != NULL) { queuesz_p->cs_sq = (1 << log_qp_sq_size) - qp->qp_sq_hdrmwqes; queuesz_p->cs_sq_sgl = qp->qp_sq_sgl; /* if this QP is on an SRQ, set these to 0 */ if (qp_srq_en) { queuesz_p->cs_rq = 0; queuesz_p->cs_rq_sgl = 0; } else { queuesz_p->cs_rq = (1 << log_qp_rq_size); queuesz_p->cs_rq_sgl = qp->qp_rq_sgl; } } /* * Fill in the rest of the Hermon Queue Pair handle. */ qp->qp_qpcrsrcp = NULL; qp->qp_rsrcp = rsrc; qp->qp_state = HERMON_QP_RESET; HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET); qp->qp_pdhdl = pd; qp->qp_mrhdl = mr; qp->qp_sq_sigtype = (attr_p->qp_flags & IBT_WR_SIGNALED) ? HERMON_QP_SQ_WR_SIGNALED : HERMON_QP_SQ_ALL_SIGNALED; qp->qp_is_special = 0; qp->qp_uarpg = uarpg; qp->qp_umap_dhp = (devmap_cookie_t)NULL; qp->qp_sq_cqhdl = sq_cq; qp->qp_sq_bufsz = (1 << log_qp_sq_size); qp->qp_sq_logqsz = log_qp_sq_size; qp->qp_sq_buf = sq_buf; qp->qp_desc_off = qp_desc_off; qp->qp_rq_cqhdl = rq_cq; qp->qp_rq_buf = rq_buf; qp->qp_rlky = (attr_p->qp_flags & IBT_FAST_REG_RES_LKEY) != 0; /* if this QP is on an SRQ, set rq_bufsz to 0 */ if (qp_srq_en) { qp->qp_rq_bufsz = 0; qp->qp_rq_logqsz = 0; } else { qp->qp_rq_bufsz = (1 << log_qp_rq_size); qp->qp_rq_logqsz = log_qp_rq_size; } qp->qp_forward_sqd_event = 0; qp->qp_sqd_still_draining = 0; qp->qp_hdlrarg = (void *)ibt_qphdl[ii]; qp->qp_mcg_refcnt = 0; /* * If this QP is to be associated with an SRQ, set the SRQ handle */ if (qp_srq_en) { qp->qp_srqhdl = srq; hermon_srq_refcnt_inc(qp->qp_srqhdl); } else { qp->qp_srqhdl = NULL; } qp->qp_type = IBT_UD_RQP; qp->qp_serv_type = serv_type; /* * Initialize the RQ WQEs - unlike Arbel, no Rcv init is needed */ /* * Initialize the SQ WQEs - all that needs to be done is every 64 bytes * set the quadword to all F's - high-order bit is owner (init to one) * and the rest for the headroom definition of prefetching. */ if ((attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH) == 0) { wqesz_shift = qp->qp_sq_log_wqesz; thewqesz = 1 << wqesz_shift; thewqe = (uint64_t *)(void *)(qp->qp_sq_buf); for (i = 0; i < sq_depth; i++) { /* * for each stride, go through and every 64 bytes * write the init value - having set the address * once, just keep incrementing it */ for (j = 0; j < thewqesz; j += 64, thewqe += 8) { *(uint32_t *)thewqe = 0xFFFFFFFF; } } } /* Zero out the QP context */ bzero(&qp->qpc, sizeof (hermon_hw_qpc_t)); /* * Put QP handle in Hermon QPNum-to-QPHdl list. 
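 * (The handle is stored at ICM index qpc->hr_indx + ii so that later event
 * processing can map a QP number reported by the hardware back to this
 * software handle.)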
Then fill in the * "qphdl" and return success */ hermon_icm_set_num_to_hdl(state, HERMON_QPC, qpc->hr_indx + ii, qp); mutex_init(&qp->qp_sq_lock, NULL, MUTEX_DRIVER, DDI_INTR_PRI(state->hs_intrmsi_pri)); qp->qp_rangep = qp_range_p; qphdl[ii] = qp; if (++ii < (1 << log2)) goto for_each_qp; return (DDI_SUCCESS); /* * The following is cleanup for all possible failure cases in this routine */ qpalloc_fail9: hermon_queue_free(&qp->qp_wqinfo); qpalloc_fail8: if (qp->qp_sq_wqhdr) hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr); if (qp->qp_rq_wqhdr) hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr); qpalloc_fail7: if (!qp_srq_en) { hermon_dbr_free(state, uarpg, qp->qp_rq_vdbr); } qpalloc_fail6: hermon_rsrc_free(state, &rsrc); qpalloc_fail4: hermon_cq_refcnt_dec(rq_cq); qpalloc_fail2: hermon_cq_refcnt_dec(sq_cq); qpalloc_fail1: hermon_pd_refcnt_dec(pd); qpalloc_fail0: if (ii == 0) { if (qp_range_p) kmem_free(qp_range_p, sizeof (*qp_range_p)); hermon_rsrc_free(state, &qpc); } else { /* qp_range_p and qpc rsrc will be freed in hermon_qp_free */ mutex_enter(&qp->qp_rangep->hqpr_lock); qp_range_p->hqpr_refcnt = ii; mutex_exit(&qp->qp_rangep->hqpr_lock); while (--ii >= 0) { ibc_qpn_hdl_t qpn_hdl; int free_status; free_status = hermon_qp_free(state, &qphdl[ii], IBC_FREE_QP_AND_QPN, &qpn_hdl, sleepflag); if (free_status != DDI_SUCCESS) cmn_err(CE_CONT, "!qp_range: status 0x%x: " "error status %x during free", status, free_status); } } return (status); } /* * hermon_qp_free() * This function frees up the QP resources. Depending on the value * of the "free_qp_flags", the QP number may not be released until * a subsequent call to hermon_qp_release_qpn(). * * Context: Can be called only from user or kernel context. */ /* ARGSUSED */ int hermon_qp_free(hermon_state_t *state, hermon_qphdl_t *qphdl, ibc_free_qp_flags_t free_qp_flags, ibc_qpn_hdl_t *qpnh, uint_t sleepflag) { hermon_rsrc_t *qpc, *rsrc; hermon_umap_db_entry_t *umapdb; hermon_qpn_entry_t *entry; hermon_pdhdl_t pd; hermon_mrhdl_t mr; hermon_cqhdl_t sq_cq, rq_cq; hermon_srqhdl_t srq; hermon_qphdl_t qp; uint64_t value; uint_t type, port; uint_t maxprot; uint_t qp_srq_en; int status; /* * Pull all the necessary information from the Hermon Queue Pair * handle. This is necessary here because the resource for the * QP handle is going to be freed up as part of this operation. */ qp = *qphdl; mutex_enter(&qp->qp_lock); qpc = qp->qp_qpcrsrcp; /* NULL if part of a "range" */ rsrc = qp->qp_rsrcp; pd = qp->qp_pdhdl; srq = qp->qp_srqhdl; mr = qp->qp_mrhdl; rq_cq = qp->qp_rq_cqhdl; sq_cq = qp->qp_sq_cqhdl; port = qp->qp_portnum; qp_srq_en = qp->qp_alloc_flags & IBT_QP_USES_SRQ; /* * If the QP is part of an MCG, then we fail the qp_free */ if (qp->qp_mcg_refcnt != 0) { mutex_exit(&qp->qp_lock); status = ibc_get_ci_failure(0); goto qpfree_fail; } /* * If the QP is not already in "Reset" state, then transition to * "Reset". This is necessary because software does not reclaim * ownership of the QP context until the QP is in the "Reset" state. * If the ownership transfer fails for any reason, then it is an * indication that something (either in HW or SW) has gone seriously * wrong. So we print a warning message and return. 
*/ if (qp->qp_state != HERMON_QP_RESET) { if (hermon_qp_to_reset(state, qp) != DDI_SUCCESS) { mutex_exit(&qp->qp_lock); HERMON_WARNING(state, "failed to reset QP context"); status = ibc_get_ci_failure(0); goto qpfree_fail; } qp->qp_state = HERMON_QP_RESET; HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET); /* * Do any additional handling necessary for the transition * to the "Reset" state (e.g. update the WRID lists) */ if (hermon_wrid_to_reset_handling(state, qp) != DDI_SUCCESS) { mutex_exit(&qp->qp_lock); HERMON_WARNING(state, "failed to reset QP WRID list"); status = ibc_get_ci_failure(0); goto qpfree_fail; } } /* * If this was a user-mappable QP, then we need to remove its entry * from the "userland resources database". If it is also currently * mmap()'d out to a user process, then we need to call * devmap_devmem_remap() to remap the QP memory to an invalid mapping. * We also need to invalidate the QP tracking information for the * user mapping. */ if (qp->qp_alloc_flags & IBT_QP_USER_MAP) { status = hermon_umap_db_find(state->hs_instance, qp->qp_qpnum, MLNX_UMAP_QPMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE, &umapdb); if (status != DDI_SUCCESS) { mutex_exit(&qp->qp_lock); HERMON_WARNING(state, "failed to find in database"); return (ibc_get_ci_failure(0)); } hermon_umap_db_free(umapdb); if (qp->qp_umap_dhp != NULL) { maxprot = (PROT_READ | PROT_WRITE | PROT_USER); status = devmap_devmem_remap(qp->qp_umap_dhp, state->hs_dip, 0, 0, qp->qp_wqinfo.qa_size, maxprot, DEVMAP_MAPPING_INVALID, NULL); if (status != DDI_SUCCESS) { mutex_exit(&qp->qp_lock); HERMON_WARNING(state, "failed in QP memory " "devmap_devmem_remap()"); return (ibc_get_ci_failure(0)); } qp->qp_umap_dhp = (devmap_cookie_t)NULL; } } /* * Put NULL into the Hermon QPNum-to-QPHdl list. This will allow any * in-progress events to detect that the QP corresponding to this * number has been freed. Note: it does depend in whether we are * freeing a special QP or not. */ if (qpc == NULL) { hermon_icm_set_num_to_hdl(state, HERMON_QPC, qp->qp_qpnum, NULL); } else if (qp->qp_is_special) { hermon_icm_set_num_to_hdl(state, HERMON_QPC, qpc->hr_indx + port, NULL); } else { hermon_icm_set_num_to_hdl(state, HERMON_QPC, qpc->hr_indx, NULL); } /* * Drop the QP lock * At this point the lock is no longer necessary. We cannot * protect from multiple simultaneous calls to free the same QP. * In addition, since the QP lock is contained in the QP "software * handle" resource, which we will free (see below), it is * important that we have no further references to that memory. */ mutex_exit(&qp->qp_lock); _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp)) /* * Free the QP resources * Start by deregistering and freeing the memory for work queues. * Next free any previously allocated context information * (depending on QP type) * Finally, decrement the necessary reference counts. * If this fails for any reason, then it is an indication that * something (either in HW or SW) has gone seriously wrong. So we * print a warning message and return. 
*/ status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL, sleepflag); if (status != DDI_SUCCESS) { HERMON_WARNING(state, "failed to deregister QP memory"); status = ibc_get_ci_failure(0); goto qpfree_fail; } /* Free the memory for the QP */ hermon_queue_free(&qp->qp_wqinfo); if (qp->qp_sq_wqhdr) hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr); if (qp->qp_rq_wqhdr) hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr); /* Free the dbr */ if (!qp_srq_en) { hermon_dbr_free(state, qp->qp_uarpg, qp->qp_rq_vdbr); } /* * Free up the remainder of the QP resources. Note: we have a few * different resources to free up depending on whether the QP is a * special QP or not. As described above, if any of these fail for * any reason it is an indication that something (either in HW or SW) * has gone seriously wrong. So we print a warning message and * return. */ if (qp->qp_is_special) { type = (qp->qp_is_special == HERMON_QP_SMI) ? IBT_SMI_SQP : IBT_GSI_SQP; /* Free up resources for the special QP */ status = hermon_special_qp_rsrc_free(state, type, port); if (status != DDI_SUCCESS) { HERMON_WARNING(state, "failed to free special QP rsrc"); status = ibc_get_ci_failure(0); goto qpfree_fail; } } else if (qp->qp_rangep) { int refcnt; mutex_enter(&qp->qp_rangep->hqpr_lock); refcnt = --qp->qp_rangep->hqpr_refcnt; mutex_exit(&qp->qp_rangep->hqpr_lock); if (refcnt == 0) { mutex_destroy(&qp->qp_rangep->hqpr_lock); hermon_rsrc_free(state, &qp->qp_rangep->hqpr_qpcrsrc); kmem_free(qp->qp_rangep, sizeof (*qp->qp_rangep)); } qp->qp_rangep = NULL; } else if (qp->qp_qpn_hdl == NULL) { hermon_rsrc_free(state, &qpc); } else { /* * Check the flags and determine whether to release the * QPN or not, based on their value. */ if (free_qp_flags == IBC_FREE_QP_ONLY) { entry = qp->qp_qpn_hdl; hermon_qp_release_qpn(state, qp->qp_qpn_hdl, HERMON_QPN_FREE_ONLY); *qpnh = (ibc_qpn_hdl_t)entry; } else { hermon_qp_release_qpn(state, qp->qp_qpn_hdl, HERMON_QPN_RELEASE); } } mutex_destroy(&qp->qp_sq_lock); /* Free the Hermon Queue Pair handle */ hermon_rsrc_free(state, &rsrc); /* Decrement the reference counts on CQs, PD and SRQ (if needed) */ hermon_cq_refcnt_dec(rq_cq); hermon_cq_refcnt_dec(sq_cq); hermon_pd_refcnt_dec(pd); if (qp_srq_en == HERMON_QP_SRQ_ENABLED) { hermon_srq_refcnt_dec(srq); } /* Set the qphdl pointer to NULL and return success */ *qphdl = NULL; return (DDI_SUCCESS); qpfree_fail: return (status); } /* * hermon_qp_query() * Context: Can be called from interrupt or base context. 
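 *
 * Behavior summary (editorial): the software-cached attributes (QP
 * number, CQ handles, SGL limits, queue sizes) are always returned;
 * unless the QP is in the Reset state, a QUERY_QP firmware command is
 * also posted (with a no-sleep/spin flag, since this routine may run
 * at interrupt level) to refresh the transport-specific attributes
 * and to notice any asynchronous transition to SQError/Error.
 *
 * Hypothetical caller sketch (local names are illustrative only):
 *
 *     ibt_qp_query_attr_t attr;
 *     if (hermon_qp_query(state, qphdl, &attr) == DDI_SUCCESS &&
 *         attr.qp_info.qp_state == IBT_STATE_RTS) {
 *             ... QP is ready to send ...
 *     }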
*/ int hermon_qp_query(hermon_state_t *state, hermon_qphdl_t qp, ibt_qp_query_attr_t *attr_p) { ibt_cep_state_t qp_state; ibt_qp_ud_attr_t *ud; ibt_qp_rc_attr_t *rc; ibt_qp_uc_attr_t *uc; ibt_cep_flags_t enable_flags; hermon_hw_addr_path_t *qpc_path, *qpc_alt_path; ibt_cep_path_t *path_ptr, *alt_path_ptr; hermon_hw_qpc_t *qpc; int status; uint_t tmp_sched_q, tmp_alt_sched_q; mutex_enter(&qp->qp_lock); /* * Grab the temporary QPC entry from QP software state */ qpc = &qp->qpc; /* Convert the current Hermon QP state to IBTF QP state */ switch (qp->qp_state) { case HERMON_QP_RESET: qp_state = IBT_STATE_RESET; /* "Reset" */ break; case HERMON_QP_INIT: qp_state = IBT_STATE_INIT; /* Initialized */ break; case HERMON_QP_RTR: qp_state = IBT_STATE_RTR; /* Ready to Receive */ break; case HERMON_QP_RTS: qp_state = IBT_STATE_RTS; /* Ready to Send */ break; case HERMON_QP_SQERR: qp_state = IBT_STATE_SQE; /* Send Queue Error */ break; case HERMON_QP_SQD: if (qp->qp_sqd_still_draining) { qp_state = IBT_STATE_SQDRAIN; /* SQ Draining */ } else { qp_state = IBT_STATE_SQD; /* SQ Drained */ } break; case HERMON_QP_ERR: qp_state = IBT_STATE_ERROR; /* Error */ break; default: mutex_exit(&qp->qp_lock); return (ibc_get_ci_failure(0)); } attr_p->qp_info.qp_state = qp_state; /* SRQ Hook. */ attr_p->qp_srq = NULL; /* * The following QP information is always returned, regardless of * the current QP state. Note: Some special handling is necessary * for calculating the QP number on special QP (QP0 and QP1). */ attr_p->qp_sq_cq = (qp->qp_sq_cqhdl == NULL) ? NULL : qp->qp_sq_cqhdl->cq_hdlrarg; attr_p->qp_rq_cq = (qp->qp_rq_cqhdl == NULL) ? NULL : qp->qp_rq_cqhdl->cq_hdlrarg; if (qp->qp_is_special) { attr_p->qp_qpn = (qp->qp_is_special == HERMON_QP_SMI) ? 0 : 1; } else { attr_p->qp_qpn = (ib_qpn_t)qp->qp_qpnum; } attr_p->qp_sq_sgl = qp->qp_sq_sgl; attr_p->qp_rq_sgl = qp->qp_rq_sgl; attr_p->qp_info.qp_sq_sz = qp->qp_sq_bufsz - qp->qp_sq_hdrmwqes; attr_p->qp_info.qp_rq_sz = qp->qp_rq_bufsz; /* * If QP is currently in the "Reset" state, then only the above are * returned */ if (qp_state == IBT_STATE_RESET) { mutex_exit(&qp->qp_lock); return (DDI_SUCCESS); } /* * Post QUERY_QP command to firmware * * We do a HERMON_NOSLEEP here because we are holding the "qp_lock". * Since we may be in the interrupt context (or subsequently raised * to interrupt level by priority inversion), we do not want to block * in this routine waiting for success. */ tmp_sched_q = qpc->pri_addr_path.sched_q; tmp_alt_sched_q = qpc->alt_addr_path.sched_q; status = hermon_cmn_query_cmd_post(state, QUERY_QP, 0, qp->qp_qpnum, qpc, sizeof (hermon_hw_qpc_t), HERMON_CMD_NOSLEEP_SPIN); if (status != HERMON_CMD_SUCCESS) { mutex_exit(&qp->qp_lock); cmn_err(CE_WARN, "hermon%d: hermon_qp_query: QUERY_QP " "command failed: %08x\n", state->hs_instance, status); if (status == HERMON_CMD_INVALID_STATUS) { hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); } return (ibc_get_ci_failure(0)); } qpc->pri_addr_path.sched_q = tmp_sched_q; qpc->alt_addr_path.sched_q = tmp_alt_sched_q; /* * Fill in the additional QP info based on the QP's transport type. 
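 *
 * Note (editorial): QUERY_QP overwrites the cached QPC image, which is
 * why the sched_q fields of both address paths are saved and restored
 * around the command above.  The port numbers reported below are then
 * decoded from bit 6 of sched_q, for example:
 *
 *     sched_q = 0x00:  ((0x00 >> 6) & 0x01) + 1  ==  port 1
 *     sched_q = 0x40:  ((0x40 >> 6) & 0x01) + 1  ==  port 2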
*/ if (qp->qp_type == IBT_UD_RQP) { /* Fill in the UD-specific info */ ud = &attr_p->qp_info.qp_transport.ud; ud->ud_qkey = (ib_qkey_t)qpc->qkey; ud->ud_sq_psn = qpc->next_snd_psn; ud->ud_pkey_ix = qpc->pri_addr_path.pkey_indx; /* port+1 for port 1/2 */ ud->ud_port = (uint8_t)(((qpc->pri_addr_path.sched_q >> 6) & 0x01) + 1); attr_p->qp_info.qp_trans = IBT_UD_SRV; if (qp->qp_serv_type == HERMON_QP_FEXCH) { ibt_pmr_desc_t *pmr; uint64_t heart_beat; _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pmr)) pmr = &attr_p->qp_query_fexch.fq_uni_mem_desc; pmr->pmd_iova = 0; pmr->pmd_lkey = pmr->pmd_rkey = hermon_fcoib_qpn_to_mkey(state, qp->qp_qpnum); pmr->pmd_phys_buf_list_sz = state->hs_fcoib.hfc_mtts_per_mpt; pmr->pmd_sync_required = 0; pmr = &attr_p->qp_query_fexch.fq_bi_mem_desc; pmr->pmd_iova = 0; pmr->pmd_lkey = 0; pmr->pmd_rkey = 0; pmr->pmd_phys_buf_list_sz = 0; pmr->pmd_sync_required = 0; attr_p->qp_query_fexch.fq_flags = ((hermon_get_heart_beat_rq_cmd_post(state, qp->qp_qpnum, &heart_beat) == HERMON_CMD_SUCCESS) && (heart_beat == 0)) ? IBT_FEXCH_HEART_BEAT_OK : IBT_FEXCH_NO_FLAGS; ud->ud_fc = qp->qp_fc_attr; } else if (qp->qp_serv_type == HERMON_QP_FCMND || qp->qp_serv_type == HERMON_QP_RFCI) { ud->ud_fc = qp->qp_fc_attr; } } else if (qp->qp_serv_type == HERMON_QP_RC) { /* Fill in the RC-specific info */ rc = &attr_p->qp_info.qp_transport.rc; rc->rc_sq_psn = qpc->next_snd_psn; rc->rc_rq_psn = qpc->next_rcv_psn; rc->rc_dst_qpn = qpc->rem_qpn; /* Grab the path migration state information */ if (qpc->pm_state == HERMON_QP_PMSTATE_MIGRATED) { rc->rc_mig_state = IBT_STATE_MIGRATED; } else if (qpc->pm_state == HERMON_QP_PMSTATE_REARM) { rc->rc_mig_state = IBT_STATE_REARMED; } else { rc->rc_mig_state = IBT_STATE_ARMED; } rc->rc_rdma_ra_out = (1 << qpc->sra_max); rc->rc_rdma_ra_in = (1 << qpc->rra_max); rc->rc_min_rnr_nak = qpc->min_rnr_nak; rc->rc_path_mtu = qpc->mtu; rc->rc_retry_cnt = qpc->retry_cnt; /* Get the common primary address path fields */ qpc_path = &qpc->pri_addr_path; path_ptr = &rc->rc_path; hermon_get_addr_path(state, qpc_path, &path_ptr->cep_adds_vect, HERMON_ADDRPATH_QP); /* Fill in the additional primary address path fields */ path_ptr->cep_pkey_ix = qpc_path->pkey_indx; path_ptr->cep_hca_port_num = path_ptr->cep_adds_vect.av_port_num = (uint8_t)(((qpc_path->sched_q >> 6) & 0x01) + 1); path_ptr->cep_timeout = qpc_path->ack_timeout; /* Get the common alternate address path fields */ qpc_alt_path = &qpc->alt_addr_path; alt_path_ptr = &rc->rc_alt_path; hermon_get_addr_path(state, qpc_alt_path, &alt_path_ptr->cep_adds_vect, HERMON_ADDRPATH_QP); /* Fill in the additional alternate address path fields */ alt_path_ptr->cep_pkey_ix = qpc_alt_path->pkey_indx; alt_path_ptr->cep_hca_port_num = alt_path_ptr->cep_adds_vect.av_port_num = (uint8_t)(((qpc_alt_path->sched_q >> 6) & 0x01) + 1); alt_path_ptr->cep_timeout = qpc_alt_path->ack_timeout; /* Get the RNR retry time from primary path */ rc->rc_rnr_retry_cnt = qpc->rnr_retry; /* Set the enable flags based on RDMA/Atomic enable bits */ enable_flags = IBT_CEP_NO_FLAGS; enable_flags |= ((qpc->rre == 0) ? 0 : IBT_CEP_RDMA_RD); enable_flags |= ((qpc->rwe == 0) ? 0 : IBT_CEP_RDMA_WR); enable_flags |= ((qpc->rae == 0) ? 
0 : IBT_CEP_ATOMIC); attr_p->qp_info.qp_flags = enable_flags; attr_p->qp_info.qp_trans = IBT_RC_SRV; } else if (qp->qp_serv_type == HERMON_QP_UC) { /* Fill in the UC-specific info */ uc = &attr_p->qp_info.qp_transport.uc; uc->uc_sq_psn = qpc->next_snd_psn; uc->uc_rq_psn = qpc->next_rcv_psn; uc->uc_dst_qpn = qpc->rem_qpn; /* Grab the path migration state information */ if (qpc->pm_state == HERMON_QP_PMSTATE_MIGRATED) { uc->uc_mig_state = IBT_STATE_MIGRATED; } else if (qpc->pm_state == HERMON_QP_PMSTATE_REARM) { uc->uc_mig_state = IBT_STATE_REARMED; } else { uc->uc_mig_state = IBT_STATE_ARMED; } uc->uc_path_mtu = qpc->mtu; /* Get the common primary address path fields */ qpc_path = &qpc->pri_addr_path; path_ptr = &uc->uc_path; hermon_get_addr_path(state, qpc_path, &path_ptr->cep_adds_vect, HERMON_ADDRPATH_QP); /* Fill in the additional primary address path fields */ path_ptr->cep_pkey_ix = qpc_path->pkey_indx; path_ptr->cep_hca_port_num = path_ptr->cep_adds_vect.av_port_num = (uint8_t)(((qpc_path->sched_q >> 6) & 0x01) + 1); /* Get the common alternate address path fields */ qpc_alt_path = &qpc->alt_addr_path; alt_path_ptr = &uc->uc_alt_path; hermon_get_addr_path(state, qpc_alt_path, &alt_path_ptr->cep_adds_vect, HERMON_ADDRPATH_QP); /* Fill in the additional alternate address path fields */ alt_path_ptr->cep_pkey_ix = qpc_alt_path->pkey_indx; alt_path_ptr->cep_hca_port_num = alt_path_ptr->cep_adds_vect.av_port_num = (uint8_t)(((qpc_alt_path->sched_q >> 6) & 0x01) + 1); /* * Set the enable flags based on RDMA enable bits (by * definition UC doesn't support Atomic or RDMA Read) */ enable_flags = ((qpc->rwe == 0) ? 0 : IBT_CEP_RDMA_WR); attr_p->qp_info.qp_flags = enable_flags; attr_p->qp_info.qp_trans = IBT_UC_SRV; } else { HERMON_WARNING(state, "unexpected QP transport type"); mutex_exit(&qp->qp_lock); return (ibc_get_ci_failure(0)); } /* * Under certain circumstances it is possible for the Hermon hardware * to transition to one of the error states without software directly * knowing about it. The QueryQP() call is the one place where we * have an opportunity to sample and update our view of the QP state. */ if (qpc->state == HERMON_QP_SQERR) { attr_p->qp_info.qp_state = IBT_STATE_SQE; qp->qp_state = HERMON_QP_SQERR; HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_SQERR); } if (qpc->state == HERMON_QP_ERR) { attr_p->qp_info.qp_state = IBT_STATE_ERROR; qp->qp_state = HERMON_QP_ERR; HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_ERR); } mutex_exit(&qp->qp_lock); return (DDI_SUCCESS); } /* * hermon_qp_create_qpn() * Context: Can be called from interrupt or base context. */ static int hermon_qp_create_qpn(hermon_state_t *state, hermon_qphdl_t qp, hermon_rsrc_t *qpc) { hermon_qpn_entry_t query; hermon_qpn_entry_t *entry; avl_index_t where; /* * Build a query (for the AVL tree lookup) and attempt to find * a previously added entry that has a matching QPC index. If * no matching entry is found, then allocate, initialize, and * add an entry to the AVL tree. * If a matching entry is found, then increment its QPN counter * and reference counter. */ query.qpn_indx = qpc->hr_indx; mutex_enter(&state->hs_qpn_avl_lock); entry = (hermon_qpn_entry_t *)avl_find(&state->hs_qpn_avl, &query, &where); if (entry == NULL) { /* * Allocate and initialize a QPN entry, then insert * it into the AVL tree. 
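 *
 * (The lookup above follows the usual AVL find-or-insert idiom: a
 * stack-allocated query entry carries only the key, qpn_indx, and the
 * "where" cookie returned by avl_find() is handed to avl_insert()
 * when no match exists.  In outline:
 *
 *     query.qpn_indx = qpc->hr_indx;
 *     entry = avl_find(&state->hs_qpn_avl, &query, &where);
 *     if (entry == NULL)
 *             avl_insert(&state->hs_qpn_avl, new_entry, where);
 *
 * where "new_entry" stands for the freshly allocated entry below.)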
*/ entry = (hermon_qpn_entry_t *)kmem_zalloc( sizeof (hermon_qpn_entry_t), KM_NOSLEEP); if (entry == NULL) { mutex_exit(&state->hs_qpn_avl_lock); return (DDI_FAILURE); } _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*entry)) entry->qpn_indx = qpc->hr_indx; entry->qpn_refcnt = 0; entry->qpn_counter = 0; avl_insert(&state->hs_qpn_avl, entry, where); } /* * Make the AVL tree entry point to the QP context resource that * it will be responsible for tracking */ entry->qpn_qpc = qpc; /* * Setup the QP handle to point to the AVL tree entry. Then * generate the new QP number from the entry's QPN counter value * and the hardware's QP context table index. */ qp->qp_qpn_hdl = entry; qp->qp_qpnum = ((entry->qpn_counter << state->hs_cfg_profile->cp_log_num_qp) | qpc->hr_indx) & HERMON_QP_MAXNUMBER_MSK; qp->qp_ring = qp->qp_qpnum << 8; /* * Increment the reference counter and QPN counter. The QPN * counter always indicates the next available number for use. */ entry->qpn_counter++; entry->qpn_refcnt++; mutex_exit(&state->hs_qpn_avl_lock); return (DDI_SUCCESS); } /* * hermon_qp_release_qpn() * Context: Can be called only from user or kernel context. */ void hermon_qp_release_qpn(hermon_state_t *state, hermon_qpn_entry_t *entry, int flags) { ASSERT(entry != NULL); mutex_enter(&state->hs_qpn_avl_lock); /* * If we are releasing the QP number here, then we decrement the * reference count and check for zero references. If there are * zero references, then we free the QPC context (if it hadn't * already been freed during a HERMON_QPN_FREE_ONLY free, i.e. for * reuse with another similar QP number) and remove the tracking * structure from the QP number AVL tree and free the structure. * If we are not releasing the QP number here, then, as long as we * have not exhausted the usefulness of the QPC context (that is, * re-used it too many times without the reference count having * gone to zero), we free up the QPC context for use by another * thread (which will use it to construct a different QP number * from the same QPC table index). */ if (flags == HERMON_QPN_RELEASE) { entry->qpn_refcnt--; /* * If the reference count is zero, then we free the QPC * context (if it hadn't already been freed in an early * step, e.g. HERMON_QPN_FREE_ONLY) and remove/free the * tracking structure from the QP number AVL tree. */ if (entry->qpn_refcnt == 0) { if (entry->qpn_qpc != NULL) { hermon_rsrc_free(state, &entry->qpn_qpc); } /* * If the current entry has served it's useful * purpose (i.e. been reused the maximum allowable * number of times), then remove it from QP number * AVL tree and free it up. */ if (entry->qpn_counter >= (1 << (24 - state->hs_cfg_profile->cp_log_num_qp))) { avl_remove(&state->hs_qpn_avl, entry); kmem_free(entry, sizeof (hermon_qpn_entry_t)); } } } else if (flags == HERMON_QPN_FREE_ONLY) { /* * Even if we are not freeing the QP number, that will not * always prevent us from releasing the QPC context. In fact, * since the QPC context only forms part of the whole QPN, * we want to free it up for use by other consumers. But * if the reference count is non-zero (which it will always * be when we are doing HERMON_QPN_FREE_ONLY) and the counter * has reached its maximum value, then we cannot reuse the * QPC context until the reference count eventually reaches * zero (in HERMON_QPN_RELEASE, above). 
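 *
 * Worked example (editorial; cp_log_num_qp == 17 is assumed purely
 * for illustration): with QPC table index 0x12, successive QPs built
 * on the same index get
 *
 *     counter 0:  (0 << 17) | 0x12  ==>  QPN 0x000012
 *     counter 1:  (1 << 17) | 0x12  ==>  QPN 0x020012
 *     counter 2:  (2 << 17) | 0x12  ==>  QPN 0x040012
 *
 * hermon_qphdl_from_qpnum() recovers the QPC index by masking with
 * ((1 << cp_log_num_qp) - 1), giving 0x12 in every case.  The entry
 * is retired once qpn_counter reaches (1 << (24 - cp_log_num_qp)),
 * 128 in this example, since the composed value must still fit in
 * the 24-bit QPN space.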
*/ if (entry->qpn_counter < (1 << (24 - state->hs_cfg_profile->cp_log_num_qp))) { hermon_rsrc_free(state, &entry->qpn_qpc); } } mutex_exit(&state->hs_qpn_avl_lock); } /* * hermon_qpn_avl_compare() * Context: Can be called from user or kernel context. */ static int hermon_qpn_avl_compare(const void *q, const void *e) { hermon_qpn_entry_t *entry, *query; entry = (hermon_qpn_entry_t *)e; query = (hermon_qpn_entry_t *)q; if (query->qpn_indx < entry->qpn_indx) { return (-1); } else if (query->qpn_indx > entry->qpn_indx) { return (+1); } else { return (0); } } /* * hermon_qpn_avl_init() * Context: Only called from attach() path context */ void hermon_qpn_avl_init(hermon_state_t *state) { /* Initialize the lock used for QP number (QPN) AVL tree access */ mutex_init(&state->hs_qpn_avl_lock, NULL, MUTEX_DRIVER, DDI_INTR_PRI(state->hs_intrmsi_pri)); /* Initialize the AVL tree for the QP number (QPN) storage */ avl_create(&state->hs_qpn_avl, hermon_qpn_avl_compare, sizeof (hermon_qpn_entry_t), offsetof(hermon_qpn_entry_t, qpn_avlnode)); } /* * hermon_qpn_avl_fini() * Context: Only called from attach() and/or detach() path contexts */ void hermon_qpn_avl_fini(hermon_state_t *state) { hermon_qpn_entry_t *entry; void *cookie; /* * Empty all entries (if necessary) and destroy the AVL tree * that was used for QP number (QPN) tracking. */ cookie = NULL; while ((entry = (hermon_qpn_entry_t *)avl_destroy_nodes( &state->hs_qpn_avl, &cookie)) != NULL) { kmem_free(entry, sizeof (hermon_qpn_entry_t)); } avl_destroy(&state->hs_qpn_avl); /* Destroy the lock used for QP number (QPN) AVL tree access */ mutex_destroy(&state->hs_qpn_avl_lock); } /* * hermon_qphdl_from_qpnum() * Context: Can be called from interrupt or base context. * * This routine is important because changing the unconstrained * portion of the QP number is critical to the detection of a * potential race condition in the QP event handler code (i.e. the case * where a QP is freed and alloc'd again before an event for the * "old" QP can be handled). * * While this is not a perfect solution (not sure that one exists) * it does help to mitigate the chance that this race condition will * cause us to deliver a "stale" event to the new QP owner. Note: * this solution does not scale well because the number of constrained * bits increases (and, hence, the number of unconstrained bits * decreases) as the number of supported QPs grows. For small and * intermediate values, it should hopefully provide sufficient * protection. */ hermon_qphdl_t hermon_qphdl_from_qpnum(hermon_state_t *state, uint_t qpnum) { uint_t qpindx, qpmask; /* Calculate the QP table index from the qpnum */ qpmask = (1 << state->hs_cfg_profile->cp_log_num_qp) - 1; qpindx = qpnum & qpmask; return (hermon_icm_num_to_hdl(state, HERMON_QPC, qpindx)); } /* * hermon_special_qp_rsrc_alloc * Context: Can be called from interrupt or base context. */ static int hermon_special_qp_rsrc_alloc(hermon_state_t *state, ibt_sqp_type_t type, uint_t port, hermon_rsrc_t **qp_rsrc) { uint_t mask, flags; int status; mutex_enter(&state->hs_spec_qplock); flags = state->hs_spec_qpflags; if (type == IBT_SMI_SQP) { /* * Check here to see if the driver has been configured * to instruct the Hermon firmware to handle all incoming * SMP messages (i.e. messages sent to SMA). If so, * then we will treat QP0 as if it has already been * allocated (for internal use). Otherwise, if we allow * the allocation to happen, it will cause unexpected * behaviors (e.g. Hermon SMA becomes unresponsive). 
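 *
 * Illustrative note on the bookkeeping used below (bit positions and
 * zero-based port indexing are assumptions for the example only; the
 * real values come from the HERMON_SPECIAL_QP0/QP1_RSRC definitions):
 * hs_spec_qpflags keeps one bit per (special QP type, port) pair, so
 * a two-port HCA might use
 *
 *     bit (HERMON_SPECIAL_QP0_RSRC + 0)   QP0, first port
 *     bit (HERMON_SPECIAL_QP0_RSRC + 1)   QP0, second port
 *     bit (HERMON_SPECIAL_QP1_RSRC + 0)   QP1, first port
 *     bit (HERMON_SPECIAL_QP1_RSRC + 1)   QP1, second port
 *
 * The RSRC_MASK values cover all ports of one type, which is how
 * "first allocation of this type" (post CONF_SPECIAL_QP) and
 * "already in use on this port" (return IBT_QP_IN_USE) are told
 * apart below.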
*/ if (state->hs_cfg_profile->cp_qp0_agents_in_fw != 0) { mutex_exit(&state->hs_spec_qplock); return (IBT_QP_IN_USE); } /* * If this is the first QP0 allocation, then post * a CONF_SPECIAL_QP firmware command */ if ((flags & HERMON_SPECIAL_QP0_RSRC_MASK) == 0) { status = hermon_conf_special_qp_cmd_post(state, state->hs_spec_qp0->hr_indx, HERMON_CMD_QP_SMI, HERMON_CMD_NOSLEEP_SPIN, HERMON_CMD_SPEC_QP_OPMOD( state->hs_cfg_profile->cp_qp0_agents_in_fw, state->hs_cfg_profile->cp_qp1_agents_in_fw)); if (status != HERMON_CMD_SUCCESS) { mutex_exit(&state->hs_spec_qplock); cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP " "command failed: %08x\n", state->hs_instance, status); return (IBT_INSUFF_RESOURCE); } } /* * Now check (and, if necessary, modify) the flags to indicate * whether the allocation was successful */ mask = (1 << (HERMON_SPECIAL_QP0_RSRC + port)); if (flags & mask) { mutex_exit(&state->hs_spec_qplock); return (IBT_QP_IN_USE); } state->hs_spec_qpflags |= mask; *qp_rsrc = state->hs_spec_qp0; } else { /* * If this is the first QP1 allocation, then post * a CONF_SPECIAL_QP firmware command */ if ((flags & HERMON_SPECIAL_QP1_RSRC_MASK) == 0) { status = hermon_conf_special_qp_cmd_post(state, state->hs_spec_qp1->hr_indx, HERMON_CMD_QP_GSI, HERMON_CMD_NOSLEEP_SPIN, HERMON_CMD_SPEC_QP_OPMOD( state->hs_cfg_profile->cp_qp0_agents_in_fw, state->hs_cfg_profile->cp_qp1_agents_in_fw)); if (status != HERMON_CMD_SUCCESS) { mutex_exit(&state->hs_spec_qplock); cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP " "command failed: %08x\n", state->hs_instance, status); return (IBT_INSUFF_RESOURCE); } } /* * Now check (and, if necessary, modify) the flags to indicate * whether the allocation was successful */ mask = (1 << (HERMON_SPECIAL_QP1_RSRC + port)); if (flags & mask) { mutex_exit(&state->hs_spec_qplock); return (IBT_QP_IN_USE); } state->hs_spec_qpflags |= mask; *qp_rsrc = state->hs_spec_qp1; } mutex_exit(&state->hs_spec_qplock); return (DDI_SUCCESS); } /* * hermon_special_qp_rsrc_free * Context: Can be called from interrupt or base context. 
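 *
 * Allocation and freeing of the special QP resources are serialized
 * by hs_spec_qplock: freeing clears the (type, port) bit in
 * hs_spec_qpflags and, where the remaining flags call for it, posts a
 * CONF_SPECIAL_QP firmware command to deconfigure special QP
 * handling.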
*/ static int hermon_special_qp_rsrc_free(hermon_state_t *state, ibt_sqp_type_t type, uint_t port) { uint_t mask, flags; int status; mutex_enter(&state->hs_spec_qplock); if (type == IBT_SMI_SQP) { mask = (1 << (HERMON_SPECIAL_QP0_RSRC + port)); state->hs_spec_qpflags &= ~mask; flags = state->hs_spec_qpflags; /* * If this is the last QP0 free, then post a CONF_SPECIAL_QP * firmware command now - it'll stop them all */ if (flags) { status = hermon_conf_special_qp_cmd_post(state, 0, HERMON_CMD_QP_SMI, HERMON_CMD_NOSLEEP_SPIN, 0); if (status != HERMON_CMD_SUCCESS) { mutex_exit(&state->hs_spec_qplock); cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP " "command failed: %08x\n", state->hs_instance, status); if (status == HERMON_CMD_INVALID_STATUS) { hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); } return (ibc_get_ci_failure(0)); } } } else { mask = (1 << (HERMON_SPECIAL_QP1_RSRC + port)); state->hs_spec_qpflags &= ~mask; flags = state->hs_spec_qpflags; /* * If this is the last QP1 free, then post a CONF_SPECIAL_QP * firmware command now - it'll stop them all */ if (flags) { status = hermon_conf_special_qp_cmd_post(state, 0, HERMON_CMD_QP_GSI, HERMON_CMD_NOSLEEP_SPIN, 0); if (status != HERMON_CMD_SUCCESS) { mutex_exit(&state->hs_spec_qplock); cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP " "command failed: %08x\n", state->hs_instance, status); if (status == HERMON_CMD_INVALID_STATUS) { hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); } return (ibc_get_ci_failure(0)); } } } mutex_exit(&state->hs_spec_qplock); return (DDI_SUCCESS); } /* * hermon_qp_sgl_to_logwqesz() * Context: Can be called from interrupt or base context. */ static void hermon_qp_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl, uint_t real_max_sgl, hermon_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl) { uint_t max_size, log2, actual_sgl; switch (wq_type) { case HERMON_QP_WQ_TYPE_SENDQ_UD: /* * Use requested maximum SGL to calculate max descriptor size * (while guaranteeing that the descriptor size is a * power-of-2 cachelines). */ max_size = (HERMON_QP_WQE_MLX_SND_HDRS + (num_sgl << 4)); log2 = highbit(max_size); if (ISP2(max_size)) { log2 = log2 - 1; } /* Make sure descriptor is at least the minimum size */ log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM); /* Calculate actual number of SGL (given WQE size) */ actual_sgl = ((1 << log2) - sizeof (hermon_hw_snd_wqe_ctrl_t)) >> 4; break; case HERMON_QP_WQ_TYPE_SENDQ_CONN: /* * Use requested maximum SGL to calculate max descriptor size * (while guaranteeing that the descriptor size is a * power-of-2 cachelines.
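 *
 * ("Power-of-2 cachelines" means the descriptor size is rounded up
 * to the next power of two, e.g. a 192-byte request becomes a
 * 256-byte WQE with log2 == 8, and is never allowed below
 * HERMON_QP_WQE_LOG_MINIMUM.)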
*/ max_size = (HERMON_QP_WQE_MLX_SND_HDRS + (num_sgl << 4)); log2 = highbit(max_size); if (ISP2(max_size)) { log2 = log2 - 1; } /* Make sure descriptor is at least the minimum size */ log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM); /* Calculate actual number of SGL (given WQE size) */ actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_SND_HDRS) >> 4; break; case HERMON_QP_WQ_TYPE_RECVQ: /* * Same as above (except for Recv WQEs) */ max_size = (HERMON_QP_WQE_MLX_RCV_HDRS + (num_sgl << 4)); log2 = highbit(max_size); if (ISP2(max_size)) { log2 = log2 - 1; } /* Make sure descriptor is at least the minimum size */ log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM); /* Calculate actual number of SGL (given WQE size) */ actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_RCV_HDRS) >> 4; break; case HERMON_QP_WQ_TYPE_SENDMLX_QP0: /* * Same as above (except for MLX transport WQEs). For these * WQEs we have to account for the space consumed by the * "inline" packet headers. (This is smaller than for QP1 * below because QP0 is not allowed to send packets with a GRH. */ max_size = (HERMON_QP_WQE_MLX_QP0_HDRS + (num_sgl << 4)); log2 = highbit(max_size); if (ISP2(max_size)) { log2 = log2 - 1; } /* Make sure descriptor is at least the minimum size */ log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM); /* Calculate actual number of SGL (given WQE size) */ actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_QP0_HDRS) >> 4; break; case HERMON_QP_WQ_TYPE_SENDMLX_QP1: /* * Same as above. For these WQEs we again have to account for * the space consumed by the "inline" packet headers. (This * is larger than for QP0 above because we have to account for * the possibility of a GRH in each packet - and this * introduces an alignment issue that causes us to consume * an additional 8 bytes). */ max_size = (HERMON_QP_WQE_MLX_QP1_HDRS + (num_sgl << 4)); log2 = highbit(max_size); if (ISP2(max_size)) { log2 = log2 - 1; } /* Make sure descriptor is at least the minimum size */ log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM); /* Calculate actual number of SGL (given WQE size) */ actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_QP1_HDRS) >> 4; break; default: HERMON_WARNING(state, "unexpected work queue type"); break; } /* Fill in the return values */ *logwqesz = log2; *max_sgl = min(real_max_sgl, actual_sgl); }
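/*
 * Editorial worked example for hermon_qp_sgl_to_logwqesz().  The header
 * size used here is an assumption for illustration only; the real value
 * comes from the HERMON_QP_WQE_* definitions.  Suppose a connected-mode
 * send queue and a 64-byte HERMON_QP_WQE_MLX_SND_HDRS:
 *
 *     num_sgl = 8:    max_size   = 64 + (8 << 4)    = 192
 *                     log2       = highbit(192)     = 8    (192 is not a
 *                                                           power of two)
 *                     WQE size   = 1 << 8           = 256 bytes
 *                     actual_sgl = (256 - 64) >> 4  = 12
 *
 * so the caller is granted min(real_max_sgl, 12) SGL entries even though
 * only 8 were requested, because the rounded-up descriptor has room for
 * more.  When max_size is already a power of two (e.g. num_sgl = 12,
 * max_size = 256), highbit() returns one more than the desired log2,
 * which is why the ISP2() case subtracts one.
 */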