/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * hermon_mr.c
 *    Hermon Memory Region/Window Routines
 *
 *    Implements all the routines necessary to provide the requisite memory
 *    registration verbs.  These include operations like RegisterMemRegion(),
 *    DeregisterMemRegion(), ReregisterMemRegion(), RegisterSharedMemRegion(),
 *    etc., that affect Memory Regions.  It also includes the verbs that
 *    affect Memory Windows, including AllocMemWindow(), FreeMemWindow(),
 *    and QueryMemWindow().
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/esunddi.h>

#include <sys/ib/adapters/hermon/hermon.h>

extern uint32_t hermon_kernel_data_ro;
extern uint32_t hermon_user_data_ro;
extern int hermon_rdma_debug;

/*
 * Used by hermon_mr_keycalc() below to fill in the "unconstrained" portion
 * of Hermon memory keys (LKeys and RKeys)
 */
static uint_t hermon_memkey_cnt = 0x00;
#define HERMON_MEMKEY_SHIFT     24

/* initial state of an MPT */
#define HERMON_MPT_SW_OWNERSHIP 0xF     /* memory regions */
#define HERMON_MPT_FREE         0x3     /* allocate lkey */

static int hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd,
    hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
    hermon_mpt_rsrc_type_t mpt_type);
static int hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new,
    hermon_mr_options_t *op);
static int hermon_mr_rereg_xlat_helper(hermon_state_t *state,
    hermon_mrhdl_t mr, hermon_bind_info_t *bind, hermon_mr_options_t *op,
    uint64_t *mtt_addr, uint_t sleep, uint_t *dereg_level);
static uint64_t hermon_mr_nummtt_needed(hermon_state_t *state,
    hermon_bind_info_t *bind, uint_t *mtt_pgsize);
static int hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind,
    ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer);
static void hermon_mr_mem_unbind(hermon_state_t *state,
    hermon_bind_info_t *bind);
static int hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt,
    hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits);
static int hermon_mr_fast_mtt_write_fmr(hermon_state_t *state,
    hermon_rsrc_t *mtt, ibt_pmr_attr_t *mem_pattr, uint32_t mtt_pgsize_bits);
static uint_t hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc);
static uint_t hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc);

/*
 * The Hermon umem_lockmemory() callback ops.  When userland memory is
 * registered, these callback ops are specified.  The hermon_umap_umemlock_cb()
 * callback will be called whenever the memory for the corresponding
 * ddi_umem_cookie_t is being freed.
 */
static struct umem_callback_ops hermon_umem_cbops = {
    UMEM_CALLBACK_VERSION,
    hermon_umap_umemlock_cb,
};
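/*
 * For illustration only: a consumer in base context might register a
 * kernel virtual range along these lines (a minimal sketch with
 * hypothetical values "kaddr" and "len"; error handling and the
 * surrounding IBTF plumbing are omitted):
 *
 *	ibt_mr_attr_t	mr_attr;
 *	hermon_mrhdl_t	mrhdl;
 *	int		status;
 *
 *	mr_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)kaddr;
 *	mr_attr.mr_len   = len;
 *	mr_attr.mr_as    = NULL;	(kernel address space)
 *	mr_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE;
 *	status = hermon_mr_register(state, pd, &mr_attr, &mrhdl, NULL,
 *	    HERMON_MPT_DMPT);
 */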
/*
 * hermon_mr_register()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_register(hermon_state_t *state, hermon_pdhdl_t pd,
    ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
    hermon_mpt_rsrc_type_t mpt_type)
{
    hermon_bind_info_t bind;
    int status;

    /*
     * Fill in the "bind" struct.  This struct provides the majority
     * of the information that will be used to distinguish between an
     * "addr" binding (as is the case here) and a "buf" binding (see
     * below).  The "bind" struct is later passed to hermon_mr_mem_bind()
     * which does most of the "heavy lifting" for the Hermon memory
     * registration routines.
     */
    bind.bi_type  = HERMON_BINDHDL_VADDR;
    bind.bi_addr  = mr_attr->mr_vaddr;
    bind.bi_len   = mr_attr->mr_len;
    bind.bi_as    = mr_attr->mr_as;
    bind.bi_flags = mr_attr->mr_flags;
    status = hermon_mr_common_reg(state, pd, &bind, mrhdl, op, mpt_type);
    return (status);
}

/*
 * hermon_mr_register_buf()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_register_buf(hermon_state_t *state, hermon_pdhdl_t pd,
    ibt_smr_attr_t *mr_attr, struct buf *buf, hermon_mrhdl_t *mrhdl,
    hermon_mr_options_t *op, hermon_mpt_rsrc_type_t mpt_type)
{
    hermon_bind_info_t bind;
    int status;

    /*
     * Fill in the "bind" struct.  This struct provides the majority
     * of the information that will be used to distinguish between an
     * "addr" binding (see above) and a "buf" binding (as is the case
     * here).  The "bind" struct is later passed to hermon_mr_mem_bind()
     * which does most of the "heavy lifting" for the Hermon memory
     * registration routines.  Note: We have chosen to provide
     * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
     * not set).  It is not critical what value we choose here as it need
     * only be unique for the given RKey (which will happen by default),
     * so the choice here is somewhat arbitrary.
     */
    bind.bi_type = HERMON_BINDHDL_BUF;
    bind.bi_buf  = buf;
    if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
        bind.bi_addr = mr_attr->mr_vaddr;
    } else {
        bind.bi_addr = (uint64_t)(uintptr_t)buf->b_un.b_addr;
    }
    bind.bi_as    = NULL;
    bind.bi_len   = (uint64_t)buf->b_bcount;
    bind.bi_flags = mr_attr->mr_flags;
    status = hermon_mr_common_reg(state, pd, &bind, mrhdl, op, mpt_type);
    return (status);
}

/*
 * hermon_mr_register_shared()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_register_shared(hermon_state_t *state, hermon_mrhdl_t mrhdl,
    hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new)
{
    hermon_rsrc_t *mpt, *mtt, *rsrc;
    hermon_umap_db_entry_t *umapdb;
    hermon_hw_dmpt_t mpt_entry;
    hermon_mrhdl_t mr;
    hermon_bind_info_t *bind;
    ddi_umem_cookie_t umem_cookie;
    size_t umem_len;
    caddr_t umem_addr;
    uint64_t mtt_addr, pgsize_msk;
    uint_t sleep, mr_is_umem;
    int status, umem_flags;

    /*
     * Check the sleep flag.  Ensure that it is consistent with the
     * current thread context (i.e. if we are currently in the interrupt
     * context, then we shouldn't be attempting to sleep).
     */
    sleep = (mr_attr->mr_flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP :
        HERMON_SLEEP;
    if ((sleep == HERMON_SLEEP) &&
        (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
        status = IBT_INVALID_PARAM;
        goto mrshared_fail;
    }

    /* Increment the reference count on the protection domain (PD) */
    hermon_pd_refcnt_inc(pd);

    /*
     * Allocate an MPT entry.  This will be filled in with all the
     * necessary parameters to define the shared memory region.
     * Specifically, it will be made to reference the currently existing
     * MTT entries and ownership of the MPT will be passed to the hardware
     * in the last step below.  If we fail here, we must undo the
     * protection domain reference count.
     */
    status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
    if (status != DDI_SUCCESS) {
        status = IBT_INSUFF_RESOURCE;
        goto mrshared_fail1;
    }

    /*
     * Allocate the software structure for tracking the shared memory
     * region (i.e. the Hermon Memory Region handle).  If we fail here, we
     * must undo the protection domain reference count and the previous
     * resource allocation.
     */
    status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
    if (status != DDI_SUCCESS) {
        status = IBT_INSUFF_RESOURCE;
        goto mrshared_fail2;
    }
    mr = (hermon_mrhdl_t)rsrc->hr_addr;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

    /*
     * Setup and validate the memory region access flags.  This means
     * translating the IBTF's enable flags into the access flags that
     * will be used in later operations.
     */
    mr->mr_accflag = 0;
    if (mr_attr->mr_flags & IBT_MR_ENABLE_WINDOW_BIND)
        mr->mr_accflag |= IBT_MR_WINDOW_BIND;
    if (mr_attr->mr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
        mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
    if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_READ)
        mr->mr_accflag |= IBT_MR_REMOTE_READ;
    if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
        mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
    if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
        mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

    /*
     * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
     * from a certain number of "constrained" bits (the least significant
     * bits) and some number of "unconstrained" bits.  The constrained
     * bits must be set to the index of the entry in the MPT table, but
     * the unconstrained bits can be set to any value we wish.  Note:
     * if no remote access is required, then the RKey value is not filled
     * in.  Otherwise both Rkey and LKey are given the same value.
     */
    mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);

    /* Grab the MR lock for the current memory region */
    mutex_enter(&mrhdl->mr_lock);

    /*
     * Check here to see if the memory region has already been partially
     * deregistered as a result of a hermon_umap_umemlock_cb() callback.
     * If so, this is an error, return failure.
     */
    if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
        mutex_exit(&mrhdl->mr_lock);
        status = IBT_MR_HDL_INVALID;
        goto mrshared_fail3;
    }

    /*
     * Determine if the original memory was from userland and, if so, pin
     * the pages (again) with umem_lockmemory().  This will guarantee a
     * separate callback for each of this shared region's MR handles.
     * If this is userland memory, then allocate an entry in the
     * "userland resources database".  This will later be added to
     * the database (after all further memory registration operations are
     * successful).  If we fail here, we must undo all the above setup.
     */
    mr_is_umem = mrhdl->mr_is_umem;
    if (mr_is_umem) {
        umem_len = ptob(btopr(mrhdl->mr_bindinfo.bi_len));
        umem_addr = (caddr_t)((uintptr_t)mrhdl->mr_bindinfo.bi_addr &
            ~PAGEOFFSET);
        umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
            DDI_UMEMLOCK_LONGTERM);
        status = umem_lockmemory(umem_addr, umem_len, umem_flags,
            &umem_cookie, &hermon_umem_cbops, NULL);
        if (status != 0) {
            mutex_exit(&mrhdl->mr_lock);
            status = IBT_INSUFF_RESOURCE;
            goto mrshared_fail3;
        }

        umapdb = hermon_umap_db_alloc(state->hs_instance,
            (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
            (uint64_t)(uintptr_t)rsrc);
        if (umapdb == NULL) {
            mutex_exit(&mrhdl->mr_lock);
            status = IBT_INSUFF_RESOURCE;
            goto mrshared_fail4;
        }
    }

    /*
     * Copy the MTT resource pointer (and additional parameters) from
     * the original Hermon Memory Region handle.
     * Note: this is normally where the hermon_mr_mem_bind() routine would
     * be called, but because we already have bound and filled-in MTT
     * entries it is simply a matter here of managing the MTT reference
     * count and grabbing the address of the MTT table entries (for
     * filling in the shared region's MPT entry).
     */
    mr->mr_mttrsrcp   = mrhdl->mr_mttrsrcp;
    mr->mr_logmttpgsz = mrhdl->mr_logmttpgsz;
    mr->mr_bindinfo   = mrhdl->mr_bindinfo;
    mr->mr_mttrefcntp = mrhdl->mr_mttrefcntp;
    mutex_exit(&mrhdl->mr_lock);
    bind = &mr->mr_bindinfo;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
    mtt = mr->mr_mttrsrcp;

    /*
     * Increment the MTT reference count (to reflect the fact that
     * the MTT is now shared)
     */
    (void) hermon_mtt_refcnt_inc(mr->mr_mttrefcntp);

    /*
     * Update the new "bind" virtual address.  Do some extra work here
     * to ensure proper alignment.  That is, make sure that the page
     * offset for the beginning of the old range is the same as the
     * offset for this new mapping
     */
    pgsize_msk = (((uint64_t)1 << mr->mr_logmttpgsz) - 1);
    bind->bi_addr = ((mr_attr->mr_vaddr & ~pgsize_msk) |
        (mr->mr_bindinfo.bi_addr & pgsize_msk));

    /*
     * Fill in the MPT entry.  This is the final step before passing
     * ownership of the MPT entry to the Hermon hardware.  We use all of
     * the information collected/calculated above to fill in the
     * requisite portions of the MPT.
     */
    bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
    mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
    mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
    mpt_entry.rw      = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
    mpt_entry.rr      = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
    mpt_entry.lw      = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
    mpt_entry.lr      = 1;
    mpt_entry.reg_win = HERMON_MPT_IS_REGION;
    mpt_entry.entity_sz = mr->mr_logmttpgsz;
    mpt_entry.mem_key = mr->mr_lkey;
    mpt_entry.pd = pd->pd_pdnum;
    mpt_entry.start_addr = bind->bi_addr;
    mpt_entry.reg_win_len = bind->bi_len;
    mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
    mpt_entry.mtt_addr_h = mtt_addr >> 32;
    mpt_entry.mtt_addr_l = mtt_addr >> 3;

    /*
     * Write the MPT entry to hardware.  Lastly, we pass ownership of
     * the entry to the hardware.  Note: in general, this operation
     * shouldn't fail.  But if it does, we have to undo everything we've
     * done above before returning error.
     */
    status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
        sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
    if (status != HERMON_CMD_SUCCESS) {
        cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        status = ibc_get_ci_failure(0);
        goto mrshared_fail5;
    }

    /*
     * Fill in the rest of the Hermon Memory Region handle.  Having
     * successfully transferred ownership of the MPT, we can update the
     * following fields for use in further operations on the MR.
     */
    mr->mr_mptrsrcp = mpt;
    mr->mr_mttrsrcp = mtt;
    mr->mr_mpt_type = HERMON_MPT_DMPT;
    mr->mr_pdhdl = pd;
    mr->mr_rsrcp = rsrc;
    mr->mr_is_umem = mr_is_umem;
    mr->mr_is_fmr = 0;
    mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
    mr->mr_umem_cbfunc = NULL;
    mr->mr_umem_cbarg1 = NULL;
    mr->mr_umem_cbarg2 = NULL;
    mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey);
    mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey);

    /*
     * If this is userland memory, then we need to insert the previously
     * allocated entry into the "userland resources database".  This will
     * allow for later coordination between the hermon_umap_umemlock_cb()
     * callback and hermon_mr_deregister().
     */
    if (mr_is_umem) {
        hermon_umap_db_add(umapdb);
    }

    *mrhdl_new = mr;
    return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrshared_fail5:
    (void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp);
    if (mr_is_umem) {
        hermon_umap_db_free(umapdb);
    }
mrshared_fail4:
    if (mr_is_umem) {
        ddi_umem_unlock(umem_cookie);
    }
mrshared_fail3:
    hermon_rsrc_free(state, &rsrc);
mrshared_fail2:
    hermon_rsrc_free(state, &mpt);
mrshared_fail1:
    hermon_pd_refcnt_dec(pd);
mrshared_fail:
    return (status);
}
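/*
 * For illustration: if region A is registered first, its MTT reference
 * count starts at one.  A successful hermon_mr_register_shared() call then
 * produces a second handle B that points at A's MTT entries and bumps the
 * shared count to two.  Deregistering A afterwards frees only A's MPT and
 * software handle; the MTT entries (and the underlying memory binding)
 * persist until B is deregistered and the count drops back to zero (see
 * the hermon_mtt_refcnt_dec() handling in hermon_mr_deregister() below).
 */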
/*
 * hermon_mr_alloc_fmr()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_alloc_fmr(hermon_state_t *state, hermon_pdhdl_t pd,
    hermon_fmrhdl_t fmr_pool, hermon_mrhdl_t *mrhdl)
{
    hermon_rsrc_t *mpt, *mtt, *rsrc;
    hermon_hw_dmpt_t mpt_entry;
    hermon_mrhdl_t mr;
    hermon_bind_info_t bind;
    uint64_t mtt_addr;
    uint64_t nummtt;
    uint_t sleep, mtt_pgsize_bits;
    int status;
    offset_t i;
    hermon_icm_table_t *icm_table;
    hermon_dma_info_t *dma_info;
    uint32_t index1, index2, rindx;

    /*
     * Check the sleep flag.  Ensure that it is consistent with the
     * current thread context (i.e. if we are currently in the interrupt
     * context, then we shouldn't be attempting to sleep).
     */
    sleep = (fmr_pool->fmr_flags & IBT_MR_SLEEP) ? HERMON_SLEEP :
        HERMON_NOSLEEP;
    if ((sleep == HERMON_SLEEP) &&
        (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
        return (IBT_INVALID_PARAM);
    }

    /* Increment the reference count on the protection domain (PD) */
    hermon_pd_refcnt_inc(pd);

    /*
     * Allocate an MPT entry.  This will be filled in with all the
     * necessary parameters to define the FMR.  Specifically, it will be
     * made to reference the currently existing MTT entries and ownership
     * of the MPT will be passed to the hardware in the last step below.
     * If we fail here, we must undo the protection domain reference count.
     */
    status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
    if (status != DDI_SUCCESS) {
        status = IBT_INSUFF_RESOURCE;
        goto fmralloc_fail1;
    }

    /*
     * Allocate the software structure for tracking the fmr memory
     * region (i.e. the Hermon Memory Region handle).  If we fail here, we
     * must undo the protection domain reference count and the previous
     * resource allocation.
     */
    status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
    if (status != DDI_SUCCESS) {
        status = IBT_INSUFF_RESOURCE;
        goto fmralloc_fail2;
    }
    mr = (hermon_mrhdl_t)rsrc->hr_addr;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

    /*
     * Setup and validate the memory region access flags.  This means
     * translating the IBTF's enable flags into the access flags that
     * will be used in later operations.
     */
    mr->mr_accflag = 0;
    if (fmr_pool->fmr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
        mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
    if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_READ)
        mr->mr_accflag |= IBT_MR_REMOTE_READ;
    if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
        mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
    if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
        mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

    /*
     * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
     * from a certain number of "constrained" bits (the least significant
     * bits) and some number of "unconstrained" bits.  The constrained
     * bits must be set to the index of the entry in the MPT table, but
     * the unconstrained bits can be set to any value we wish.  Note:
     * if no remote access is required, then the RKey value is not filled
     * in.  Otherwise both Rkey and LKey are given the same value.
     */
    mr->mr_fmr_key = 1;	/* ready for the next reload */
    mr->mr_rkey = mr->mr_lkey = mpt->hr_indx;

    /*
     * Determine number of pages spanned.  This routine uses the
     * information in the "bind" struct to determine the required
     * number of MTT entries needed (and returns the suggested page size -
     * as a "power-of-2" - for each MTT entry).
     */
    /* Assume address will be page aligned later */
    bind.bi_addr = 0;
    /* Calculate size based on given max pages */
    bind.bi_len = fmr_pool->fmr_max_pages << PAGESHIFT;
    nummtt = hermon_mr_nummtt_needed(state, &bind, &mtt_pgsize_bits);

    /*
     * Allocate the MTT entries.  Use the calculations performed above to
     * allocate the required number of MTT entries.  If we fail here, we
     * must not only undo all the previous resource allocation (and PD
     * reference count), but we must also unbind the memory.
     */
    status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, &mtt);
    if (status != DDI_SUCCESS) {
        IBTF_DPRINTF_L2("FMR", "FATAL: too few MTTs");
        status = IBT_INSUFF_RESOURCE;
        goto fmralloc_fail3;
    }
    mr->mr_logmttpgsz = mtt_pgsize_bits;

    /*
     * Fill in the MPT entry.  This is the final step before passing
     * ownership of the MPT entry to the Hermon hardware.  We use all of
     * the information collected/calculated above to fill in the
     * requisite portions of the MPT.
     */
    bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
    mpt_entry.en_bind = 0;
    mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
    mpt_entry.rw      = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
    mpt_entry.rr      = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
    mpt_entry.lw      = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
    mpt_entry.lr      = 1;
    mpt_entry.reg_win = HERMON_MPT_IS_REGION;
    mpt_entry.pd = pd->pd_pdnum;
    mpt_entry.entity_sz = mr->mr_logmttpgsz;
    mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
    mpt_entry.fast_reg_en = 1;
    mpt_entry.mtt_size = (uint_t)nummtt;
    mpt_entry.mtt_addr_h = mtt_addr >> 32;
    mpt_entry.mtt_addr_l = mtt_addr >> 3;
    mpt_entry.mem_key = mr->mr_lkey;

    /*
     * FMR sets these to 0 for now.  Later during actual fmr registration
     * these values are filled in.
     */
    mpt_entry.start_addr = 0;
    mpt_entry.reg_win_len = 0;

    /*
     * Write the MPT entry to hardware.  Lastly, we pass ownership of
     * the entry to the hardware.  Note: in general, this operation
     * shouldn't fail.  But if it does, we have to undo everything we've
     * done above before returning error.
     */
    status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
        sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
    if (status != HERMON_CMD_SUCCESS) {
        cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        status = ibc_get_ci_failure(0);
        goto fmralloc_fail4;
    }

    /*
     * Fill in the rest of the Hermon Memory Region handle.  Having
     * successfully transferred ownership of the MPT, we can update the
     * following fields for use in further operations on the MR.  Also, set
     * that this is an FMR region.
     */
    mr->mr_mptrsrcp = mpt;
    mr->mr_mttrsrcp = mtt;
    mr->mr_mpt_type = HERMON_MPT_DMPT;
    mr->mr_pdhdl = pd;
    mr->mr_rsrcp = rsrc;
    mr->mr_is_fmr = 1;
    mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey);
    mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey);
    mr->mr_mttaddr = mtt_addr;
    (void) memcpy(&mr->mr_bindinfo, &bind, sizeof (hermon_bind_info_t));

    /* initialize hr_addr for use during register/deregister/invalidate */
    icm_table = &state->hs_icm[HERMON_DMPT];
    rindx = mpt->hr_indx;
    hermon_index(index1, index2, rindx, icm_table, i);
    dma_info = icm_table->icm_dma[index1] + index2;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mpt))
    mpt->hr_addr = (void *)((uintptr_t)(dma_info->vaddr + i * mpt->hr_len));

    *mrhdl = mr;
    return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
fmralloc_fail4:
    kmem_free(mtt, sizeof (hermon_rsrc_t) * nummtt);
fmralloc_fail3:
    hermon_rsrc_free(state, &rsrc);
fmralloc_fail2:
    hermon_rsrc_free(state, &mpt);
fmralloc_fail1:
    hermon_pd_refcnt_dec(pd);
fmralloc_fail:
    return (status);
}

/*
 * hermon_mr_register_physical_fmr()
 *    Context: Can be called from interrupt or base context.
 */
/*ARGSUSED*/
int
hermon_mr_register_physical_fmr(hermon_state_t *state,
    ibt_pmr_attr_t *mem_pattr_p, hermon_mrhdl_t mr, ibt_pmr_desc_t *mem_desc_p)
{
    hermon_rsrc_t *mpt;
    uint64_t *mpt_table;
    int status;
    uint32_t key;

    mutex_enter(&mr->mr_lock);
    mpt = mr->mr_mptrsrcp;
    mpt_table = (uint64_t *)mpt->hr_addr;

    /* Write MPT status to SW bit */
    *(uint8_t *)mpt_table = 0xF0;

    membar_producer();

    /*
     * Write the mapped addresses into the MTT entries.  FMR needs to do
     * this a little differently, so we call the fmr specific fast mtt
     * write here.
     */
    status = hermon_mr_fast_mtt_write_fmr(state, mr->mr_mttrsrcp,
        mem_pattr_p, mr->mr_logmttpgsz);
    if (status != DDI_SUCCESS) {
        mutex_exit(&mr->mr_lock);
        status = ibc_get_ci_failure(0);
        goto fmr_reg_fail1;
    }

    /*
     * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
     * from a certain number of "constrained" bits (the least significant
     * bits) and some number of "unconstrained" bits.  The constrained
     * bits must be set to the index of the entry in the MPT table, but
     * the unconstrained bits can be set to any value we wish.  Note:
     * if no remote access is required, then the RKey value is not filled
     * in.  Otherwise both Rkey and LKey are given the same value.
     */
    key = mpt->hr_indx | (mr->mr_fmr_key++ << HERMON_MEMKEY_SHIFT);
    mr->mr_lkey = mr->mr_rkey = hermon_mr_key_swap(key);

    /* write mem key value */
    *(uint32_t *)&mpt_table[1] = htonl(key);

    /* write length value */
    mpt_table[3] = htonll(mem_pattr_p->pmr_len);

    /* write start addr value */
    mpt_table[2] = htonll(mem_pattr_p->pmr_iova);

    /* write lkey value */
    *(uint32_t *)&mpt_table[4] = htonl(key);

    membar_producer();

    /* Write MPT status to HW bit */
    *(uint8_t *)mpt_table = 0x00;

    /* Fill in return parameters */
    mem_desc_p->pmd_lkey = mr->mr_lkey;
    mem_desc_p->pmd_rkey = mr->mr_rkey;
    mem_desc_p->pmd_iova = mem_pattr_p->pmr_iova;
    mem_desc_p->pmd_phys_buf_list_sz = mem_pattr_p->pmr_len;

    /* Fill in MR bindinfo struct for later sync or query operations */
    mr->mr_bindinfo.bi_addr = mem_pattr_p->pmr_iova;
    mr->mr_bindinfo.bi_flags = mem_pattr_p->pmr_flags & IBT_MR_NONCOHERENT;

    mutex_exit(&mr->mr_lock);
    return (DDI_SUCCESS);

fmr_reg_fail1:
    /*
     * Note, we fail here, and purposely leave the memory ownership in
     * software.  The memory tables may be corrupt, so we leave the region
     * unregistered.
     */
    return (status);
}
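/*
 * The fast-path registration above relies on a simple ownership handshake
 * with the hardware; summarized for illustration (this restates the code
 * above rather than describing an additional mechanism):
 *
 *	*(uint8_t *)mpt_table = 0xF0;	(mark the dMPT software-owned)
 *	membar_producer();		(order the claim before the update)
 *	... write MTTs, memory key, length, start address, lkey ...
 *	membar_producer();		(order the update before the release)
 *	*(uint8_t *)mpt_table = 0x00;	(hand the dMPT back to hardware)
 *
 * The producer barriers are what make the in-place dMPT update safe
 * without posting a SW2HW_MPT/HW2SW_MPT firmware command on every FMR
 * reload.
 */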
/*
 * hermon_mr_deregister()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_mr_deregister(hermon_state_t *state, hermon_mrhdl_t *mrhdl,
    uint_t level, uint_t sleep)
{
    hermon_rsrc_t *mpt, *mtt, *rsrc, *mtt_refcnt;
    hermon_umap_db_entry_t *umapdb;
    hermon_pdhdl_t pd;
    hermon_mrhdl_t mr;
    hermon_bind_info_t *bind;
    uint64_t value;
    int status;
    uint_t shared_mtt;

    /*
     * Check the sleep flag.  Ensure that it is consistent with the
     * current thread context (i.e. if we are currently in the interrupt
     * context, then we shouldn't be attempting to sleep).
     */
    if ((sleep == HERMON_SLEEP) &&
        (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
        status = IBT_INVALID_PARAM;
        return (status);
    }

    /*
     * Pull all the necessary information from the Hermon Memory Region
     * handle.  This is necessary here because the resource for the
     * MR handle is going to be freed up as part of this deregistration.
     */
    mr = *mrhdl;
    mutex_enter(&mr->mr_lock);
    mpt = mr->mr_mptrsrcp;
    mtt = mr->mr_mttrsrcp;
    mtt_refcnt = mr->mr_mttrefcntp;
    rsrc = mr->mr_rsrcp;
    pd = mr->mr_pdhdl;
    bind = &mr->mr_bindinfo;

    /*
     * Check here if the memory region is really an FMR.  If so, this is a
     * bad thing and we shouldn't be here.  Return failure.
     */
    if (mr->mr_is_fmr) {
        mutex_exit(&mr->mr_lock);
        return (IBT_INVALID_PARAM);
    }

    /*
     * Check here to see if the memory region has already been partially
     * deregistered as a result of the hermon_umap_umemlock_cb() callback.
     * If so, then jump to the end and free the remaining resources.
     */
    if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
        goto mrdereg_finish_cleanup;
    }
    if (hermon_rdma_debug & 0x4)
        IBTF_DPRINTF_L2("mr", "dereg: mr %p key %x",
            mr, mr->mr_rkey);

    /*
     * We must drop the "mr_lock" here to ensure that both SLEEP and
     * NOSLEEP calls into the firmware work as expected.  Also, if two
     * threads are attempting to access this MR (via de-register,
     * re-register, or otherwise), then we allow the firmware to enforce
     * the checking, that only one deregister is valid.
     */
    mutex_exit(&mr->mr_lock);

    /*
     * Reclaim MPT entry from hardware (if necessary).  Since the
     * hermon_mr_deregister() routine is used in the memory region
     * reregistration process as well, it is possible that we will
     * not always wish to reclaim ownership of the MPT.  Check the
     * "level" arg and, if necessary, attempt to reclaim it.  If
     * the ownership transfer fails for any reason, we check to see
     * what command status was returned from the hardware.  The only
     * "expected" error status is the one that indicates an attempt to
     * deregister a memory region that has memory windows bound to it.
     */
    if (level >= HERMON_MR_DEREG_ALL) {
        if (mr->mr_mpt_type >= HERMON_MPT_DMPT) {
            status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT,
                NULL, 0, mpt->hr_indx, sleep);
            if (status != HERMON_CMD_SUCCESS) {
                if (status == HERMON_CMD_REG_BOUND) {
                    return (IBT_MR_IN_USE);
                } else {
                    cmn_err(CE_CONT, "Hermon: HW2SW_MPT "
                        "command failed: %08x\n", status);
                    if (status == HERMON_CMD_INVALID_STATUS) {
                        hermon_fm_ereport(state, HCA_SYS_ERR,
                            HCA_ERR_SRV_LOST);
                    }
                    return (IBT_INVALID_PARAM);
                }
            }
        }
    }

    /*
     * Re-grab the mr_lock here.  Since further access to the protected
     * 'mr' structure is needed, and we would have returned previously for
     * the multiple deregistration case, we can safely grab the lock here.
     */
    mutex_enter(&mr->mr_lock);

    /*
     * If the memory had come from userland, then we do a lookup in the
     * "userland resources database".  On success, we free the entry, call
     * ddi_umem_unlock(), and continue the cleanup.
     * On failure (which is an indication that the umem_lockmemory()
     * callback has called hermon_mr_deregister()), we call
     * ddi_umem_unlock() and invalidate the "mr_umemcookie" field in the
     * MR handle (this will be used later to detect that only partial
     * cleanup still remains to be done on the MR handle).
     */
    if (mr->mr_is_umem) {
        status = hermon_umap_db_find(state->hs_instance,
            (uint64_t)(uintptr_t)mr->mr_umemcookie,
            MLNX_UMAP_MRMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE,
            &umapdb);
        if (status == DDI_SUCCESS) {
            hermon_umap_db_free(umapdb);
            ddi_umem_unlock(mr->mr_umemcookie);
        } else {
            ddi_umem_unlock(mr->mr_umemcookie);
            mr->mr_umemcookie = NULL;
        }
    }

    /* mtt_refcnt is NULL in the case of hermon_dma_mr_register() */
    if (mtt_refcnt != NULL) {
        /*
         * Decrement the MTT reference count.  Since the MTT resource
         * may be shared between multiple memory regions (as a result
         * of a "RegisterSharedMR" verb) it is important that we not
         * free up or unbind resources prematurely.  If it's not shared
         * (as indicated by the return status), then free the resource.
         */
        shared_mtt = hermon_mtt_refcnt_dec(mtt_refcnt);
        if (!shared_mtt) {
            hermon_rsrc_free(state, &mtt_refcnt);
        }

        /*
         * Free up the MTT entries and unbind the memory.  Here,
         * as above, we attempt to free these resources only if
         * it is appropriate to do so.
         * Note, 'bind' is NULL in the alloc_lkey case.
         */
        if (!shared_mtt) {
            if (level >= HERMON_MR_DEREG_NO_HW2SW_MPT) {
                hermon_mr_mem_unbind(state, bind);
            }
            hermon_rsrc_free(state, &mtt);
        }
    }

    /*
     * If the MR handle has been invalidated, then drop the
     * lock and return success.  Note: This only happens because
     * the umem_lockmemory() callback has been triggered.  The
     * cleanup here is partial, and further cleanup (in a
     * subsequent hermon_mr_deregister() call) will be necessary.
     */
    if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
        mutex_exit(&mr->mr_lock);
        return (DDI_SUCCESS);
    }

mrdereg_finish_cleanup:
    mutex_exit(&mr->mr_lock);

    /* Free the Hermon Memory Region handle */
    hermon_rsrc_free(state, &rsrc);

    /* Free up the MPT entry resource */
    if (mpt != NULL)
        hermon_rsrc_free(state, &mpt);

    /* Decrement the reference count on the protection domain (PD) */
    hermon_pd_refcnt_dec(pd);

    /* Set the mrhdl pointer to NULL and return success */
    *mrhdl = NULL;

    return (DDI_SUCCESS);
}

/*
 * hermon_mr_dealloc_fmr()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_mr_dealloc_fmr(hermon_state_t *state, hermon_mrhdl_t *mrhdl)
{
    hermon_rsrc_t *mpt, *mtt, *rsrc;
    hermon_pdhdl_t pd;
    hermon_mrhdl_t mr;

    /*
     * Pull all the necessary information from the Hermon Memory Region
     * handle.  This is necessary here because the resource for the
     * MR handle is going to be freed up as part of this deregistration.
     */
    mr = *mrhdl;
    mutex_enter(&mr->mr_lock);
    mpt = mr->mr_mptrsrcp;
    mtt = mr->mr_mttrsrcp;
    rsrc = mr->mr_rsrcp;
    pd = mr->mr_pdhdl;
    mutex_exit(&mr->mr_lock);

    /* Free the MTT entries */
    hermon_rsrc_free(state, &mtt);

    /* Free the Hermon Memory Region handle */
    hermon_rsrc_free(state, &rsrc);

    /* Free up the MPT entry resource */
    hermon_rsrc_free(state, &mpt);

    /* Decrement the reference count on the protection domain (PD) */
    hermon_pd_refcnt_dec(pd);

    /* Set the mrhdl pointer to NULL and return success */
    *mrhdl = NULL;

    return (DDI_SUCCESS);
}
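/*
 * A note on the "level" argument used by hermon_mr_deregister() above,
 * as the code there implies: HERMON_MR_DEREG_ALL reclaims the MPT from
 * hardware (HW2SW_MPT) and releases all software resources, while
 * HERMON_MR_DEREG_NO_HW2SW_MPT skips only the firmware ownership
 * transfer but still unbinds the memory and frees the MTT entries.  The
 * reregistration path relies on the lesser levels to tear down just the
 * pieces it intends to rebuild.
 */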
/*
 * hermon_mr_query()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_mr_query(hermon_state_t *state, hermon_mrhdl_t mr,
    ibt_mr_query_attr_t *attr)
{
    int status;
    hermon_hw_dmpt_t mpt_entry;
    uint32_t lkey;

    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr))

    mutex_enter(&mr->mr_lock);

    /*
     * Check here to see if the memory region has already been partially
     * deregistered as a result of a hermon_umap_umemlock_cb() callback.
     * If so, this is an error, return failure.
     */
    if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
        mutex_exit(&mr->mr_lock);
        return (IBT_MR_HDL_INVALID);
    }

    status = hermon_cmn_query_cmd_post(state, QUERY_MPT, 0,
        mr->mr_lkey >> 8, &mpt_entry, sizeof (hermon_hw_dmpt_t),
        HERMON_NOSLEEP);
    if (status != HERMON_CMD_SUCCESS) {
        cmn_err(CE_CONT, "Hermon: QUERY_MPT failed: status %x", status);
        mutex_exit(&mr->mr_lock);
        return (ibc_get_ci_failure(0));
    }

    /* Update the mr sw struct from the hw struct. */
    lkey = mpt_entry.mem_key;
    mr->mr_lkey = mr->mr_rkey = (lkey >> 8) | (lkey << 24);
    mr->mr_bindinfo.bi_addr = mpt_entry.start_addr;
    mr->mr_bindinfo.bi_len = mpt_entry.reg_win_len;
    mr->mr_accflag = (mr->mr_accflag & IBT_MR_RO_DISABLED) |
        (mpt_entry.lw ? IBT_MR_LOCAL_WRITE : 0) |
        (mpt_entry.rr ? IBT_MR_REMOTE_READ : 0) |
        (mpt_entry.rw ? IBT_MR_REMOTE_WRITE : 0) |
        (mpt_entry.atomic ? IBT_MR_REMOTE_ATOMIC : 0) |
        (mpt_entry.en_bind ? IBT_MR_WINDOW_BIND : 0);
    mr->mr_mttaddr = ((uint64_t)mpt_entry.mtt_addr_h << 32) |
        (mpt_entry.mtt_addr_l << 3);
    mr->mr_logmttpgsz = mpt_entry.entity_sz;

    /* Fill in the queried attributes */
    attr->mr_lkey_state =
        (mpt_entry.status == HERMON_MPT_FREE) ? IBT_KEY_FREE :
        (mpt_entry.status == HERMON_MPT_SW_OWNERSHIP) ? IBT_KEY_INVALID :
        IBT_KEY_VALID;
    attr->mr_phys_buf_list_sz = mpt_entry.mtt_size;
    attr->mr_attr_flags = mr->mr_accflag;
    attr->mr_pd = (ibt_pd_hdl_t)mr->mr_pdhdl;

    /* Fill in the "local" attributes */
    attr->mr_lkey = (ibt_lkey_t)mr->mr_lkey;
    attr->mr_lbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
    attr->mr_lbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len;

    /*
     * Fill in the "remote" attributes (if necessary).  Note: the
     * remote attributes are only valid if the memory region has one
     * or more of the remote access flags set.
     */
    if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
        (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
        (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
        attr->mr_rkey = (ibt_rkey_t)mr->mr_rkey;
        attr->mr_rbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
        attr->mr_rbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len;
    }

    /*
     * If the region is mapped for streaming (i.e. noncoherent), then a
     * sync is required.
     */
    attr->mr_sync_required = (mr->mr_bindinfo.bi_flags &
        IBT_MR_NONCOHERENT) ? B_TRUE : B_FALSE;

    mutex_exit(&mr->mr_lock);
    return (DDI_SUCCESS);
}

/*
 * hermon_mr_reregister()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_reregister(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new,
    hermon_mr_options_t *op)
{
    hermon_bind_info_t bind;
    int status;

    /*
     * Fill in the "bind" struct.  This struct provides the majority
     * of the information that will be used to distinguish between an
     * "addr" binding (as is the case here) and a "buf" binding (see
     * below).  The "bind" struct is later passed to hermon_mr_mem_bind()
     * which does most of the "heavy lifting" for the Hermon memory
     * registration (and reregistration) routines.
     */
    bind.bi_type  = HERMON_BINDHDL_VADDR;
    bind.bi_addr  = mr_attr->mr_vaddr;
    bind.bi_len   = mr_attr->mr_len;
    bind.bi_as    = mr_attr->mr_as;
    bind.bi_flags = mr_attr->mr_flags;
    status = hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
    return (status);
}

/*
 * hermon_mr_reregister_buf()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_reregister_buf(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf,
    hermon_mrhdl_t *mrhdl_new, hermon_mr_options_t *op)
{
    hermon_bind_info_t bind;
    int status;

    /*
     * Fill in the "bind" struct.  This struct provides the majority
     * of the information that will be used to distinguish between an
     * "addr" binding (see above) and a "buf" binding (as is the case
     * here).  The "bind" struct is later passed to hermon_mr_mem_bind()
     * which does most of the "heavy lifting" for the Hermon memory
     * registration routines.  Note: We have chosen to provide
     * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
     * not set).  It is not critical what value we choose here as it need
     * only be unique for the given RKey (which will happen by default),
     * so the choice here is somewhat arbitrary.
     */
    bind.bi_type = HERMON_BINDHDL_BUF;
    bind.bi_buf  = buf;
    if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
        bind.bi_addr = mr_attr->mr_vaddr;
    } else {
        bind.bi_addr = (uint64_t)(uintptr_t)buf->b_un.b_addr;
    }
    bind.bi_len   = (uint64_t)buf->b_bcount;
    bind.bi_flags = mr_attr->mr_flags;
    bind.bi_as    = NULL;
    status = hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
    return (status);
}

/*
 * hermon_mr_sync()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_mr_sync(hermon_state_t *state, ibt_mr_sync_t *mr_segs, size_t num_segs)
{
    hermon_mrhdl_t mrhdl;
    uint64_t seg_vaddr, seg_len, seg_end;
    uint64_t mr_start, mr_end;
    uint_t type;
    int status, i;

    /* Process each of the ibt_mr_sync_t's */
    for (i = 0; i < num_segs; i++) {
        mrhdl = (hermon_mrhdl_t)mr_segs[i].ms_handle;

        /* Check for valid memory region handle */
        if (mrhdl == NULL) {
            status = IBT_MR_HDL_INVALID;
            goto mrsync_fail;
        }
        mutex_enter(&mrhdl->mr_lock);

        /*
         * Check here to see if the memory region has already been
         * partially deregistered as a result of a
         * hermon_umap_umemlock_cb() callback.  If so, this is an
         * error, return failure.
         */
        if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
            mutex_exit(&mrhdl->mr_lock);
            status = IBT_MR_HDL_INVALID;
            goto mrsync_fail;
        }

        /* Check for valid bounds on sync request */
        seg_vaddr = mr_segs[i].ms_vaddr;
        seg_len = mr_segs[i].ms_len;
        seg_end = seg_vaddr + seg_len - 1;
        mr_start = mrhdl->mr_bindinfo.bi_addr;
        mr_end = mr_start + mrhdl->mr_bindinfo.bi_len - 1;
        if ((seg_vaddr < mr_start) || (seg_vaddr > mr_end)) {
            mutex_exit(&mrhdl->mr_lock);
            status = IBT_MR_VA_INVALID;
            goto mrsync_fail;
        }
        if ((seg_end < mr_start) || (seg_end > mr_end)) {
            mutex_exit(&mrhdl->mr_lock);
            status = IBT_MR_LEN_INVALID;
            goto mrsync_fail;
        }

        /* Determine what type (i.e. direction) for sync */
        if (mr_segs[i].ms_flags & IBT_SYNC_READ) {
            type = DDI_DMA_SYNC_FORDEV;
        } else if (mr_segs[i].ms_flags & IBT_SYNC_WRITE) {
            type = DDI_DMA_SYNC_FORCPU;
        } else {
            mutex_exit(&mrhdl->mr_lock);
            status = IBT_INVALID_PARAM;
            goto mrsync_fail;
        }

        (void) ddi_dma_sync(mrhdl->mr_bindinfo.bi_dmahdl,
            (off_t)(seg_vaddr - mr_start), (size_t)seg_len, type);

        mutex_exit(&mrhdl->mr_lock);
    }

    return (DDI_SUCCESS);

mrsync_fail:
    return (status);
}
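/*
 * Direction mnemonic for the sync-type mapping above: IBT_SYNC_READ means
 * the HCA is about to read data the CPU just wrote, so the segment is
 * synced DDI_DMA_SYNC_FORDEV; IBT_SYNC_WRITE means the CPU is about to
 * read data the HCA wrote, so the segment is synced DDI_DMA_SYNC_FORCPU.
 * For example, a consumer that has just received data into a noncoherent
 * region would pass IBT_SYNC_WRITE before touching the buffer.
 */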
/*
 * hermon_mw_alloc()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mw_alloc(hermon_state_t *state, hermon_pdhdl_t pd, ibt_mw_flags_t flags,
    hermon_mwhdl_t *mwhdl)
{
    hermon_rsrc_t *mpt, *rsrc;
    hermon_hw_dmpt_t mpt_entry;
    hermon_mwhdl_t mw;
    uint_t sleep;
    int status;

    if (state != NULL)	/* XXX - bogus test that is always TRUE */
        return (IBT_INSUFF_RESOURCE);

    /*
     * Check the sleep flag.  Ensure that it is consistent with the
     * current thread context (i.e. if we are currently in the interrupt
     * context, then we shouldn't be attempting to sleep).
     */
    sleep = (flags & IBT_MW_NOSLEEP) ? HERMON_NOSLEEP : HERMON_SLEEP;
    if ((sleep == HERMON_SLEEP) &&
        (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
        status = IBT_INVALID_PARAM;
        goto mwalloc_fail;
    }

    /* Increment the reference count on the protection domain (PD) */
    hermon_pd_refcnt_inc(pd);

    /*
     * Allocate an MPT entry (for use as a memory window).  Since the
     * Hermon hardware uses the MPT entry for memory regions and for
     * memory windows, we will fill in this MPT with all the necessary
     * parameters for the memory window.  And then (just as we do for
     * memory regions) ownership will be passed to the hardware in the
     * final step below.  If we fail here, we must undo the protection
     * domain reference count.
     */
    status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
    if (status != DDI_SUCCESS) {
        status = IBT_INSUFF_RESOURCE;
        goto mwalloc_fail1;
    }

    /*
     * Allocate the software structure for tracking the memory window (i.e.
     * the Hermon Memory Window handle).  Note: This is actually the same
     * software structure used for tracking memory regions, but since many
     * of the same properties are needed, only a single structure is
     * necessary.  If we fail here, we must undo the protection domain
     * reference count and the previous resource allocation.
     */
    status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
    if (status != DDI_SUCCESS) {
        status = IBT_INSUFF_RESOURCE;
        goto mwalloc_fail2;
    }
    mw = (hermon_mwhdl_t)rsrc->hr_addr;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))

    /*
     * Calculate an "unbound" RKey from MPT index.  In much the same way
     * as we do for memory regions (above), this key is constructed from
     * a "constrained" portion (which depends on the MPT index) and an
     * "unconstrained" portion (which may be arbitrarily chosen).
     */
    mw->mr_rkey = hermon_mr_keycalc(mpt->hr_indx);

    /*
     * Fill in the MPT entry.  This is the final step before passing
     * ownership of the MPT entry to the Hermon hardware.  We use all of
     * the information collected/calculated above to fill in the
     * requisite portions of the MPT.  Note: fewer fields in the MPT
     * entry are necessary to allocate a memory window.
     */
    bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
    mpt_entry.reg_win = HERMON_MPT_IS_WINDOW;
    mpt_entry.mem_key = mw->mr_rkey;
    mpt_entry.pd = pd->pd_pdnum;
    mpt_entry.lr = 1;

    /*
     * Write the MPT entry to hardware.  Lastly, we pass ownership of
     * the entry to the hardware.  Note: in general, this operation
     * shouldn't fail.  But if it does, we have to undo everything we've
     * done above before returning error.
     */
    status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
        sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
    if (status != HERMON_CMD_SUCCESS) {
        cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        status = ibc_get_ci_failure(0);
        goto mwalloc_fail3;
    }

    /*
     * Fill in the rest of the Hermon Memory Window handle.
     * Having successfully transferred ownership of the MPT, we can
     * update the following fields for use in further operations on
     * the MW.
     */
    mw->mr_mptrsrcp = mpt;
    mw->mr_pdhdl = pd;
    mw->mr_rsrcp = rsrc;
    mw->mr_rkey = hermon_mr_key_swap(mw->mr_rkey);
    *mwhdl = mw;

    return (DDI_SUCCESS);

mwalloc_fail3:
    hermon_rsrc_free(state, &rsrc);
mwalloc_fail2:
    hermon_rsrc_free(state, &mpt);
mwalloc_fail1:
    hermon_pd_refcnt_dec(pd);
mwalloc_fail:
    return (status);
}

/*
 * hermon_mw_free()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mw_free(hermon_state_t *state, hermon_mwhdl_t *mwhdl, uint_t sleep)
{
    hermon_rsrc_t *mpt, *rsrc;
    hermon_mwhdl_t mw;
    int status;
    hermon_pdhdl_t pd;

    /*
     * Check the sleep flag.  Ensure that it is consistent with the
     * current thread context (i.e. if we are currently in the interrupt
     * context, then we shouldn't be attempting to sleep).
     */
    if ((sleep == HERMON_SLEEP) &&
        (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
        status = IBT_INVALID_PARAM;
        return (status);
    }

    /*
     * Pull all the necessary information from the Hermon Memory Window
     * handle.  This is necessary here because the resource for the
     * MW handle is going to be freed up as part of this operation.
     */
    mw = *mwhdl;
    mutex_enter(&mw->mr_lock);
    mpt = mw->mr_mptrsrcp;
    rsrc = mw->mr_rsrcp;
    pd = mw->mr_pdhdl;
    mutex_exit(&mw->mr_lock);
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))

    /*
     * Reclaim the MPT entry from hardware.  Note: in general, it is
     * unexpected for this operation to return an error.
     */
    status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL, 0,
        mpt->hr_indx, sleep);
    if (status != HERMON_CMD_SUCCESS) {
        cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        return (ibc_get_ci_failure(0));
    }

    /* Free the Hermon Memory Window handle */
    hermon_rsrc_free(state, &rsrc);

    /* Free up the MPT entry resource */
    hermon_rsrc_free(state, &mpt);

    /* Decrement the reference count on the protection domain (PD) */
    hermon_pd_refcnt_dec(pd);

    /* Set the mwhdl pointer to NULL and return success */
    *mwhdl = NULL;

    return (DDI_SUCCESS);
}

/*
 * hermon_mr_keycalc()
 *    Context: Can be called from interrupt or base context.
 *    NOTE:  Produces a key in the form of
 *	KKKKKKKK IIIIIIII IIIIIIII IIIIIIII
 *    where K == the arbitrary bits and I == the index
 */
uint32_t
hermon_mr_keycalc(uint32_t indx)
{
    uint32_t tmp_key, tmp_indx;

    /*
     * Generate a simple key from counter.  Note:  We increment this
     * static variable _intentionally_ without any kind of mutex around
     * it.  First, single-threading all operations through a single lock
     * would be a bad idea (from a performance point-of-view).  Second,
     * the upper "unconstrained" bits don't really have to be unique
     * because the lower bits are guaranteed to be (although we do make a
     * best effort to ensure that they are).  Third, the window for the
     * race (where both threads read and update the counter at the same
     * time) is incredibly small.
     * And, lastly, we'd like to make this into a "random" key
     */
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(hermon_memkey_cnt))
    tmp_key = (hermon_memkey_cnt++) << HERMON_MEMKEY_SHIFT;
    tmp_indx = indx & 0xffffff;
    return (tmp_key | tmp_indx);
}
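/*
 * Worked example of the two key formats (illustrative values): with an
 * MPT index of 0x000012 and a counter value of 0xAB, hermon_mr_keycalc()
 * yields 0xAB000012, the (key[7:0],index[23:0]) form the hardware expects.
 * hermon_mr_key_swap() below converts that to 0x000012AB, the
 * (index[23:0],key[7:0]) form used as the memory key that consumers see.
 */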
/*
 * hermon_mr_key_swap()
 *    Context: Can be called from interrupt or base context.
 *    NOTE:  Produces a key in the form of
 *	IIIIIIII IIIIIIII IIIIIIII KKKKKKKK
 *    where K == the arbitrary bits and I == the index
 */
uint32_t
hermon_mr_key_swap(uint32_t indx)
{
    /*
     * The memory key format to pass down to the hardware is
     * (key[7:0],index[23:0]), which defines the index to the
     * hardware resource.  When the driver passes this as a memory
     * key, (i.e. to retrieve a resource) the format is
     * (index[23:0],key[7:0]).
     */
    return (((indx >> 24) & 0x000000ff) | ((indx << 8) & 0xffffff00));
}

/*
 * hermon_mr_common_reg()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd,
    hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
    hermon_mpt_rsrc_type_t mpt_type)
{
    hermon_rsrc_t *mpt, *mtt, *rsrc, *mtt_refcnt;
    hermon_umap_db_entry_t *umapdb;
    hermon_sw_refcnt_t *swrc_tmp;
    hermon_hw_dmpt_t mpt_entry;
    hermon_mrhdl_t mr;
    ibt_mr_flags_t flags;
    hermon_bind_info_t *bh;
    ddi_dma_handle_t bind_dmahdl;
    ddi_umem_cookie_t umem_cookie;
    size_t umem_len;
    caddr_t umem_addr;
    uint64_t mtt_addr, max_sz;
    uint_t sleep, mtt_pgsize_bits, bind_type, mr_is_umem;
    int status, umem_flags, bind_override_addr;

    /*
     * Check the "options" flag.  Currently this flag tells the driver
     * whether or not the region should be bound normally (i.e. with
     * entries written into the PCI IOMMU), whether it should be
     * registered to bypass the IOMMU, and whether or not the resulting
     * address should be "zero-based" (to aid the alignment restrictions
     * for QPs).
     */
    if (op == NULL) {
        bind_type = HERMON_BINDMEM_NORMAL;
        bind_dmahdl = NULL;
        bind_override_addr = 0;
    } else {
        bind_type = op->mro_bind_type;
        bind_dmahdl = op->mro_bind_dmahdl;
        bind_override_addr = op->mro_bind_override_addr;
    }

    /* check what kind of mpt to use */

    /* Extract the flags field from the hermon_bind_info_t */
    flags = bind->bi_flags;

    /*
     * Check for invalid length.  Check if the length is zero or if the
     * length is larger than the maximum configured value.  Return error
     * if it is.
     */
    max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz);
    if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
        status = IBT_MR_LEN_INVALID;
        goto mrcommon_fail;
    }

    /*
     * Check the sleep flag.  Ensure that it is consistent with the
     * current thread context (i.e. if we are currently in the interrupt
     * context, then we shouldn't be attempting to sleep).
     */
    sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP : HERMON_SLEEP;
    if ((sleep == HERMON_SLEEP) &&
        (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
        status = IBT_INVALID_PARAM;
        goto mrcommon_fail;
    }

    /* Increment the reference count on the protection domain (PD) */
    hermon_pd_refcnt_inc(pd);

    /*
     * Allocate an MPT entry.  This will be filled in with all the
     * necessary parameters to define the memory region.  And then
     * ownership will be passed to the hardware in the final step
     * below.  If we fail here, we must undo the protection domain
     * reference count.
     */
    if (mpt_type == HERMON_MPT_DMPT) {
        status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
        if (status != DDI_SUCCESS) {
            status = IBT_INSUFF_RESOURCE;
            goto mrcommon_fail1;
        }
    } else {
        mpt = NULL;
    }

    /*
     * Allocate the software structure for tracking the memory region (i.e.
     * the Hermon Memory Region handle).  If we fail here, we must undo
     * the protection domain reference count and the previous resource
     * allocation.
     */
    status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
    if (status != DDI_SUCCESS) {
        status = IBT_INSUFF_RESOURCE;
        goto mrcommon_fail2;
    }
    mr = (hermon_mrhdl_t)rsrc->hr_addr;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

    /*
     * Setup and validate the memory region access flags.  This means
     * translating the IBTF's enable flags into the access flags that
     * will be used in later operations.
     */
    mr->mr_accflag = 0;
    if (flags & IBT_MR_ENABLE_WINDOW_BIND)
        mr->mr_accflag |= IBT_MR_WINDOW_BIND;
    if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
        mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
    if (flags & IBT_MR_ENABLE_REMOTE_READ)
        mr->mr_accflag |= IBT_MR_REMOTE_READ;
    if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
        mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
    if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
        mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

    /*
     * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
     * from a certain number of "constrained" bits (the least significant
     * bits) and some number of "unconstrained" bits.  The constrained
     * bits must be set to the index of the entry in the MPT table, but
     * the unconstrained bits can be set to any value we wish.  Note:
     * if no remote access is required, then the RKey value is not filled
     * in.  Otherwise both Rkey and LKey are given the same value.
     */
    if (mpt)
        mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);

    /*
     * Determine if the memory is from userland and pin the pages
     * with umem_lockmemory() if necessary.
     * Then, if this is userland memory, allocate an entry in the
     * "userland resources database".  This will later be added to
     * the database (after all further memory registration operations are
     * successful).  If we fail here, we must undo the reference counts
     * and the previous resource allocations.
     */
    mr_is_umem = (((bind->bi_as != NULL) && (bind->bi_as != &kas)) ? 1 : 0);
    if (mr_is_umem) {
        umem_len = ptob(btopr(bind->bi_len +
            ((uintptr_t)bind->bi_addr & PAGEOFFSET)));
        umem_addr = (caddr_t)((uintptr_t)bind->bi_addr & ~PAGEOFFSET);
        umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
            DDI_UMEMLOCK_LONGTERM);
        status = umem_lockmemory(umem_addr, umem_len, umem_flags,
            &umem_cookie, &hermon_umem_cbops, NULL);
        if (status != 0) {
            status = IBT_INSUFF_RESOURCE;
            goto mrcommon_fail3;
        }

        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
        bind->bi_buf = ddi_umem_iosetup(umem_cookie, 0, umem_len,
            B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
        if (bind->bi_buf == NULL) {
            status = IBT_INSUFF_RESOURCE;
            goto mrcommon_fail3;
        }
        bind->bi_type = HERMON_BINDHDL_UBUF;
        bind->bi_buf->b_flags |= B_READ;
        _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
        _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))

        umapdb = hermon_umap_db_alloc(state->hs_instance,
            (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
            (uint64_t)(uintptr_t)rsrc);
        if (umapdb == NULL) {
            status = IBT_INSUFF_RESOURCE;
            goto mrcommon_fail4;
        }
    }

    /*
     * Setup the bindinfo for the mtt bind call
     */
    bh = &mr->mr_bindinfo;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bh))
    bcopy(bind, bh, sizeof (hermon_bind_info_t));
    bh->bi_bypass = bind_type;
    status = hermon_mr_mtt_bind(state, bh, bind_dmahdl, &mtt,
        &mtt_pgsize_bits, mpt != NULL);
    if (status != DDI_SUCCESS) {
        /*
         * When mtt_bind fails, freerbuf has already been done,
         * so make sure not to call it again.
         */
        bind->bi_type = bh->bi_type;
        goto mrcommon_fail5;
    }
    mr->mr_logmttpgsz = mtt_pgsize_bits;
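    /*
     * To make the MTT sizing above concrete (illustrative numbers only):
     * binding a 64 KB range that starts on a 4 KB boundary with 4 KB MTT
     * pages gives mtt_pgsize_bits == 12 and sixteen MTT entries, and
     * entity_sz in the MPT below is set to that same log2 page size.  A
     * larger common page size, when the binding permits one, shrinks the
     * MTT entry count proportionally.
     */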
    /*
     * Allocate MTT reference count (to track shared memory regions).
     * This reference count resource may never be used on the given
     * memory region, but if it is ever later registered as "shared"
     * memory region then this resource will be necessary.  If we fail
     * here, we do pretty much the same as above to clean up.
     */
    status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1, sleep,
        &mtt_refcnt);
    if (status != DDI_SUCCESS) {
        status = IBT_INSUFF_RESOURCE;
        goto mrcommon_fail6;
    }
    mr->mr_mttrefcntp = mtt_refcnt;
    swrc_tmp = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp))
    HERMON_MTT_REFCNT_INIT(swrc_tmp);

    mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);

    /*
     * Fill in the MPT entry.  This is the final step before passing
     * ownership of the MPT entry to the Hermon hardware.  We use all of
     * the information collected/calculated above to fill in the
     * requisite portions of the MPT.  Do this ONLY for DMPTs.
     */
    if (mpt == NULL)
        goto no_passown;

    bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
    mpt_entry.status = HERMON_MPT_SW_OWNERSHIP;
    mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
    mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
    mpt_entry.rw      = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
    mpt_entry.rr      = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
    mpt_entry.lw      = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
    mpt_entry.lr      = 1;
    mpt_entry.phys_addr = 0;
    mpt_entry.reg_win = HERMON_MPT_IS_REGION;
    mpt_entry.entity_sz = mr->mr_logmttpgsz;
    mpt_entry.mem_key = mr->mr_lkey;
    mpt_entry.pd = pd->pd_pdnum;
    mpt_entry.rem_acc_en = 0;
    mpt_entry.fast_reg_en = 0;
    mpt_entry.en_inval = 0;
    mpt_entry.lkey = 0;
    mpt_entry.win_cnt = 0;

    if (bind_override_addr == 0) {
        mpt_entry.start_addr = bh->bi_addr;
    } else {
        bh->bi_addr = bh->bi_addr & ((1 << mr->mr_logmttpgsz) - 1);
        mpt_entry.start_addr = bh->bi_addr;
    }
    mpt_entry.reg_win_len = bh->bi_len;

    mpt_entry.mtt_addr_h = mtt_addr >> 32;	/* only 8 more bits */
    mpt_entry.mtt_addr_l = mtt_addr >> 3;	/* only 29 bits */

    /*
     * Write the MPT entry to hardware.  Lastly, we pass ownership of
     * the entry to the hardware if needed.  Note: in general, this
     * operation shouldn't fail.  But if it does, we have to undo
     * everything we've done above before returning error.
     *
     * For Hermon, this routine (which is common to the contexts) will only
     * set the ownership if needed - the process of passing the context
     * itself to HW will take care of setting up the MPT (based on type
     * and index).
     */
    mpt_entry.bnd_qp = 0;	/* dMPT for a qp, check for window */
    status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
        sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
    if (status != HERMON_CMD_SUCCESS) {
        cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        status = ibc_get_ci_failure(0);
        goto mrcommon_fail7;
    }
    if (hermon_rdma_debug & 0x4)
        IBTF_DPRINTF_L2("mr", "  reg: mr %p  key %x",
            mr, hermon_mr_key_swap(mr->mr_rkey));
no_passown:

    /*
     * Fill in the rest of the Hermon Memory Region handle.  Having
     * successfully transferred ownership of the MPT, we can update the
     * following fields for use in further operations on the MR.
     */
    mr->mr_mttaddr = mtt_addr;

    mr->mr_log2_pgsz = (mr->mr_logmttpgsz - HERMON_PAGESHIFT);
    mr->mr_mptrsrcp = mpt;
    mr->mr_mttrsrcp = mtt;
    mr->mr_pdhdl = pd;
    mr->mr_rsrcp = rsrc;
    mr->mr_is_umem = mr_is_umem;
    mr->mr_is_fmr = 0;
    mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
    mr->mr_umem_cbfunc = NULL;
    mr->mr_umem_cbarg1 = NULL;
    mr->mr_umem_cbarg2 = NULL;
    mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey);
    mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey);
    mr->mr_mpt_type = mpt_type;

    /*
     * If this is userland memory, then we need to insert the previously
     * allocated entry into the "userland resources database".  This will
     * allow for later coordination between the hermon_umap_umemlock_cb()
     * callback and hermon_mr_deregister().
     */
    if (mr_is_umem) {
        hermon_umap_db_add(umapdb);
    }

    *mrhdl = mr;
    return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrcommon_fail7:
    hermon_rsrc_free(state, &mtt_refcnt);
mrcommon_fail6:
    hermon_mr_mem_unbind(state, bh);
    bind->bi_type = bh->bi_type;
mrcommon_fail5:
    if (mr_is_umem) {
        hermon_umap_db_free(umapdb);
    }
mrcommon_fail4:
    if (mr_is_umem) {
        /*
         * Free up the memory ddi_umem_iosetup() allocates
         * internally.
         */
        if (bind->bi_type == HERMON_BINDHDL_UBUF) {
            freerbuf(bind->bi_buf);
            _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
            bind->bi_type = HERMON_BINDHDL_NONE;
            _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
        }
        ddi_umem_unlock(umem_cookie);
    }
mrcommon_fail3:
    hermon_rsrc_free(state, &rsrc);
mrcommon_fail2:
    if (mpt != NULL)
        hermon_rsrc_free(state, &mpt);
mrcommon_fail1:
    hermon_pd_refcnt_dec(pd);
mrcommon_fail:
    return (status);
}

/*
 * hermon_dma_mr_register()
 *    Context: Can be called from base context.
 */
int
hermon_dma_mr_register(hermon_state_t *state, hermon_pdhdl_t pd,
    ibt_dmr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl)
{
    hermon_rsrc_t *mpt, *rsrc;
    hermon_hw_dmpt_t mpt_entry;
    hermon_mrhdl_t mr;
    ibt_mr_flags_t flags;
    uint_t sleep;
    int status;

    /* Extract the flags field */
    flags = mr_attr->dmr_flags;

    /*
     * Check the sleep flag.  Ensure that it is consistent with the
     * current thread context (i.e. if we are currently in the interrupt
     * context, then we shouldn't be attempting to sleep).
     */
    sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP : HERMON_SLEEP;
    if ((sleep == HERMON_SLEEP) &&
        (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
        status = IBT_INVALID_PARAM;
        goto mrcommon_fail;
    }

    /* Increment the reference count on the protection domain (PD) */
    hermon_pd_refcnt_inc(pd);

    /*
     * Allocate an MPT entry.  This will be filled in with all the
     * necessary parameters to define the memory region.  And then
     * ownership will be passed to the hardware in the final step
     * below.  If we fail here, we must undo the protection domain
     * reference count.
     */
    status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
    if (status != DDI_SUCCESS) {
        status = IBT_INSUFF_RESOURCE;
        goto mrcommon_fail1;
    }

    /*
     * Allocate the software structure for tracking the memory region (i.e.
     * the Hermon Memory Region handle).  If we fail here, we must undo
     * the protection domain reference count and the previous resource
     * allocation.
     */
    status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
    if (status != DDI_SUCCESS) {
        status = IBT_INSUFF_RESOURCE;
        goto mrcommon_fail2;
    }
    mr = (hermon_mrhdl_t)rsrc->hr_addr;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
    bzero(mr, sizeof (*mr));

    /*
     * Setup and validate the memory region access flags.  This means
     * translating the IBTF's enable flags into the access flags that
     * will be used in later operations.
     */
*/ mr->mr_accflag = 0; if (flags & IBT_MR_ENABLE_WINDOW_BIND) mr->mr_accflag |= IBT_MR_WINDOW_BIND; if (flags & IBT_MR_ENABLE_LOCAL_WRITE) mr->mr_accflag |= IBT_MR_LOCAL_WRITE; if (flags & IBT_MR_ENABLE_REMOTE_READ) mr->mr_accflag |= IBT_MR_REMOTE_READ; if (flags & IBT_MR_ENABLE_REMOTE_WRITE) mr->mr_accflag |= IBT_MR_REMOTE_WRITE; if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC) mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC; /* * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed * from a certain number of "constrained" bits (the least significant * bits) and some number of "unconstrained" bits. The constrained * bits must be set to the index of the entry in the MPT table, but * the unconstrained bits can be set to any value we wish. Note: * if no remote access is required, then the RKey value is not filled * in. Otherwise both Rkey and LKey are given the same value. */ if (mpt) mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx); /* * Fill in the MPT entry. This is the final step before passing * ownership of the MPT entry to the Hermon hardware. We use all of * the information collected/calculated above to fill in the * requisite portions of the MPT. Do this ONLY for DMPTs. */ bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t)); mpt_entry.status = HERMON_MPT_SW_OWNERSHIP; mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0; mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0; mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0; mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0; mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0; mpt_entry.lr = 1; mpt_entry.phys_addr = 1; /* critical bit for this */ mpt_entry.reg_win = HERMON_MPT_IS_REGION; mpt_entry.entity_sz = mr->mr_logmttpgsz; mpt_entry.mem_key = mr->mr_lkey; mpt_entry.pd = pd->pd_pdnum; mpt_entry.rem_acc_en = 0; mpt_entry.fast_reg_en = 0; mpt_entry.en_inval = 0; mpt_entry.lkey = 0; mpt_entry.win_cnt = 0; mpt_entry.start_addr = mr_attr->dmr_paddr; mpt_entry.reg_win_len = mr_attr->dmr_len; if (mr_attr->dmr_len == 0) mpt_entry.len_b64 = 1; /* needed for 2^^64 length */ mpt_entry.mtt_addr_h = 0; mpt_entry.mtt_addr_l = 0; /* * Write the MPT entry to hardware. Lastly, we pass ownership of * the entry to the hardware if needed. Note: in general, this * operation shouldn't fail. But if it does, we have to undo * everything we've done above before returning error. * * For Hermon, this routine (which is common to the contexts) will only * set the ownership if needed - the process of passing the context * itself to HW will take care of setting up the MPT (based on type * and index). */ mpt_entry.bnd_qp = 0; /* dMPT for a qp, check for window */ status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry, sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep); if (status != HERMON_CMD_SUCCESS) { cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n", status); if (status == HERMON_CMD_INVALID_STATUS) { hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); } status = ibc_get_ci_failure(0); goto mrcommon_fail7; } /* * Fill in the rest of the Hermon Memory Region handle. Having * successfully transferred ownership of the MPT, we can update the * following fields for use in further operations on the MR. 
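	 * Since a DMA memory region uses no MTT-based translation (the
	 * MPT's phys_addr bit was set above), the mr_mttaddr, mr_log2_pgsz,
	 * and mr_mttrsrcp fields are simply zeroed/NULLed below.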
*/ mr->mr_mttaddr = 0; mr->mr_log2_pgsz = 0; mr->mr_mptrsrcp = mpt; mr->mr_mttrsrcp = NULL; mr->mr_pdhdl = pd; mr->mr_rsrcp = rsrc; mr->mr_is_umem = 0; mr->mr_is_fmr = 0; mr->mr_umemcookie = NULL; mr->mr_umem_cbfunc = NULL; mr->mr_umem_cbarg1 = NULL; mr->mr_umem_cbarg2 = NULL; mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey); mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey); mr->mr_mpt_type = HERMON_MPT_DMPT; *mrhdl = mr; return (DDI_SUCCESS); /* * The following is cleanup for all possible failure cases in this routine */ mrcommon_fail7: hermon_rsrc_free(state, &rsrc); mrcommon_fail2: hermon_rsrc_free(state, &mpt); mrcommon_fail1: hermon_pd_refcnt_dec(pd); mrcommon_fail: return (status); } /* * hermon_mr_alloc_lkey() * Context: Can be called from base context. */ int hermon_mr_alloc_lkey(hermon_state_t *state, hermon_pdhdl_t pd, ibt_lkey_flags_t flags, uint_t nummtt, hermon_mrhdl_t *mrhdl) { hermon_rsrc_t *mpt, *mtt, *rsrc, *mtt_refcnt; hermon_sw_refcnt_t *swrc_tmp; hermon_hw_dmpt_t mpt_entry; hermon_mrhdl_t mr; uint64_t mtt_addr; uint_t sleep; int status; /* Increment the reference count on the protection domain (PD) */ hermon_pd_refcnt_inc(pd); sleep = (flags & IBT_KEY_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP; /* * Allocate an MPT entry. This will be filled in with "some" of the * necessary parameters to define the memory region. And then * ownership will be passed to the hardware in the final step * below. If we fail here, we must undo the protection domain * reference count. * * The MTTs will get filled in when the FRWR is processed. */ status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt); if (status != DDI_SUCCESS) { status = IBT_INSUFF_RESOURCE; goto alloclkey_fail1; } /* * Allocate the software structure for tracking the memory region (i.e. * the Hermon Memory Region handle). If we fail here, we must undo * the protection domain reference count and the previous resource * allocation. */ status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc); if (status != DDI_SUCCESS) { status = IBT_INSUFF_RESOURCE; goto alloclkey_fail2; } mr = (hermon_mrhdl_t)rsrc->hr_addr; _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) bzero(mr, sizeof (*mr)); mr->mr_bindinfo.bi_type = HERMON_BINDHDL_LKEY; mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx); status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, &mtt); if (status != DDI_SUCCESS) { status = IBT_INSUFF_RESOURCE; goto alloclkey_fail3; } mr->mr_logmttpgsz = PAGESHIFT; /* * Allocate MTT reference count (to track shared memory regions). * This reference count resource may never be used on the given * memory region, but if it is ever later registered as "shared" * memory region then this resource will be necessary. If we fail * here, we do pretty much the same as above to clean up. 
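	 * The count is set to its initial value by HERMON_MTT_REFCNT_INIT()
	 * below and is subsequently adjusted through hermon_mtt_refcnt_inc()
	 * and hermon_mtt_refcnt_dec() as sharers are added and removed.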
*/ status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1, sleep, &mtt_refcnt); if (status != DDI_SUCCESS) { status = IBT_INSUFF_RESOURCE; goto alloclkey_fail4; } mr->mr_mttrefcntp = mtt_refcnt; swrc_tmp = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr; _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp)) HERMON_MTT_REFCNT_INIT(swrc_tmp); mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT); bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t)); mpt_entry.status = HERMON_MPT_FREE; mpt_entry.lw = 1; mpt_entry.lr = 1; mpt_entry.reg_win = HERMON_MPT_IS_REGION; mpt_entry.entity_sz = mr->mr_logmttpgsz; mpt_entry.mem_key = mr->mr_lkey; mpt_entry.pd = pd->pd_pdnum; mpt_entry.fast_reg_en = 1; mpt_entry.rem_acc_en = 1; mpt_entry.en_inval = 1; if (flags & IBT_KEY_REMOTE) { mpt_entry.ren_inval = 1; } mpt_entry.mtt_size = nummtt; mpt_entry.mtt_addr_h = mtt_addr >> 32; /* only 8 more bits */ mpt_entry.mtt_addr_l = mtt_addr >> 3; /* only 29 bits */ /* * Write the MPT entry to hardware. Lastly, we pass ownership of * the entry to the hardware if needed. Note: in general, this * operation shouldn't fail. But if it does, we have to undo * everything we've done above before returning error. * * For Hermon, this routine (which is common to the contexts) will only * set the ownership if needed - the process of passing the context * itself to HW will take care of setting up the MPT (based on type * and index). */ status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry, sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep); if (status != HERMON_CMD_SUCCESS) { cmn_err(CE_CONT, "Hermon: alloc_lkey: SW2HW_MPT command " "failed: %08x\n", status); if (status == HERMON_CMD_INVALID_STATUS) { hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); } status = ibc_get_ci_failure(0); goto alloclkey_fail5; } /* * Fill in the rest of the Hermon Memory Region handle. Having * successfully transferred ownership of the MPT, we can update the * following fields for use in further operations on the MR. */ mr->mr_accflag = IBT_MR_LOCAL_WRITE; mr->mr_mttaddr = mtt_addr; mr->mr_log2_pgsz = (mr->mr_logmttpgsz - HERMON_PAGESHIFT); mr->mr_mptrsrcp = mpt; mr->mr_mttrsrcp = mtt; mr->mr_pdhdl = pd; mr->mr_rsrcp = rsrc; mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey); mr->mr_rkey = mr->mr_lkey; mr->mr_mpt_type = HERMON_MPT_DMPT; *mrhdl = mr; return (DDI_SUCCESS); alloclkey_fail5: hermon_rsrc_free(state, &mtt_refcnt); alloclkey_fail4: hermon_rsrc_free(state, &mtt); alloclkey_fail3: hermon_rsrc_free(state, &rsrc); alloclkey_fail2: hermon_rsrc_free(state, &mpt); alloclkey_fail1: hermon_pd_refcnt_dec(pd); return (status); } /* * hermon_mr_fexch_mpt_init() * Context: Can be called from base context. * * This is the same as alloc_lkey, but not returning an mrhdl. */ int hermon_mr_fexch_mpt_init(hermon_state_t *state, hermon_pdhdl_t pd, uint32_t mpt_indx, uint_t nummtt, uint64_t mtt_addr, uint_t sleep) { hermon_hw_dmpt_t mpt_entry; int status; /* * The MTTs will get filled in when the FRWR is processed. 
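	 * Until then, the MPT is written in the HERMON_MPT_FREE state with
	 * fast_reg_en set, so that a later fast-register work request
	 * (rather than this routine) supplies the actual translations.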
*/ bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t)); mpt_entry.status = HERMON_MPT_FREE; mpt_entry.lw = 1; mpt_entry.lr = 1; mpt_entry.rw = 1; mpt_entry.rr = 1; mpt_entry.reg_win = HERMON_MPT_IS_REGION; mpt_entry.entity_sz = PAGESHIFT; mpt_entry.mem_key = mpt_indx; mpt_entry.pd = pd->pd_pdnum; mpt_entry.fast_reg_en = 1; mpt_entry.rem_acc_en = 1; mpt_entry.en_inval = 1; mpt_entry.ren_inval = 1; mpt_entry.mtt_size = nummtt; mpt_entry.mtt_addr_h = mtt_addr >> 32; /* only 8 more bits */ mpt_entry.mtt_addr_l = mtt_addr >> 3; /* only 29 bits */ /* * Write the MPT entry to hardware. Lastly, we pass ownership of * the entry to the hardware if needed. Note: in general, this * operation shouldn't fail. But if it does, we have to undo * everything we've done above before returning error. * * For Hermon, this routine (which is common to the contexts) will only * set the ownership if needed - the process of passing the context * itself to HW will take care of setting up the MPT (based on type * and index). */ status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry, sizeof (hermon_hw_dmpt_t), mpt_indx, sleep); if (status != HERMON_CMD_SUCCESS) { cmn_err(CE_CONT, "Hermon: fexch_mpt_init: SW2HW_MPT command " "failed: %08x\n", status); if (status == HERMON_CMD_INVALID_STATUS) { hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); } status = ibc_get_ci_failure(0); return (status); } /* Increment the reference count on the protection domain (PD) */ hermon_pd_refcnt_inc(pd); return (DDI_SUCCESS); } /* * hermon_mr_fexch_mpt_fini() * Context: Can be called from base context. * * This is the same as deregister_mr, without an mrhdl. */ int hermon_mr_fexch_mpt_fini(hermon_state_t *state, hermon_pdhdl_t pd, uint32_t mpt_indx, uint_t sleep) { int status; status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL, 0, mpt_indx, sleep); if (status != DDI_SUCCESS) { cmn_err(CE_CONT, "Hermon: fexch_mpt_fini: HW2SW_MPT command " "failed: %08x\n", status); if (status == HERMON_CMD_INVALID_STATUS) { hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); } status = ibc_get_ci_failure(0); return (status); } /* Decrement the reference count on the protection domain (PD) */ hermon_pd_refcnt_dec(pd); return (DDI_SUCCESS); } /* * hermon_mr_mtt_bind() * Context: Can be called from interrupt or base context. */ int hermon_mr_mtt_bind(hermon_state_t *state, hermon_bind_info_t *bind, ddi_dma_handle_t bind_dmahdl, hermon_rsrc_t **mtt, uint_t *mtt_pgsize_bits, uint_t is_buffer) { uint64_t nummtt; uint_t sleep; int status; /* * Check the sleep flag. Ensure that it is consistent with the * current thread context (i.e. if we are currently in the interrupt * context, then we shouldn't be attempting to sleep). */ sleep = (bind->bi_flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP : HERMON_SLEEP; if ((sleep == HERMON_SLEEP) && (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) { status = IBT_INVALID_PARAM; goto mrmttbind_fail; } /* * Bind the memory and determine the mapped addresses. This is * the first of two routines that do all the "heavy lifting" for * the Hermon memory registration routines. The hermon_mr_mem_bind() * routine takes the "bind" struct with all its fields filled * in and returns a list of DMA cookies (for the PCI mapped addresses * corresponding to the specified address region) which are used by * the hermon_mr_fast_mtt_write() routine below. If we fail here, we * must undo all the previous resource allocation (and PD reference * count). 
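	 * In outline, the sequence below is: hermon_mr_mem_bind() to get
	 * the DMA cookies, hermon_mr_nummtt_needed() to size the MTT
	 * allocation, hermon_rsrc_alloc(HERMON_MTT) for the entries
	 * themselves, and hermon_mr_fast_mtt_write() to fill them in.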
*/ status = hermon_mr_mem_bind(state, bind, bind_dmahdl, sleep, is_buffer); if (status != DDI_SUCCESS) { status = IBT_INSUFF_RESOURCE; goto mrmttbind_fail; } /* * Determine number of pages spanned. This routine uses the * information in the "bind" struct to determine the required * number of MTT entries needed (and returns the suggested page size - * as a "power-of-2" - for each MTT entry). */ nummtt = hermon_mr_nummtt_needed(state, bind, mtt_pgsize_bits); /* * Allocate the MTT entries. Use the calculations performed above to * allocate the required number of MTT entries. If we fail here, we * must not only undo all the previous resource allocation (and PD * reference count), but we must also unbind the memory. */ status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, mtt); if (status != DDI_SUCCESS) { status = IBT_INSUFF_RESOURCE; goto mrmttbind_fail2; } /* * Write the mapped addresses into the MTT entries. This is part two * of the "heavy lifting" routines that we talked about above. Note: * we pass the suggested page size from the earlier operation here. * And if we fail here, we again do pretty much the same huge clean up. */ status = hermon_mr_fast_mtt_write(state, *mtt, bind, *mtt_pgsize_bits); if (status != DDI_SUCCESS) { /* * hermon_mr_fast_mtt_write() returns DDI_FAILURE * only if it detects a HW error during DMA. */ hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); status = ibc_get_ci_failure(0); goto mrmttbind_fail3; } return (DDI_SUCCESS); /* * The following is cleanup for all possible failure cases in this routine */ mrmttbind_fail3: hermon_rsrc_free(state, mtt); mrmttbind_fail2: hermon_mr_mem_unbind(state, bind); mrmttbind_fail: return (status); } /* * hermon_mr_mtt_unbind() * Context: Can be called from interrupt or base context. */ int hermon_mr_mtt_unbind(hermon_state_t *state, hermon_bind_info_t *bind, hermon_rsrc_t *mtt) { /* * Free up the MTT entries and unbind the memory. Here, as above, we * attempt to free these resources only if it is appropriate to do so. */ hermon_mr_mem_unbind(state, bind); hermon_rsrc_free(state, &mtt); return (DDI_SUCCESS); } /* * hermon_mr_common_rereg() * Context: Can be called from interrupt or base context. */ static int hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr, hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new, hermon_mr_options_t *op) { hermon_rsrc_t *mpt; ibt_mr_attr_flags_t acc_flags_to_use; ibt_mr_flags_t flags; hermon_pdhdl_t pd_to_use; hermon_hw_dmpt_t mpt_entry; uint64_t mtt_addr_to_use, vaddr_to_use, len_to_use; uint_t sleep, dereg_level; int status; _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind)) /* * Check here to see if the memory region corresponds to a userland * mapping. Reregistration of userland memory regions is not * currently supported. Return failure. */ if (mr->mr_is_umem) { status = IBT_MR_HDL_INVALID; goto mrrereg_fail; } mutex_enter(&mr->mr_lock); /* Pull MPT resource pointer from the Hermon Memory Region handle */ mpt = mr->mr_mptrsrcp; /* Extract the flags field from the hermon_bind_info_t */ flags = bind->bi_flags; /* * Check the sleep flag. Ensure that it is consistent with the * current thread context (i.e. if we are currently in the interrupt * context, then we shouldn't be attempting to sleep). */ sleep = (flags & IBT_MR_NOSLEEP) ? 
	    HERMON_NOSLEEP: HERMON_SLEEP;
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		mutex_exit(&mr->mr_lock);
		status = IBT_INVALID_PARAM;
		goto mrrereg_fail;
	}

	/*
	 * First step is to temporarily invalidate the MPT entry.  This
	 * regains ownership from the hardware, and gives us the opportunity
	 * to modify the entry.  Note: The HW2SW_MPT command returns the
	 * current MPT entry contents.  These are saved away here because
	 * they will be reused in a later step below.  If the region has
	 * bound memory windows, then we fail, returning an "in use" error
	 * code.  Otherwise, this is an unexpected error and we deregister
	 * the memory region and return error.
	 *
	 * We use HERMON_CMD_NOSLEEP_SPIN here always because we must protect
	 * against holding the lock around this rereg call in all contexts.
	 */
	status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, &mpt_entry,
	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		if (status == HERMON_CMD_REG_BOUND) {
			return (IBT_MR_IN_USE);
		} else {
			cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			/*
			 * Call deregister and ensure that all current
			 * resources get freed up
			 */
			if (hermon_mr_deregister(state, &mr,
			    HERMON_MR_DEREG_ALL, sleep) != DDI_SUCCESS) {
				HERMON_WARNING(state, "failed to deregister "
				    "memory region");
			}
			return (ibc_get_ci_failure(0));
		}
	}

	/*
	 * If we're changing the protection domain, then validate the new one
	 */
	if (flags & IBT_MR_CHANGE_PD) {

		/* Check for valid PD handle pointer */
		if (pd == NULL) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all current
			 * resources get properly freed up.  Unnecessary
			 * here to attempt to regain software ownership
			 * of the MPT entry as that has already been
			 * done above.
			 */
			if (hermon_mr_deregister(state, &mr,
			    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) !=
			    DDI_SUCCESS) {
				HERMON_WARNING(state, "failed to deregister "
				    "memory region");
			}
			status = IBT_PD_HDL_INVALID;
			goto mrrereg_fail;
		}

		/* Use the new PD handle in all operations below */
		pd_to_use = pd;

	} else {
		/* Use the current PD handle in all operations below */
		pd_to_use = mr->mr_pdhdl;
	}

	/*
	 * If we're changing access permissions, then validate the new ones
	 */
	if (flags & IBT_MR_CHANGE_ACCESS) {
		/*
		 * Validate the access flags.  Both remote write and remote
		 * atomic require the local write flag to be set
		 */
		if (((flags & IBT_MR_ENABLE_REMOTE_WRITE) ||
		    (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)) &&
		    !(flags & IBT_MR_ENABLE_LOCAL_WRITE)) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all current
			 * resources get properly freed up.  Unnecessary
			 * here to attempt to regain software ownership
			 * of the MPT entry as that has already been
			 * done above.
			 */
			if (hermon_mr_deregister(state, &mr,
			    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) !=
			    DDI_SUCCESS) {
				HERMON_WARNING(state, "failed to deregister "
				    "memory region");
			}
			status = IBT_MR_ACCESS_REQ_INVALID;
			goto mrrereg_fail;
		}

		/*
		 * Setup and validate the memory region access flags.  This
		 * means translating the IBTF's enable flags into the access
		 * flags that will be used in later operations.
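		 * Because acc_flags_to_use is rebuilt from zero below, any
		 * enable flag omitted from this rereg request clears the
		 * corresponding access right that the region previously had.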
*/ acc_flags_to_use = 0; if (flags & IBT_MR_ENABLE_WINDOW_BIND) acc_flags_to_use |= IBT_MR_WINDOW_BIND; if (flags & IBT_MR_ENABLE_LOCAL_WRITE) acc_flags_to_use |= IBT_MR_LOCAL_WRITE; if (flags & IBT_MR_ENABLE_REMOTE_READ) acc_flags_to_use |= IBT_MR_REMOTE_READ; if (flags & IBT_MR_ENABLE_REMOTE_WRITE) acc_flags_to_use |= IBT_MR_REMOTE_WRITE; if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC) acc_flags_to_use |= IBT_MR_REMOTE_ATOMIC; } else { acc_flags_to_use = mr->mr_accflag; } /* * If we're modifying the translation, then figure out whether * we can reuse the current MTT resources. This means calling * hermon_mr_rereg_xlat_helper() which does most of the heavy lifting * for the reregistration. If the current memory region contains * sufficient MTT entries for the new regions, then it will be * reused and filled in. Otherwise, new entries will be allocated, * the old ones will be freed, and the new entries will be filled * in. Note: If we're not modifying the translation, then we * should already have all the information we need to update the MPT. * Also note: If hermon_mr_rereg_xlat_helper() fails, it will return * a "dereg_level" which is the level of cleanup that needs to be * passed to hermon_mr_deregister() to finish the cleanup. */ if (flags & IBT_MR_CHANGE_TRANSLATION) { status = hermon_mr_rereg_xlat_helper(state, mr, bind, op, &mtt_addr_to_use, sleep, &dereg_level); if (status != DDI_SUCCESS) { mutex_exit(&mr->mr_lock); /* * Call deregister and ensure that all resources get * properly freed up. */ if (hermon_mr_deregister(state, &mr, dereg_level, sleep) != DDI_SUCCESS) { HERMON_WARNING(state, "failed to deregister " "memory region"); } goto mrrereg_fail; } vaddr_to_use = mr->mr_bindinfo.bi_addr; len_to_use = mr->mr_bindinfo.bi_len; } else { mtt_addr_to_use = mr->mr_mttaddr; vaddr_to_use = mr->mr_bindinfo.bi_addr; len_to_use = mr->mr_bindinfo.bi_len; } /* * Calculate new keys (Lkey, Rkey) from MPT index. Just like they were * when the region was first registered, each key is formed from * "constrained" bits and "unconstrained" bits. Note: If no remote * access is required, then the RKey value is not filled in. Otherwise * both Rkey and LKey are given the same value. */ mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx); if ((acc_flags_to_use & IBT_MR_REMOTE_READ) || (acc_flags_to_use & IBT_MR_REMOTE_WRITE) || (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC)) { mr->mr_rkey = mr->mr_lkey; } else mr->mr_rkey = 0; /* * Fill in the MPT entry. This is the final step before passing * ownership of the MPT entry to the Hermon hardware. We use all of * the information collected/calculated above to fill in the * requisite portions of the MPT. */ bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t)); mpt_entry.status = HERMON_MPT_SW_OWNERSHIP; mpt_entry.en_bind = (acc_flags_to_use & IBT_MR_WINDOW_BIND) ? 1 : 0; mpt_entry.atomic = (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC) ? 1 : 0; mpt_entry.rw = (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ? 1 : 0; mpt_entry.rr = (acc_flags_to_use & IBT_MR_REMOTE_READ) ? 1 : 0; mpt_entry.lw = (acc_flags_to_use & IBT_MR_LOCAL_WRITE) ? 
	    1 : 0;
	mpt_entry.lr = 1;
	mpt_entry.phys_addr = 0;
	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
	mpt_entry.entity_sz = mr->mr_logmttpgsz;
	mpt_entry.mem_key = mr->mr_lkey;
	mpt_entry.pd = pd_to_use->pd_pdnum;
	mpt_entry.start_addr = vaddr_to_use;
	mpt_entry.reg_win_len = len_to_use;
	mpt_entry.mtt_addr_h = mtt_addr_to_use >> 32;
	mpt_entry.mtt_addr_l = mtt_addr_to_use >> 3;

	/*
	 * Write the updated MPT entry to hardware
	 *
	 * We use HERMON_CMD_NOSLEEP_SPIN here always because we must protect
	 * against holding the lock around this rereg call in all contexts.
	 */
	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR,
			    HCA_ERR_SRV_LOST);
		}
		/*
		 * Call deregister and ensure that all current resources get
		 * properly freed up.  Unnecessary here to attempt to regain
		 * software ownership of the MPT entry as that has already
		 * been done above.
		 */
		if (hermon_mr_deregister(state, &mr,
		    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) != DDI_SUCCESS) {
			HERMON_WARNING(state, "failed to deregister memory "
			    "region");
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * If we're changing the PD, then update the PD reference counts now.
	 * This means decrementing the reference count on the old PD and
	 * incrementing the reference count on the new PD.
	 */
	if (flags & IBT_MR_CHANGE_PD) {
		hermon_pd_refcnt_dec(mr->mr_pdhdl);
		hermon_pd_refcnt_inc(pd);
	}

	/*
	 * Update the contents of the Hermon Memory Region handle to reflect
	 * what has been changed.
	 */
	mr->mr_pdhdl = pd_to_use;
	mr->mr_accflag = acc_flags_to_use;
	mr->mr_is_umem = 0;
	mr->mr_is_fmr = 0;
	mr->mr_umemcookie = NULL;
	mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey);
	mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey);

	/* New MR handle is same as the old */
	*mrhdl_new = mr;
	mutex_exit(&mr->mr_lock);
	return (DDI_SUCCESS);

mrrereg_fail:
	return (status);
}


/*
 * hermon_mr_rereg_xlat_helper
 *    Context: Can be called from interrupt or base context.
 *    Note: This routine expects the "mr_lock" to be held when it
 *    is called.  Upon returning failure, this routine passes information
 *    about what "dereg_level" should be passed to hermon_mr_deregister().
 */
static int
hermon_mr_rereg_xlat_helper(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_bind_info_t *bind, hermon_mr_options_t *op, uint64_t *mtt_addr,
    uint_t sleep, uint_t *dereg_level)
{
	hermon_rsrc_t *mtt, *mtt_refcnt;
	hermon_sw_refcnt_t *swrc_old, *swrc_new;
	ddi_dma_handle_t dmahdl;
	uint64_t nummtt_needed, nummtt_in_currrsrc, max_sz;
	uint_t mtt_pgsize_bits, bind_type, reuse_dmahdl;
	int status;

	ASSERT(MUTEX_HELD(&mr->mr_lock));

	/*
	 * Check the "options" flag.  Currently this flag tells the driver
	 * whether or not the region should be bound normally (i.e. with
	 * entries written into the PCI IOMMU) or whether it should be
	 * registered to bypass the IOMMU.
	 */
	if (op == NULL) {
		bind_type = HERMON_BINDMEM_NORMAL;
	} else {
		bind_type = op->mro_bind_type;
	}

	/*
	 * Check for invalid length.  Check if the length is zero or if the
	 * length is larger than the maximum configured value.  Return error
	 * if it is.
	 */
	max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz);
	if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
		/*
		 * Deregister will be called upon returning failure from this
		 * routine.  This will ensure that all current resources get
		 * properly freed up.
Unnecessary to attempt to regain * software ownership of the MPT entry as that has already * been done above (in hermon_mr_reregister()) */ *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT; status = IBT_MR_LEN_INVALID; goto mrrereghelp_fail; } /* * Determine the number of pages necessary for new region and the * number of pages supported by the current MTT resources */ nummtt_needed = hermon_mr_nummtt_needed(state, bind, &mtt_pgsize_bits); nummtt_in_currrsrc = mr->mr_mttrsrcp->hr_len >> HERMON_MTT_SIZE_SHIFT; /* * Depending on whether we have enough pages or not, the next step is * to fill in a set of MTT entries that reflect the new mapping. In * the first case below, we already have enough entries. This means * we need to unbind the memory from the previous mapping, bind the * memory for the new mapping, write the new MTT entries, and update * the mr to reflect the changes. * In the second case below, we do not have enough entries in the * current mapping. So, in this case, we need not only to unbind the * current mapping, but we need to free up the MTT resources associated * with that mapping. After we've successfully done that, we continue * by binding the new memory, allocating new MTT entries, writing the * new MTT entries, and updating the mr to reflect the changes. */ /* * If this region is being shared (i.e. MTT refcount != 1), then we * can't reuse the current MTT resources regardless of their size. * Instead we'll need to alloc new ones (below) just as if there * hadn't been enough room in the current entries. */ swrc_old = (hermon_sw_refcnt_t *)mr->mr_mttrefcntp->hr_addr; if (HERMON_MTT_IS_NOT_SHARED(swrc_old) && (nummtt_needed <= nummtt_in_currrsrc)) { /* * Unbind the old mapping for this memory region, but retain * the ddi_dma_handle_t (if possible) for reuse in the bind * operation below. Note: If original memory region was * bound for IOMMU bypass and the new region can not use * bypass, then a new DMA handle will be necessary. */ if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) { mr->mr_bindinfo.bi_free_dmahdl = 0; hermon_mr_mem_unbind(state, &mr->mr_bindinfo); dmahdl = mr->mr_bindinfo.bi_dmahdl; reuse_dmahdl = 1; } else { hermon_mr_mem_unbind(state, &mr->mr_bindinfo); dmahdl = NULL; reuse_dmahdl = 0; } /* * Bind the new memory and determine the mapped addresses. * As described, this routine and hermon_mr_fast_mtt_write() * do the majority of the work for the memory registration * operations. Note: When we successfully finish the binding, * we will set the "bi_free_dmahdl" flag to indicate that * even though we may have reused the ddi_dma_handle_t we do * wish it to be freed up at some later time. Note also that * if we fail, we may need to cleanup the ddi_dma_handle_t. */ bind->bi_bypass = bind_type; status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1); if (status != DDI_SUCCESS) { if (reuse_dmahdl) { ddi_dma_free_handle(&dmahdl); } /* * Deregister will be called upon returning failure * from this routine. This will ensure that all * current resources get properly freed up. * Unnecessary to attempt to regain software ownership * of the MPT entry as that has already been done * above (in hermon_mr_reregister()). Also unnecessary * to attempt to unbind the memory. 
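			 * The "dereg_level" set here tells the caller's
			 * eventual hermon_mr_deregister() call how much
			 * cleanup is still required; in this case neither
			 * the HW2SW_MPT step nor the memory unbind remains
			 * to be done.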
*/ *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND; status = IBT_INSUFF_RESOURCE; goto mrrereghelp_fail; } if (reuse_dmahdl) { bind->bi_free_dmahdl = 1; } /* * Using the new mapping, but reusing the current MTT * resources, write the updated entries to MTT */ mtt = mr->mr_mttrsrcp; status = hermon_mr_fast_mtt_write(state, mtt, bind, mtt_pgsize_bits); if (status != DDI_SUCCESS) { /* * Deregister will be called upon returning failure * from this routine. This will ensure that all * current resources get properly freed up. * Unnecessary to attempt to regain software ownership * of the MPT entry as that has already been done * above (in hermon_mr_reregister()). Also unnecessary * to attempt to unbind the memory. * * But we do need to unbind the newly bound memory * before returning. */ hermon_mr_mem_unbind(state, bind); *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND; /* * hermon_mr_fast_mtt_write() returns DDI_FAILURE * only if it detects a HW error during DMA. */ hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); status = ibc_get_ci_failure(0); goto mrrereghelp_fail; } /* Put the updated information into the Mem Region handle */ mr->mr_bindinfo = *bind; mr->mr_logmttpgsz = mtt_pgsize_bits; } else { /* * Check if the memory region MTT is shared by any other MRs. * Since the resource may be shared between multiple memory * regions (as a result of a "RegisterSharedMR()" verb) it is * important that we not unbind any resources prematurely. */ if (!HERMON_MTT_IS_SHARED(swrc_old)) { /* * Unbind the old mapping for this memory region, but * retain the ddi_dma_handle_t for reuse in the bind * operation below. Note: This can only be done here * because the region being reregistered is not * currently shared. Also if original memory region * was bound for IOMMU bypass and the new region can * not use bypass, then a new DMA handle will be * necessary. */ if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) { mr->mr_bindinfo.bi_free_dmahdl = 0; hermon_mr_mem_unbind(state, &mr->mr_bindinfo); dmahdl = mr->mr_bindinfo.bi_dmahdl; reuse_dmahdl = 1; } else { hermon_mr_mem_unbind(state, &mr->mr_bindinfo); dmahdl = NULL; reuse_dmahdl = 0; } } else { dmahdl = NULL; reuse_dmahdl = 0; } /* * Bind the new memory and determine the mapped addresses. * As described, this routine and hermon_mr_fast_mtt_write() * do the majority of the work for the memory registration * operations. Note: When we successfully finish the binding, * we will set the "bi_free_dmahdl" flag to indicate that * even though we may have reused the ddi_dma_handle_t we do * wish it to be freed up at some later time. Note also that * if we fail, we may need to cleanup the ddi_dma_handle_t. */ bind->bi_bypass = bind_type; status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1); if (status != DDI_SUCCESS) { if (reuse_dmahdl) { ddi_dma_free_handle(&dmahdl); } /* * Deregister will be called upon returning failure * from this routine. This will ensure that all * current resources get properly freed up. * Unnecessary to attempt to regain software ownership * of the MPT entry as that has already been done * above (in hermon_mr_reregister()). Also unnecessary * to attempt to unbind the memory. 
*/ *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND; status = IBT_INSUFF_RESOURCE; goto mrrereghelp_fail; } if (reuse_dmahdl) { bind->bi_free_dmahdl = 1; } /* * Allocate the new MTT entries resource */ status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt_needed, sleep, &mtt); if (status != DDI_SUCCESS) { /* * Deregister will be called upon returning failure * from this routine. This will ensure that all * current resources get properly freed up. * Unnecessary to attempt to regain software ownership * of the MPT entry as that has already been done * above (in hermon_mr_reregister()). Also unnecessary * to attempt to unbind the memory. * * But we do need to unbind the newly bound memory * before returning. */ hermon_mr_mem_unbind(state, bind); *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND; status = IBT_INSUFF_RESOURCE; goto mrrereghelp_fail; } /* * Allocate MTT reference count (to track shared memory * regions). As mentioned elsewhere above, this reference * count resource may never be used on the given memory region, * but if it is ever later registered as a "shared" memory * region then this resource will be necessary. Note: This * is only necessary here if the existing memory region is * already being shared (because otherwise we already have * a useable reference count resource). */ if (HERMON_MTT_IS_SHARED(swrc_old)) { status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1, sleep, &mtt_refcnt); if (status != DDI_SUCCESS) { /* * Deregister will be called upon returning * failure from this routine. This will ensure * that all current resources get properly * freed up. Unnecessary to attempt to regain * software ownership of the MPT entry as that * has already been done above (in * hermon_mr_reregister()). Also unnecessary * to attempt to unbind the memory. * * But we need to unbind the newly bound * memory and free up the newly allocated MTT * entries before returning. */ hermon_mr_mem_unbind(state, bind); hermon_rsrc_free(state, &mtt); *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND; status = IBT_INSUFF_RESOURCE; goto mrrereghelp_fail; } swrc_new = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr; _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_new)) HERMON_MTT_REFCNT_INIT(swrc_new); } else { mtt_refcnt = mr->mr_mttrefcntp; } /* * Using the new mapping and the new MTT resources, write the * updated entries to MTT */ status = hermon_mr_fast_mtt_write(state, mtt, bind, mtt_pgsize_bits); if (status != DDI_SUCCESS) { /* * Deregister will be called upon returning failure * from this routine. This will ensure that all * current resources get properly freed up. * Unnecessary to attempt to regain software ownership * of the MPT entry as that has already been done * above (in hermon_mr_reregister()). Also unnecessary * to attempt to unbind the memory. * * But we need to unbind the newly bound memory, * free up the newly allocated MTT entries, and * (possibly) free the new MTT reference count * resource before returning. */ if (HERMON_MTT_IS_SHARED(swrc_old)) { hermon_rsrc_free(state, &mtt_refcnt); } hermon_mr_mem_unbind(state, bind); hermon_rsrc_free(state, &mtt); *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND; status = IBT_INSUFF_RESOURCE; goto mrrereghelp_fail; } /* * Check if the memory region MTT is shared by any other MRs. * Since the resource may be shared between multiple memory * regions (as a result of a "RegisterSharedMR()" verb) it is * important that we not free up any resources prematurely. 
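		 * Concretely: if the old MTT is still shared, only its
		 * reference count is decremented below; the MTT entries
		 * themselves are freed only in the unshared case, where
		 * this region was their sole user.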
		 */
		if (HERMON_MTT_IS_SHARED(swrc_old)) {
			/* Decrement MTT reference count for "old" region */
			(void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp);
		} else {
			/* Free up the old MTT entries resource */
			hermon_rsrc_free(state, &mr->mr_mttrsrcp);
		}

		/* Put the updated information into the mrhdl */
		mr->mr_bindinfo = *bind;
		mr->mr_logmttpgsz = mtt_pgsize_bits;
		mr->mr_mttrsrcp = mtt;
		mr->mr_mttrefcntp = mtt_refcnt;
	}

	/*
	 * Calculate and return the updated MTT address (in the DDR address
	 * space).  This will be used by the caller (hermon_mr_reregister)
	 * in the updated MPT entry.
	 */
	*mtt_addr = mtt->hr_indx << HERMON_MTT_SIZE_SHIFT;

	return (DDI_SUCCESS);

mrrereghelp_fail:
	return (status);
}


/*
 * hermon_mr_nummtt_needed()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
static uint64_t
hermon_mr_nummtt_needed(hermon_state_t *state, hermon_bind_info_t *bind,
    uint_t *mtt_pgsize_bits)
{
	uint64_t pg_offset_mask;
	uint64_t pg_offset, tmp_length;

	/*
	 * For now we specify the page size as 8Kb (the default page size
	 * for the sun4u architecture), or 4Kb for x86.  A future
	 * improvement would be to figure out the optimal page size by
	 * examining the dmacookies.
	 */
	*mtt_pgsize_bits = PAGESHIFT;

	pg_offset_mask = ((uint64_t)1 << *mtt_pgsize_bits) - 1;
	pg_offset = bind->bi_addr & pg_offset_mask;
	tmp_length = pg_offset + (bind->bi_len - 1);
	return ((tmp_length >> *mtt_pgsize_bits) + 1);
}


/*
 * hermon_mr_mem_bind()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind,
    ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer)
{
	ddi_dma_attr_t dma_attr;
	int (*callback)(caddr_t);
	int status;

	/* bi_type must be set to a meaningful value to get a bind handle */
	ASSERT(bind->bi_type == HERMON_BINDHDL_VADDR ||
	    bind->bi_type == HERMON_BINDHDL_BUF ||
	    bind->bi_type == HERMON_BINDHDL_UBUF);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))

	/* Set the callback flag appropriately */
	callback = (sleep == HERMON_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;

	/*
	 * Initialize many of the default DMA attributes.  Then, if we're
	 * bypassing the IOMMU, set the DDI_DMA_FORCE_PHYSICAL flag.
	 */
	if (dmahdl == NULL) {
		hermon_dma_attr_init(state, &dma_attr);
#ifdef	__sparc
		if (bind->bi_bypass == HERMON_BINDMEM_BYPASS) {
			dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
		}
#endif

		/* set RO if needed - tunable set and 'is_buffer' is non-0 */
		if (is_buffer) {
			if (! (bind->bi_flags & IBT_MR_DISABLE_RO)) {
				if ((bind->bi_type != HERMON_BINDHDL_UBUF) &&
				    (hermon_kernel_data_ro ==
				    HERMON_RO_ENABLED)) {
					dma_attr.dma_attr_flags |=
					    DDI_DMA_RELAXED_ORDERING;
				}
				if (((bind->bi_type == HERMON_BINDHDL_UBUF) &&
				    (hermon_user_data_ro ==
				    HERMON_RO_ENABLED))) {
					dma_attr.dma_attr_flags |=
					    DDI_DMA_RELAXED_ORDERING;
				}
			}
		}

		/* Allocate a DMA handle for the binding */
		status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
		    callback, NULL, &bind->bi_dmahdl);
		if (status != DDI_SUCCESS) {
			return (status);
		}
		bind->bi_free_dmahdl = 1;

	} else {
		bind->bi_dmahdl = dmahdl;
		bind->bi_free_dmahdl = 0;
	}

	/*
	 * Bind the memory to get the PCI mapped addresses.  The decision
	 * to call ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle()
	 * is determined by the "bi_type" flag.  Note: if the bind operation
	 * fails then we have to free up the DMA handle and return error.
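	 * HERMON_BINDHDL_VADDR bindings are bound through the address and
	 * length in the bind struct, while the two buf-based binding types
	 * are bound through the struct buf pointer instead.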
	 */
	if (bind->bi_type == HERMON_BINDHDL_VADDR) {
		status = ddi_dma_addr_bind_handle(bind->bi_dmahdl, NULL,
		    (caddr_t)(uintptr_t)bind->bi_addr, bind->bi_len,
		    (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), callback, NULL,
		    &bind->bi_dmacookie, &bind->bi_cookiecnt);
	} else {  /* HERMON_BINDHDL_BUF or HERMON_BINDHDL_UBUF */
		status = ddi_dma_buf_bind_handle(bind->bi_dmahdl,
		    bind->bi_buf, (DDI_DMA_RDWR | DDI_DMA_CONSISTENT),
		    callback, NULL, &bind->bi_dmacookie,
		    &bind->bi_cookiecnt);
	}
	if (status != DDI_DMA_MAPPED) {
		if (bind->bi_free_dmahdl != 0) {
			ddi_dma_free_handle(&bind->bi_dmahdl);
		}
		return (status);
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_mr_mem_unbind()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_mr_mem_unbind(hermon_state_t *state, hermon_bind_info_t *bind)
{
	int status;

	/* there is nothing to unbind for alloc_lkey */
	if (bind->bi_type == HERMON_BINDHDL_LKEY)
		return;

	/*
	 * In the HERMON_BINDHDL_UBUF case, the memory that bi_buf points
	 * to was allocated internally by ddi_umem_iosetup(), so it must be
	 * freed here.  Reset bi_type to HERMON_BINDHDL_NONE so that it is
	 * not freed again later.
	 */
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
	if (bind->bi_type == HERMON_BINDHDL_UBUF) {
		freerbuf(bind->bi_buf);
		bind->bi_type = HERMON_BINDHDL_NONE;
	}
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))

	/*
	 * Unbind the DMA memory for the region
	 *
	 * Note: The only way ddi_dma_unbind_handle() currently
	 * can return an error is if the handle passed in is invalid.
	 * Since this should never happen, we choose to return void
	 * from this function!  If this does return an error, however,
	 * then we print a warning message to the console.
	 */
	status = ddi_dma_unbind_handle(bind->bi_dmahdl);
	if (status != DDI_SUCCESS) {
		HERMON_WARNING(state, "failed to unbind DMA mapping");
		return;
	}

	/* Free up the DMA handle */
	if (bind->bi_free_dmahdl != 0) {
		ddi_dma_free_handle(&bind->bi_dmahdl);
	}
}


/*
 * hermon_mr_fast_mtt_write()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt,
    hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits)
{
	hermon_icm_table_t *icm_table;
	hermon_dma_info_t *dma_info;
	uint32_t index1, index2, rindx;
	ddi_dma_cookie_t dmacookie;
	uint_t cookie_cnt;
	uint64_t *mtt_table;
	uint64_t mtt_entry;
	uint64_t addr, endaddr;
	uint64_t pagesize;
	offset_t i, start;
	uint_t per_span;
	int sync_needed;

	/*
	 * XXX According to the PRM, we are to use the WRITE_MTT
	 * command to write out MTTs.  Tavor does not do this,
	 * instead taking advantage of direct access to the MTTs,
	 * and knowledge that Mellanox FMR relies on our ability
	 * to write directly to the MTTs without any further
	 * notification to the firmware.  Likewise, we will choose
	 * to not use the WRITE_MTT command, but to simply write
	 * out the MTTs.
	 */

	/* Calculate page size from the suggested value passed in */
	pagesize = ((uint64_t)1 << mtt_pgsize_bits);

	/* Walk the "cookie list" and fill in the MTT table entries */
	dmacookie = bind->bi_dmacookie;
	cookie_cnt = bind->bi_cookiecnt;

	icm_table = &state->hs_icm[HERMON_MTT];
	rindx = mtt->hr_indx;
	hermon_index(index1, index2, rindx, icm_table, i);
	start = i;
	per_span = icm_table->span;
	dma_info = icm_table->icm_dma[index1] + index2;
	mtt_table = (uint64_t *)(uintptr_t)dma_info->vaddr;

	sync_needed = 0;
	while (cookie_cnt-- > 0) {
		addr = dmacookie.dmac_laddress;
		endaddr = addr + (dmacookie.dmac_size - 1);
		addr = addr & ~((uint64_t)pagesize - 1);

		while (addr <= endaddr) {
			/*
			 * Fill in the mapped addresses (calculated above)
			 * and set HERMON_MTT_ENTRY_PRESENT flag for each
			 * MTT entry.
			 */
			mtt_entry = addr | HERMON_MTT_ENTRY_PRESENT;
			mtt_table[i] = htonll(mtt_entry);
			i++;
			rindx++;

			if (i == per_span) {

				(void) ddi_dma_sync(dma_info->dma_hdl,
				    start * sizeof (hermon_hw_mtt_t),
				    (i - start) * sizeof (hermon_hw_mtt_t),
				    DDI_DMA_SYNC_FORDEV);

				if ((addr + pagesize > endaddr) &&
				    (cookie_cnt == 0))
					return (DDI_SUCCESS);

				/*
				 * Advance to the next ICM span.  Note that
				 * "start", like "i", is an MTT entry index
				 * (the ddi_dma_sync() calls above and below
				 * scale it by the entry size), so it must
				 * not be multiplied by the entry size here.
				 */
				hermon_index(index1, index2, rindx,
				    icm_table, i);
				start = i;
				dma_info = icm_table->icm_dma[index1] +
				    index2;
				mtt_table =
				    (uint64_t *)(uintptr_t)dma_info->vaddr;

				sync_needed = 0;
			} else {
				sync_needed = 1;
			}

			addr += pagesize;
			if (addr == 0) {
				static int do_once = 1;
				_NOTE(SCHEME_PROTECTS_DATA("safe sharing",
				    do_once))
				if (do_once) {
					do_once = 0;
					cmn_err(CE_NOTE, "probable error in "
					    "dma_cookie address from caller\n");
				}
				break;
			}
		}

		/*
		 * When we've reached the end of the current DMA cookie,
		 * jump to the next cookie (if there are more)
		 */
		if (cookie_cnt != 0) {
			ddi_dma_nextcookie(bind->bi_dmahdl, &dmacookie);
		}
	}

	/* done all the cookies, now sync the memory for the device */
	if (sync_needed)
		(void) ddi_dma_sync(dma_info->dma_hdl,
		    start * sizeof (hermon_hw_mtt_t),
		    (i - start) * sizeof (hermon_hw_mtt_t),
		    DDI_DMA_SYNC_FORDEV);

	return (DDI_SUCCESS);
}

/*
 * hermon_mr_fast_mtt_write_fmr()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
static int
hermon_mr_fast_mtt_write_fmr(hermon_state_t *state, hermon_rsrc_t *mtt,
    ibt_pmr_attr_t *mem_pattr, uint32_t mtt_pgsize_bits)
{
	hermon_icm_table_t *icm_table;
	hermon_dma_info_t *dma_info;
	uint32_t index1, index2, rindx;
	uint64_t *mtt_table;
	offset_t i, j;
	uint_t per_span;

	icm_table = &state->hs_icm[HERMON_MTT];
	rindx = mtt->hr_indx;
	hermon_index(index1, index2, rindx, icm_table, i);
	per_span = icm_table->span;
	dma_info = icm_table->icm_dma[index1] + index2;
	mtt_table = (uint64_t *)(uintptr_t)dma_info->vaddr;

	/*
	 * Fill in the MTT table entries
	 */
	for (j = 0; j < mem_pattr->pmr_num_buf; j++) {
		mtt_table[i] = mem_pattr->pmr_addr_list[j].p_laddr;
		i++;
		rindx++;
		if (i == per_span) {
			hermon_index(index1, index2, rindx, icm_table, i);
			dma_info = icm_table->icm_dma[index1] + index2;
			mtt_table = (uint64_t *)(uintptr_t)dma_info->vaddr;
		}
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_mtt_refcnt_inc()
 *    Context: Can be called from interrupt or base context.
 */
static uint_t
hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc)
{
	hermon_sw_refcnt_t *rc;

	rc = (hermon_sw_refcnt_t *)rsrc->hr_addr;
	return (atomic_inc_uint_nv(&rc->swrc_refcnt));
}


/*
 * hermon_mtt_refcnt_dec()
 *    Context: Can be called from interrupt or base context.
 */
static uint_t
hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc)
{
	hermon_sw_refcnt_t *rc;

	rc = (hermon_sw_refcnt_t *)rsrc->hr_addr;
	return (atomic_dec_uint_nv(&rc->swrc_refcnt));
}