1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * hermon_mr.c 29 * Hermon Memory Region/Window Routines 30 * 31 * Implements all the routines necessary to provide the requisite memory 32 * registration verbs. These include operations like RegisterMemRegion(), 33 * DeregisterMemRegion(), ReregisterMemRegion, RegisterSharedMemRegion, 34 * etc., that affect Memory Regions. It also includes the verbs that 35 * affect Memory Windows, including AllocMemWindow(), FreeMemWindow(), 36 * and QueryMemWindow(). 
37 */ 38 39 #include <sys/types.h> 40 #include <sys/conf.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/modctl.h> 44 #include <sys/esunddi.h> 45 46 #include <sys/ib/adapters/hermon/hermon.h> 47 48 extern uint32_t hermon_kernel_data_ro; 49 extern uint32_t hermon_user_data_ro; 50 51 /* 52 * Used by hermon_mr_keycalc() below to fill in the "unconstrained" portion 53 * of Hermon memory keys (LKeys and RKeys) 54 */ 55 static uint_t hermon_memkey_cnt = 0x00; 56 #define HERMON_MEMKEY_SHIFT 24 57 #define HERMON_MPT_SW_OWNERSHIP 0xF 58 59 static int hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd, 60 hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op, 61 hermon_mpt_rsrc_type_t mpt_type); 62 static int hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr, 63 hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new, 64 hermon_mr_options_t *op); 65 static int hermon_mr_rereg_xlat_helper(hermon_state_t *state, hermon_mrhdl_t mr, 66 hermon_bind_info_t *bind, hermon_mr_options_t *op, uint64_t *mtt_addr, 67 uint_t sleep, uint_t *dereg_level); 68 static uint64_t hermon_mr_nummtt_needed(hermon_state_t *state, 69 hermon_bind_info_t *bind, uint_t *mtt_pgsize); 70 static int hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind, 71 ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer); 72 static void hermon_mr_mem_unbind(hermon_state_t *state, 73 hermon_bind_info_t *bind); 74 static int hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt, 75 hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits); 76 static int hermon_mr_fast_mtt_write_fmr(hermon_rsrc_t *mtt, 77 ibt_pmr_attr_t *mem_pattr, uint32_t mtt_pgsize_bits); 78 static uint_t hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc); 79 static uint_t hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc); 80 81 82 /* 83 * The Hermon umem_lockmemory() callback ops. When userland memory is 84 * registered, these callback ops are specified. 
The hermon_umap_umemlock_cb() 85 * callback will be called whenever the memory for the corresponding 86 * ddi_umem_cookie_t is being freed. 87 */ 88 static struct umem_callback_ops hermon_umem_cbops = { 89 UMEM_CALLBACK_VERSION, 90 hermon_umap_umemlock_cb, 91 }; 92 93 94 95 /* 96 * hermon_mr_register() 97 * Context: Can be called from interrupt or base context. 98 */ 99 int 100 hermon_mr_register(hermon_state_t *state, hermon_pdhdl_t pd, 101 ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op, 102 hermon_mpt_rsrc_type_t mpt_type) 103 { 104 hermon_bind_info_t bind; 105 int status; 106 107 /* 108 * Fill in the "bind" struct. This struct provides the majority 109 * of the information that will be used to distinguish between an 110 * "addr" binding (as is the case here) and a "buf" binding (see 111 * below). The "bind" struct is later passed to hermon_mr_mem_bind() 112 * which does most of the "heavy lifting" for the Hermon memory 113 * registration routines. 114 */ 115 bind.bi_type = HERMON_BINDHDL_VADDR; 116 bind.bi_addr = mr_attr->mr_vaddr; 117 bind.bi_len = mr_attr->mr_len; 118 bind.bi_as = mr_attr->mr_as; 119 bind.bi_flags = mr_attr->mr_flags; 120 status = hermon_mr_common_reg(state, pd, &bind, mrhdl, op, 121 mpt_type); 122 return (status); 123 } 124 125 126 /* 127 * hermon_mr_register_buf() 128 * Context: Can be called from interrupt or base context. 129 */ 130 int 131 hermon_mr_register_buf(hermon_state_t *state, hermon_pdhdl_t pd, 132 ibt_smr_attr_t *mr_attr, struct buf *buf, hermon_mrhdl_t *mrhdl, 133 hermon_mr_options_t *op, hermon_mpt_rsrc_type_t mpt_type) 134 { 135 hermon_bind_info_t bind; 136 int status; 137 138 /* 139 * Fill in the "bind" struct. This struct provides the majority 140 * of the information that will be used to distinguish between an 141 * "addr" binding (see above) and a "buf" binding (as is the case 142 * here). 
The "bind" struct is later passed to hermon_mr_mem_bind() 143 * which does most of the "heavy lifting" for the Hermon memory 144 * registration routines. Note: We have chosen to provide 145 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is 146 * not set). It is not critical what value we choose here as it need 147 * only be unique for the given RKey (which will happen by default), 148 * so the choice here is somewhat arbitrary. 149 */ 150 bind.bi_type = HERMON_BINDHDL_BUF; 151 bind.bi_buf = buf; 152 if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) { 153 bind.bi_addr = mr_attr->mr_vaddr; 154 } else { 155 bind.bi_addr = (uint64_t)(uintptr_t)buf->b_un.b_addr; 156 } 157 bind.bi_as = NULL; 158 bind.bi_len = (uint64_t)buf->b_bcount; 159 bind.bi_flags = mr_attr->mr_flags; 160 status = hermon_mr_common_reg(state, pd, &bind, mrhdl, op, mpt_type); 161 return (status); 162 } 163 164 165 /* 166 * hermon_mr_register_shared() 167 * Context: Can be called from interrupt or base context. 168 */ 169 int 170 hermon_mr_register_shared(hermon_state_t *state, hermon_mrhdl_t mrhdl, 171 hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new) 172 { 173 hermon_rsrc_t *mpt, *mtt, *rsrc; 174 hermon_umap_db_entry_t *umapdb; 175 hermon_hw_dmpt_t mpt_entry; 176 hermon_mrhdl_t mr; 177 hermon_bind_info_t *bind; 178 ddi_umem_cookie_t umem_cookie; 179 size_t umem_len; 180 caddr_t umem_addr; 181 uint64_t mtt_addr, pgsize_msk; 182 uint_t sleep, mr_is_umem; 183 int status, umem_flags; 184 185 /* 186 * Check the sleep flag. Ensure that it is consistent with the 187 * current thread context (i.e. if we are currently in the interrupt 188 * context, then we shouldn't be attempting to sleep). 189 */ 190 sleep = (mr_attr->mr_flags & IBT_MR_NOSLEEP) ? 
HERMON_NOSLEEP : 191 HERMON_SLEEP; 192 if ((sleep == HERMON_SLEEP) && 193 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) { 194 status = IBT_INVALID_PARAM; 195 goto mrshared_fail; 196 } 197 198 /* Increment the reference count on the protection domain (PD) */ 199 hermon_pd_refcnt_inc(pd); 200 201 /* 202 * Allocate an MPT entry. This will be filled in with all the 203 * necessary parameters to define the shared memory region. 204 * Specifically, it will be made to reference the currently existing 205 * MTT entries and ownership of the MPT will be passed to the hardware 206 * in the last step below. If we fail here, we must undo the 207 * protection domain reference count. 208 */ 209 status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt); 210 if (status != DDI_SUCCESS) { 211 status = IBT_INSUFF_RESOURCE; 212 goto mrshared_fail1; 213 } 214 215 /* 216 * Allocate the software structure for tracking the shared memory 217 * region (i.e. the Hermon Memory Region handle). If we fail here, we 218 * must undo the protection domain reference count and the previous 219 * resource allocation. 220 */ 221 status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc); 222 if (status != DDI_SUCCESS) { 223 status = IBT_INSUFF_RESOURCE; 224 goto mrshared_fail2; 225 } 226 mr = (hermon_mrhdl_t)rsrc->hr_addr; 227 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) 228 229 /* 230 * Setup and validate the memory region access flags. This means 231 * translating the IBTF's enable flags into the access flags that 232 * will be used in later operations. 
233 */ 234 mr->mr_accflag = 0; 235 if (mr_attr->mr_flags & IBT_MR_ENABLE_WINDOW_BIND) 236 mr->mr_accflag |= IBT_MR_WINDOW_BIND; 237 if (mr_attr->mr_flags & IBT_MR_ENABLE_LOCAL_WRITE) 238 mr->mr_accflag |= IBT_MR_LOCAL_WRITE; 239 if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_READ) 240 mr->mr_accflag |= IBT_MR_REMOTE_READ; 241 if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_WRITE) 242 mr->mr_accflag |= IBT_MR_REMOTE_WRITE; 243 if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC) 244 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC; 245 246 /* 247 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed 248 * from a certain number of "constrained" bits (the least significant 249 * bits) and some number of "unconstrained" bits. The constrained 250 * bits must be set to the index of the entry in the MPT table, but 251 * the unconstrained bits can be set to any value we wish. Note: 252 * if no remote access is required, then the RKey value is not filled 253 * in. Otherwise both Rkey and LKey are given the same value. 254 */ 255 mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx); 256 if ((mr->mr_accflag & IBT_MR_REMOTE_READ) || 257 (mr->mr_accflag & IBT_MR_REMOTE_WRITE) || 258 (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) { 259 mr->mr_rkey = mr->mr_lkey; 260 } 261 262 /* Grab the MR lock for the current memory region */ 263 mutex_enter(&mrhdl->mr_lock); 264 265 /* 266 * Check here to see if the memory region has already been partially 267 * deregistered as a result of a hermon_umap_umemlock_cb() callback. 268 * If so, this is an error, return failure. 269 */ 270 if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) { 271 mutex_exit(&mrhdl->mr_lock); 272 status = IBT_MR_HDL_INVALID; 273 goto mrshared_fail3; 274 } 275 276 /* 277 * Determine if the original memory was from userland and, if so, pin 278 * the pages (again) with umem_lockmemory(). This will guarantee a 279 * separate callback for each of this shared region's MR handles. 
280 * If this is userland memory, then allocate an entry in the 281 * "userland resources database". This will later be added to 282 * the database (after all further memory registration operations are 283 * successful). If we fail here, we must undo all the above setup. 284 */ 285 mr_is_umem = mrhdl->mr_is_umem; 286 if (mr_is_umem) { 287 umem_len = ptob(btopr(mrhdl->mr_bindinfo.bi_len)); 288 umem_addr = (caddr_t)((uintptr_t)mrhdl->mr_bindinfo.bi_addr & 289 ~PAGEOFFSET); 290 umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ | 291 DDI_UMEMLOCK_LONGTERM); 292 status = umem_lockmemory(umem_addr, umem_len, umem_flags, 293 &umem_cookie, &hermon_umem_cbops, NULL); 294 if (status != 0) { 295 mutex_exit(&mrhdl->mr_lock); 296 status = IBT_INSUFF_RESOURCE; 297 goto mrshared_fail3; 298 } 299 300 umapdb = hermon_umap_db_alloc(state->hs_instance, 301 (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC, 302 (uint64_t)(uintptr_t)rsrc); 303 if (umapdb == NULL) { 304 mutex_exit(&mrhdl->mr_lock); 305 status = IBT_INSUFF_RESOURCE; 306 goto mrshared_fail4; 307 } 308 } 309 310 /* 311 * Copy the MTT resource pointer (and additional parameters) from 312 * the original Hermon Memory Region handle. Note: this is normally 313 * where the hermon_mr_mem_bind() routine would be called, but because 314 * we already have bound and filled-in MTT entries it is simply a 315 * matter here of managing the MTT reference count and grabbing the 316 * address of the MTT table entries (for filling in the shared region's 317 * MPT entry). 
318 */ 319 mr->mr_mttrsrcp = mrhdl->mr_mttrsrcp; 320 mr->mr_logmttpgsz = mrhdl->mr_logmttpgsz; 321 mr->mr_bindinfo = mrhdl->mr_bindinfo; 322 mr->mr_mttrefcntp = mrhdl->mr_mttrefcntp; 323 mutex_exit(&mrhdl->mr_lock); 324 bind = &mr->mr_bindinfo; 325 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind)) 326 mtt = mr->mr_mttrsrcp; 327 328 /* 329 * Increment the MTT reference count (to reflect the fact that 330 * the MTT is now shared) 331 */ 332 (void) hermon_mtt_refcnt_inc(mr->mr_mttrefcntp); 333 334 /* 335 * Update the new "bind" virtual address. Do some extra work here 336 * to ensure proper alignment. That is, make sure that the page 337 * offset for the beginning of the old range is the same as the 338 * offset for this new mapping 339 */ 340 pgsize_msk = (((uint64_t)1 << mr->mr_logmttpgsz) - 1); 341 bind->bi_addr = ((mr_attr->mr_vaddr & ~pgsize_msk) | 342 (mr->mr_bindinfo.bi_addr & pgsize_msk)); 343 344 /* 345 * Fill in the MPT entry. This is the final step before passing 346 * ownership of the MPT entry to the Hermon hardware. We use all of 347 * the information collected/calculated above to fill in the 348 * requisite portions of the MPT. 349 */ 350 bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t)); 351 mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0; 352 mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0; 353 mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0; 354 mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0; 355 mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 
1 : 0; 356 mpt_entry.lr = 1; 357 mpt_entry.reg_win = HERMON_MPT_IS_REGION; 358 mpt_entry.entity_sz = mr->mr_logmttpgsz; 359 mpt_entry.mem_key = mr->mr_lkey; 360 mpt_entry.pd = pd->pd_pdnum; 361 mpt_entry.start_addr = bind->bi_addr; 362 mpt_entry.reg_win_len = bind->bi_len; 363 mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT); 364 mpt_entry.mtt_addr_h = mtt_addr >> 32; 365 mpt_entry.mtt_addr_l = mtt_addr >> 3; 366 367 /* 368 * Write the MPT entry to hardware. Lastly, we pass ownership of 369 * the entry to the hardware. Note: in general, this operation 370 * shouldn't fail. But if it does, we have to undo everything we've 371 * done above before returning error. 372 */ 373 status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry, 374 sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep); 375 if (status != HERMON_CMD_SUCCESS) { 376 cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n", 377 status); 378 if (status == HERMON_CMD_INVALID_STATUS) { 379 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 380 } 381 status = ibc_get_ci_failure(0); 382 goto mrshared_fail5; 383 } 384 385 /* 386 * Fill in the rest of the Hermon Memory Region handle. Having 387 * successfully transferred ownership of the MPT, we can update the 388 * following fields for use in further operations on the MR. 389 */ 390 mr->mr_mptrsrcp = mpt; 391 mr->mr_mttrsrcp = mtt; 392 mr->mr_mpt_type = HERMON_MPT_DMPT; 393 mr->mr_pdhdl = pd; 394 mr->mr_rsrcp = rsrc; 395 mr->mr_is_umem = mr_is_umem; 396 mr->mr_is_fmr = 0; 397 mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL; 398 mr->mr_umem_cbfunc = NULL; 399 mr->mr_umem_cbarg1 = NULL; 400 mr->mr_umem_cbarg2 = NULL; 401 mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey); 402 mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey); 403 404 /* 405 * If this is userland memory, then we need to insert the previously 406 * allocated entry into the "userland resources database". 
This will 407 * allow for later coordination between the hermon_umap_umemlock_cb() 408 * callback and hermon_mr_deregister(). 409 */ 410 if (mr_is_umem) { 411 hermon_umap_db_add(umapdb); 412 } 413 414 *mrhdl_new = mr; 415 416 return (DDI_SUCCESS); 417 418 /* 419 * The following is cleanup for all possible failure cases in this routine 420 */ 421 mrshared_fail5: 422 (void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp); 423 if (mr_is_umem) { 424 hermon_umap_db_free(umapdb); 425 } 426 mrshared_fail4: 427 if (mr_is_umem) { 428 ddi_umem_unlock(umem_cookie); 429 } 430 mrshared_fail3: 431 hermon_rsrc_free(state, &rsrc); 432 mrshared_fail2: 433 hermon_rsrc_free(state, &mpt); 434 mrshared_fail1: 435 hermon_pd_refcnt_dec(pd); 436 mrshared_fail: 437 return (status); 438 } 439 440 /* 441 * hermon_mr_alloc_fmr() 442 * Context: Can be called from interrupt or base context. 443 */ 444 int 445 hermon_mr_alloc_fmr(hermon_state_t *state, hermon_pdhdl_t pd, 446 hermon_fmrhdl_t fmr_pool, hermon_mrhdl_t *mrhdl) 447 { 448 hermon_rsrc_t *mpt, *mtt, *rsrc; 449 hermon_hw_dmpt_t mpt_entry; 450 hermon_mrhdl_t mr; 451 hermon_bind_info_t bind; 452 uint64_t mtt_addr; 453 uint64_t nummtt; 454 uint_t sleep, mtt_pgsize_bits; 455 int status; 456 457 /* 458 * Check the sleep flag. Ensure that it is consistent with the 459 * current thread context (i.e. if we are currently in the interrupt 460 * context, then we shouldn't be attempting to sleep). 461 */ 462 sleep = (fmr_pool->fmr_flags & IBT_MR_SLEEP) ? HERMON_SLEEP : 463 HERMON_NOSLEEP; 464 if ((sleep == HERMON_SLEEP) && 465 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) { 466 return (IBT_INVALID_PARAM); 467 } 468 469 /* Increment the reference count on the protection domain (PD) */ 470 hermon_pd_refcnt_inc(pd); 471 472 /* 473 * Allocate an MPT entry. This will be filled in with all the 474 * necessary parameters to define the FMR. 
Specifically, it will be 475 * made to reference the currently existing MTT entries and ownership 476 * of the MPT will be passed to the hardware in the last step below. 477 * If we fail here, we must undo the protection domain reference count. 478 */ 479 480 status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt); 481 if (status != DDI_SUCCESS) { 482 status = IBT_INSUFF_RESOURCE; 483 goto fmralloc_fail1; 484 } 485 486 /* 487 * Allocate the software structure for tracking the fmr memory 488 * region (i.e. the Hermon Memory Region handle). If we fail here, we 489 * must undo the protection domain reference count and the previous 490 * resource allocation. 491 */ 492 status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc); 493 if (status != DDI_SUCCESS) { 494 status = IBT_INSUFF_RESOURCE; 495 goto fmralloc_fail2; 496 } 497 mr = (hermon_mrhdl_t)rsrc->hr_addr; 498 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) 499 500 /* 501 * Setup and validate the memory region access flags. This means 502 * translating the IBTF's enable flags into the access flags that 503 * will be used in later operations. 504 */ 505 mr->mr_accflag = 0; 506 if (fmr_pool->fmr_flags & IBT_MR_ENABLE_LOCAL_WRITE) 507 mr->mr_accflag |= IBT_MR_LOCAL_WRITE; 508 if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_READ) 509 mr->mr_accflag |= IBT_MR_REMOTE_READ; 510 if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_WRITE) 511 mr->mr_accflag |= IBT_MR_REMOTE_WRITE; 512 if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC) 513 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC; 514 515 /* 516 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed 517 * from a certain number of "constrained" bits (the least significant 518 * bits) and some number of "unconstrained" bits. The constrained 519 * bits must be set to the index of the entry in the MPT table, but 520 * the unconstrained bits can be set to any value we wish. Note: 521 * if no remote access is required, then the RKey value is not filled 522 * in. 
Otherwise both Rkey and LKey are given the same value. 523 */ 524 mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx); 525 if ((mr->mr_accflag & IBT_MR_REMOTE_READ) || 526 (mr->mr_accflag & IBT_MR_REMOTE_WRITE) || 527 (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) { 528 mr->mr_rkey = mr->mr_lkey; 529 } 530 531 /* 532 * Determine number of pages spanned. This routine uses the 533 * information in the "bind" struct to determine the required 534 * number of MTT entries needed (and returns the suggested page size - 535 * as a "power-of-2" - for each MTT entry). 536 */ 537 /* Assume address will be page aligned later */ 538 bind.bi_addr = 0; 539 /* Calculate size based on given max pages */ 540 bind.bi_len = fmr_pool->fmr_max_pages << PAGESHIFT; 541 nummtt = hermon_mr_nummtt_needed(state, &bind, &mtt_pgsize_bits); 542 543 /* 544 * Allocate the MTT entries. Use the calculations performed above to 545 * allocate the required number of MTT entries. If we fail here, we 546 * must not only undo all the previous resource allocation (and PD 547 * reference count), but we must also unbind the memory. 548 */ 549 status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, &mtt); 550 if (status != DDI_SUCCESS) { 551 status = IBT_INSUFF_RESOURCE; 552 goto fmralloc_fail3; 553 } 554 mr->mr_logmttpgsz = mtt_pgsize_bits; 555 556 /* 557 * Fill in the MPT entry. This is the final step before passing 558 * ownership of the MPT entry to the Hermon hardware. We use all of 559 * the information collected/calculated above to fill in the 560 * requisite portions of the MPT. 561 */ 562 bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t)); 563 mpt_entry.en_bind = 0; 564 mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0; 565 mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0; 566 mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0; 567 mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 
1 : 0; 568 mpt_entry.lr = 1; 569 mpt_entry.reg_win = HERMON_MPT_IS_REGION; 570 mpt_entry.pd = pd->pd_pdnum; 571 572 mpt_entry.entity_sz = mr->mr_logmttpgsz; 573 mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT); 574 mpt_entry.mtt_addr_h = mtt_addr >> 32; 575 mpt_entry.mtt_addr_l = mtt_addr >> 3; 576 mpt_entry.mem_key = mr->mr_lkey; 577 578 /* 579 * FMR sets these to 0 for now. Later during actual fmr registration 580 * these values are filled in. 581 */ 582 mpt_entry.start_addr = 0; 583 mpt_entry.reg_win_len = 0; 584 585 /* 586 * Write the MPT entry to hardware. Lastly, we pass ownership of 587 * the entry to the hardware. Note: in general, this operation 588 * shouldn't fail. But if it does, we have to undo everything we've 589 * done above before returning error. 590 */ 591 status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry, 592 sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep); 593 if (status != HERMON_CMD_SUCCESS) { 594 cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n", 595 status); 596 if (status == HERMON_CMD_INVALID_STATUS) { 597 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 598 } 599 status = ibc_get_ci_failure(0); 600 goto fmralloc_fail4; 601 } 602 603 /* 604 * Fill in the rest of the Hermon Memory Region handle. Having 605 * successfully transferred ownership of the MPT, we can update the 606 * following fields for use in further operations on the MR. Also, set 607 * that this is an FMR region. 
608 */ 609 mr->mr_mptrsrcp = mpt; 610 mr->mr_mttrsrcp = mtt; 611 mr->mr_mpt_type = HERMON_MPT_DMPT; 612 mr->mr_pdhdl = pd; 613 mr->mr_rsrcp = rsrc; 614 mr->mr_is_fmr = 1; 615 mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey); 616 mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey); 617 (void) memcpy(&mr->mr_bindinfo, &bind, sizeof (hermon_bind_info_t)); 618 619 *mrhdl = mr; 620 621 return (DDI_SUCCESS); 622 623 /* 624 * The following is cleanup for all possible failure cases in this routine 625 */ 626 fmralloc_fail4: 627 kmem_free(mtt, sizeof (hermon_rsrc_t) * nummtt); 628 fmralloc_fail3: 629 hermon_rsrc_free(state, &rsrc); 630 fmralloc_fail2: 631 hermon_rsrc_free(state, &mpt); 632 fmralloc_fail1: 633 hermon_pd_refcnt_dec(pd); 634 fmralloc_fail: 635 return (status); 636 } 637 638 /* 639 * hermon_mr_register_physical_fmr() 640 * Context: Can be called from interrupt or base context. 641 */ 642 /*ARGSUSED*/ 643 int 644 hermon_mr_register_physical_fmr(hermon_state_t *state, 645 ibt_pmr_attr_t *mem_pattr_p, hermon_mrhdl_t mr, ibt_pmr_desc_t *mem_desc_p) 646 { 647 hermon_rsrc_t *mpt; 648 uint64_t *mpt_table; 649 int status; 650 651 mutex_enter(&mr->mr_lock); 652 mpt = mr->mr_mptrsrcp; 653 mpt_table = (uint64_t *)mpt->hr_addr; 654 655 /* Write MPT status to SW bit */ 656 ddi_put8(mpt->hr_acchdl, (uint8_t *)&mpt_table[0], 0xF); 657 658 /* 659 * Write the mapped addresses into the MTT entries. FMR needs to do 660 * this a little differently, so we call the fmr specific fast mtt 661 * write here. 662 */ 663 status = hermon_mr_fast_mtt_write_fmr(mr->mr_mttrsrcp, mem_pattr_p, 664 mr->mr_logmttpgsz); 665 if (status != DDI_SUCCESS) { 666 mutex_exit(&mr->mr_lock); 667 status = ibc_get_ci_failure(0); 668 goto fmr_reg_fail1; 669 } 670 671 /* 672 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed 673 * from a certain number of "constrained" bits (the least significant 674 * bits) and some number of "unconstrained" bits. 
The constrained 675 * bits must be set to the index of the entry in the MPT table, but 676 * the unconstrained bits can be set to any value we wish. Note: 677 * if no remote access is required, then the RKey value is not filled 678 * in. Otherwise both Rkey and LKey are given the same value. 679 */ 680 mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx); 681 if ((mr->mr_accflag & IBT_MR_REMOTE_READ) || 682 (mr->mr_accflag & IBT_MR_REMOTE_WRITE) || 683 (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) { 684 mr->mr_rkey = mr->mr_lkey; 685 } 686 687 /* write mem key value */ 688 ddi_put32(mpt->hr_acchdl, (uint32_t *)&mpt_table[1], mr->mr_lkey); 689 690 /* write length value */ 691 ddi_put64(mpt->hr_acchdl, &mpt_table[3], mem_pattr_p->pmr_len); 692 693 /* write start addr value */ 694 ddi_put64(mpt->hr_acchdl, &mpt_table[2], mem_pattr_p->pmr_iova); 695 696 /* write lkey value */ 697 ddi_put32(mpt->hr_acchdl, (uint32_t *)&mpt_table[4], mr->mr_lkey); 698 699 /* Write MPT status to HW bit */ 700 ddi_put8(mpt->hr_acchdl, (uint8_t *)&mpt_table[0], 0x0); 701 702 /* Fill in return parameters */ 703 mem_desc_p->pmd_lkey = mr->mr_lkey; 704 mem_desc_p->pmd_rkey = mr->mr_rkey; 705 mem_desc_p->pmd_iova = mem_pattr_p->pmr_iova; 706 mem_desc_p->pmd_phys_buf_list_sz = mem_pattr_p->pmr_len; 707 708 /* Fill in MR bindinfo struct for later sync or query operations */ 709 mr->mr_bindinfo.bi_addr = mem_pattr_p->pmr_iova; 710 mr->mr_bindinfo.bi_flags = mem_pattr_p->pmr_flags & IBT_MR_NONCOHERENT; 711 712 mutex_exit(&mr->mr_lock); 713 714 return (DDI_SUCCESS); 715 716 fmr_reg_fail1: 717 /* 718 * Note, we fail here, and purposely leave the memory ownership in 719 * software. The memory tables may be corrupt, so we leave the region 720 * unregistered. 721 */ 722 return (DDI_FAILURE); 723 } 724 725 726 /* 727 * hermon_mr_deregister() 728 * Context: Can be called from interrupt or base context. 
729 */ 730 /* ARGSUSED */ 731 int 732 hermon_mr_deregister(hermon_state_t *state, hermon_mrhdl_t *mrhdl, uint_t level, 733 uint_t sleep) 734 { 735 hermon_rsrc_t *mpt, *mtt, *rsrc, *mtt_refcnt; 736 hermon_umap_db_entry_t *umapdb; 737 hermon_pdhdl_t pd; 738 hermon_mrhdl_t mr; 739 hermon_bind_info_t *bind; 740 uint64_t value; 741 int status; 742 uint_t shared_mtt; 743 744 /* 745 * Check the sleep flag. Ensure that it is consistent with the 746 * current thread context (i.e. if we are currently in the interrupt 747 * context, then we shouldn't be attempting to sleep). 748 */ 749 if ((sleep == HERMON_SLEEP) && 750 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) { 751 status = IBT_INVALID_PARAM; 752 return (status); 753 } 754 755 /* 756 * Pull all the necessary information from the Hermon Memory Region 757 * handle. This is necessary here because the resource for the 758 * MR handle is going to be freed up as part of the this 759 * deregistration 760 */ 761 mr = *mrhdl; 762 mutex_enter(&mr->mr_lock); 763 mpt = mr->mr_mptrsrcp; 764 mtt = mr->mr_mttrsrcp; 765 mtt_refcnt = mr->mr_mttrefcntp; 766 rsrc = mr->mr_rsrcp; 767 pd = mr->mr_pdhdl; 768 bind = &mr->mr_bindinfo; 769 770 /* 771 * Check here if the memory region is really an FMR. If so, this is a 772 * bad thing and we shouldn't be here. Return failure. 773 */ 774 if (mr->mr_is_fmr) { 775 mutex_exit(&mr->mr_lock); 776 return (IBT_INVALID_PARAM); 777 } 778 779 /* 780 * Check here to see if the memory region has already been partially 781 * deregistered as a result of the hermon_umap_umemlock_cb() callback. 782 * If so, then jump to the end and free the remaining resources. 783 */ 784 if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) { 785 goto mrdereg_finish_cleanup; 786 } 787 788 /* 789 * We must drop the "mr_lock" here to ensure that both SLEEP and 790 * NOSLEEP calls into the firmware work as expected. 
Also, if two 791 * threads are attemping to access this MR (via de-register, 792 * re-register, or otherwise), then we allow the firmware to enforce 793 * the checking, that only one deregister is valid. 794 */ 795 mutex_exit(&mr->mr_lock); 796 797 /* 798 * Reclaim MPT entry from hardware (if necessary). Since the 799 * hermon_mr_deregister() routine is used in the memory region 800 * reregistration process as well, it is possible that we will 801 * not always wish to reclaim ownership of the MPT. Check the 802 * "level" arg and, if necessary, attempt to reclaim it. If 803 * the ownership transfer fails for any reason, we check to see 804 * what command status was returned from the hardware. The only 805 * "expected" error status is the one that indicates an attempt to 806 * deregister a memory region that has memory windows bound to it 807 */ 808 if (level >= HERMON_MR_DEREG_ALL) { 809 if (mr->mr_mpt_type >= HERMON_MPT_DMPT) { 810 status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, 811 NULL, 0, mpt->hr_indx, sleep); 812 if (status != HERMON_CMD_SUCCESS) { 813 if (status == HERMON_CMD_REG_BOUND) { 814 return (IBT_MR_IN_USE); 815 } else { 816 cmn_err(CE_CONT, "Hermon: HW2SW_MPT " 817 "command failed: %08x\n", status); 818 if (status == 819 HERMON_CMD_INVALID_STATUS) { 820 hermon_fm_ereport(state, 821 HCA_SYS_ERR, 822 DDI_SERVICE_LOST); 823 } 824 return (IBT_INVALID_PARAM); 825 } 826 } 827 } 828 } 829 830 /* 831 * Re-grab the mr_lock here. Since further access to the protected 832 * 'mr' structure is needed, and we would have returned previously for 833 * the multiple deregistration case, we can safely grab the lock here. 834 */ 835 mutex_enter(&mr->mr_lock); 836 837 /* 838 * If the memory had come from userland, then we do a lookup in the 839 * "userland resources database". On success, we free the entry, call 840 * ddi_umem_unlock(), and continue the cleanup. 
On failure (which is 841 * an indication that the umem_lockmemory() callback has called 842 * hermon_mr_deregister()), we call ddi_umem_unlock() and invalidate 843 * the "mr_umemcookie" field in the MR handle (this will be used 844 * later to detect that only partial cleaup still remains to be done 845 * on the MR handle). 846 */ 847 if (mr->mr_is_umem) { 848 status = hermon_umap_db_find(state->hs_instance, 849 (uint64_t)(uintptr_t)mr->mr_umemcookie, 850 MLNX_UMAP_MRMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE, 851 &umapdb); 852 if (status == DDI_SUCCESS) { 853 hermon_umap_db_free(umapdb); 854 ddi_umem_unlock(mr->mr_umemcookie); 855 } else { 856 ddi_umem_unlock(mr->mr_umemcookie); 857 mr->mr_umemcookie = NULL; 858 } 859 } 860 861 /* 862 * Decrement the MTT reference count. Since the MTT resource 863 * may be shared between multiple memory regions (as a result 864 * of a "RegisterSharedMR" verb) it is important that we not 865 * free up or unbind resources prematurely. If it's not shared (as 866 * indicated by the return status), then free the resource. 867 */ 868 shared_mtt = hermon_mtt_refcnt_dec(mtt_refcnt); 869 if (!shared_mtt) { 870 hermon_rsrc_free(state, &mtt_refcnt); 871 } 872 873 /* 874 * Free up the MTT entries and unbind the memory. Here, as above, we 875 * attempt to free these resources only if it is appropriate to do so. 876 */ 877 if (!shared_mtt) { 878 if (level >= HERMON_MR_DEREG_NO_HW2SW_MPT) { 879 hermon_mr_mem_unbind(state, bind); 880 } 881 hermon_rsrc_free(state, &mtt); 882 } 883 884 /* 885 * If the MR handle has been invalidated, then drop the 886 * lock and return success. Note: This only happens because 887 * the umem_lockmemory() callback has been triggered. The 888 * cleanup here is partial, and further cleanup (in a 889 * subsequent hermon_mr_deregister() call) will be necessary. 
 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		return (DDI_SUCCESS);
	}

mrdereg_finish_cleanup:
	mutex_exit(&mr->mr_lock);

	/* Free the Hermon Memory Region handle */
	hermon_rsrc_free(state, &rsrc);

	/* Free up the MPT entry resource (absent for non-dMPT regions) */
	if (mpt != NULL)
		hermon_rsrc_free(state, &mpt);

	/* Decrement the reference count on the protection domain (PD) */
	hermon_pd_refcnt_dec(pd);

	/* Set the mrhdl pointer to NULL and return success */
	*mrhdl = NULL;

	return (DDI_SUCCESS);
}

/*
 * hermon_mr_dealloc_fmr()
 *    Context: Can be called from interrupt or base context.
 *
 * Tear down an FMR region: free its MTT entries, its MR handle
 * resource, and its MPT entry, then release the PD reference.  Note:
 * unlike hermon_mr_deregister(), no HW2SW_MPT ownership command is
 * posted here — presumably FMR teardown relies on the MPT having
 * already been invalidated/returned to software (see
 * hermon_mr_invalidate_fmr()/hermon_mr_deregister_fmr()); confirm
 * against the callers before changing this.
 */
/* ARGSUSED */
int
hermon_mr_dealloc_fmr(hermon_state_t *state, hermon_mrhdl_t *mrhdl)
{
	hermon_rsrc_t		*mpt, *mtt, *rsrc;
	hermon_pdhdl_t		pd;
	hermon_mrhdl_t		mr;

	/*
	 * Pull all the necessary information from the Hermon Memory Region
	 * handle.  This is necessary here because the resource for the
	 * MR handle is going to be freed up as part of the this
	 * deregistration
	 */
	mr	= *mrhdl;
	mutex_enter(&mr->mr_lock);
	mpt	= mr->mr_mptrsrcp;
	mtt	= mr->mr_mttrsrcp;
	rsrc	= mr->mr_rsrcp;
	pd	= mr->mr_pdhdl;
	mutex_exit(&mr->mr_lock);

	/* Free the MTT entries */
	hermon_rsrc_free(state, &mtt);

	/* Free the Hermon Memory Region handle */
	hermon_rsrc_free(state, &rsrc);

	/* Free up the MPT entry resource */
	hermon_rsrc_free(state, &mpt);

	/* Decrement the reference count on the protection domain (PD) */
	hermon_pd_refcnt_dec(pd);

	/* Set the mrhdl pointer to NULL and return success */
	*mrhdl = NULL;

	return (DDI_SUCCESS);
}

/*
 * hermon_mr_invalidate_fmr()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_mr_invalidate_fmr(hermon_state_t *state, hermon_mrhdl_t mr)
{
	hermon_rsrc_t		*mpt;
	uint64_t		*mpt_table;

	mutex_enter(&mr->mr_lock);
	mpt = mr->mr_mptrsrcp;
	mpt_table = (uint64_t *)mpt->hr_addr;

	/*
	 * Write MPT status to SW bit.  0xF (HERMON_MPT_SW_OWNERSHIP) in the
	 * status byte hands the entry back to software while we modify it.
	 */
	ddi_put8(mpt->hr_acchdl, (uint8_t *)&mpt_table[0], 0xF);

	/*
	 * invalidate mem key value
	 * NOTE(review): offsets [1] and [4] below index 64-bit words of the
	 * in-memory MPT; presumably they line up with the mem_key and lkey
	 * fields of hermon_hw_dmpt_t — confirm against the structure layout
	 * before changing.
	 */
	ddi_put32(mpt->hr_acchdl, (uint32_t *)&mpt_table[1], 0);

	/* invalidate lkey value */
	ddi_put32(mpt->hr_acchdl, (uint32_t *)&mpt_table[4], 0);

	/* Write MPT status to HW bit (0x0 returns ownership to hardware) */
	ddi_put8(mpt->hr_acchdl, (uint8_t *)&mpt_table[0], 0x0);

	mutex_exit(&mr->mr_lock);

	return (DDI_SUCCESS);
}

/*
 * hermon_mr_deregister_fmr()
 *    Context: Can be called from interrupt or base context.
 *
 * Mark the FMR's MPT entry as software-owned.  Unlike
 * hermon_mr_invalidate_fmr() above, ownership is NOT handed back to
 * hardware afterwards: the entry is left in SW ownership for a
 * subsequent hermon_mr_dealloc_fmr().
 */
/* ARGSUSED */
int
hermon_mr_deregister_fmr(hermon_state_t *state, hermon_mrhdl_t mr)
{
	hermon_rsrc_t		*mpt;
	uint64_t		*mpt_table;

	mutex_enter(&mr->mr_lock);
	mpt = mr->mr_mptrsrcp;
	mpt_table = (uint64_t *)mpt->hr_addr;

	/* Write MPT status to SW bit */
	ddi_put8(mpt->hr_acchdl, (uint8_t *)&mpt_table[0], 0xF);
	mutex_exit(&mr->mr_lock);

	return (DDI_SUCCESS);
}


/*
 * hermon_mr_query()
 *    Context: Can be called from interrupt or base context.
 *
 * Fill in "attr" with the attributes (keys, bounds, flags) of memory
 * region "mr".  Remote attributes are reported only when the region
 * has at least one remote access flag enabled.
 */
/* ARGSUSED */
int
hermon_mr_query(hermon_state_t *state, hermon_mrhdl_t mr,
    ibt_mr_query_attr_t *attr)
{
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr))

	mutex_enter(&mr->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a hermon_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		return (IBT_MR_HDL_INVALID);
	}

	/* Fill in the queried attributes */
	attr->mr_attr_flags = mr->mr_accflag;
	attr->mr_pd	= (ibt_pd_hdl_t)mr->mr_pdhdl;

	/* Fill in the "local" attributes */
	attr->mr_lkey = (ibt_lkey_t)mr->mr_lkey;
	attr->mr_lbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
	attr->mr_lbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len;

	/*
	 * Fill in the "remote" attributes (if necessary).  Note: the
	 * remote attributes are only valid if the memory region has one
	 * or more of the remote access flags set.
	 */
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		attr->mr_rkey = (ibt_rkey_t)mr->mr_rkey;
		attr->mr_rbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
		attr->mr_rbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len;
	}

	/*
	 * If region is mapped for streaming (i.e. noncoherent), then set sync
	 * is required
	 */
	attr->mr_sync_required = (mr->mr_bindinfo.bi_flags &
	    IBT_MR_NONCOHERENT) ? B_TRUE : B_FALSE;

	mutex_exit(&mr->mr_lock);
	return (DDI_SUCCESS);
}


/*
 * hermon_mr_reregister()
 *    Context: Can be called from interrupt or base context.
 *
 * Reregister the region "mr" using the virtual-address description in
 * "mr_attr"; thin wrapper that packages a HERMON_BINDHDL_VADDR bind
 * and defers to hermon_mr_common_rereg().
 */
int
hermon_mr_reregister(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new,
    hermon_mr_options_t *op)
{
	hermon_bind_info_t	bind;
	int			status;

	/*
	 * Fill in the "bind" struct.  This struct provides the majority
	 * of the information that will be used to distinguish between an
	 * "addr" binding (as is the case here) and a "buf" binding (see
	 * below).  The "bind" struct is later passed to hermon_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Hermon memory
	 * registration (and reregistration) routines.
	 */
	bind.bi_type  = HERMON_BINDHDL_VADDR;
	bind.bi_addr  = mr_attr->mr_vaddr;
	bind.bi_len   = mr_attr->mr_len;
	bind.bi_as    = mr_attr->mr_as;
	bind.bi_flags = mr_attr->mr_flags;
	status = hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
	return (status);
}


/*
 * hermon_mr_reregister_buf()
 *    Context: Can be called from interrupt or base context.
 *
 * Reregister the region "mr" using the kernel buf(9S) "buf"; thin
 * wrapper that packages a HERMON_BINDHDL_BUF bind and defers to
 * hermon_mr_common_rereg().
 */
int
hermon_mr_reregister_buf(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf,
    hermon_mrhdl_t *mrhdl_new, hermon_mr_options_t *op)
{
	hermon_bind_info_t	bind;
	int			status;

	/*
	 * Fill in the "bind" struct.  This struct provides the majority
	 * of the information that will be used to distinguish between an
	 * "addr" binding (see above) and a "buf" binding (as is the case
	 * here).  The "bind" struct is later passed to hermon_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Hermon memory
	 * registration routines.  Note: We have chosen to provide
	 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
	 * not set).  It is not critical what value we choose here as it need
	 * only be unique for the given RKey (which will happen by default),
	 * so the choice here is somewhat arbitrary.
	 */
	bind.bi_type  = HERMON_BINDHDL_BUF;
	bind.bi_buf   = buf;
	if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
		bind.bi_addr = mr_attr->mr_vaddr;
	} else {
		bind.bi_addr = (uint64_t)(uintptr_t)buf->b_un.b_addr;
	}
	bind.bi_len   = (uint64_t)buf->b_bcount;
	bind.bi_flags = mr_attr->mr_flags;
	bind.bi_as    = NULL;
	status = hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
	return (status);
}


/*
 * hermon_mr_sync()
 *    Context: Can be called from interrupt or base context.
 *
 * DMA-sync each of the "num_segs" entries of "mr_segs" after
 * validating its handle, its bounds against the region, and its
 * direction flags.  On the first failing segment the error is
 * returned and remaining segments are not processed.
 */
/* ARGSUSED */
int
hermon_mr_sync(hermon_state_t *state, ibt_mr_sync_t *mr_segs, size_t num_segs)
{
	hermon_mrhdl_t		mrhdl;
	uint64_t		seg_vaddr, seg_len, seg_end;
	uint64_t		mr_start, mr_end;
	uint_t			type;
	int			status, i;

	/* Process each of the ibt_mr_sync_t's */
	for (i = 0; i < num_segs; i++) {
		mrhdl = (hermon_mrhdl_t)mr_segs[i].ms_handle;

		/* Check for valid memory region handle */
		if (mrhdl == NULL) {
			status = IBT_MR_HDL_INVALID;
			goto mrsync_fail;
		}

		mutex_enter(&mrhdl->mr_lock);

		/*
		 * Check here to see if the memory region has already been
		 * partially deregistered as a result of a
		 * hermon_umap_umemlock_cb() callback.  If so, this is an
		 * error, return failure.
1171 */ 1172 if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) { 1173 mutex_exit(&mrhdl->mr_lock); 1174 status = IBT_MR_HDL_INVALID; 1175 goto mrsync_fail; 1176 } 1177 1178 /* Check for valid bounds on sync request */ 1179 seg_vaddr = mr_segs[i].ms_vaddr; 1180 seg_len = mr_segs[i].ms_len; 1181 seg_end = seg_vaddr + seg_len - 1; 1182 mr_start = mrhdl->mr_bindinfo.bi_addr; 1183 mr_end = mr_start + mrhdl->mr_bindinfo.bi_len - 1; 1184 if ((seg_vaddr < mr_start) || (seg_vaddr > mr_end)) { 1185 mutex_exit(&mrhdl->mr_lock); 1186 status = IBT_MR_VA_INVALID; 1187 goto mrsync_fail; 1188 } 1189 if ((seg_end < mr_start) || (seg_end > mr_end)) { 1190 mutex_exit(&mrhdl->mr_lock); 1191 status = IBT_MR_LEN_INVALID; 1192 goto mrsync_fail; 1193 } 1194 1195 /* Determine what type (i.e. direction) for sync */ 1196 if (mr_segs[i].ms_flags & IBT_SYNC_READ) { 1197 type = DDI_DMA_SYNC_FORDEV; 1198 } else if (mr_segs[i].ms_flags & IBT_SYNC_WRITE) { 1199 type = DDI_DMA_SYNC_FORCPU; 1200 } else { 1201 mutex_exit(&mrhdl->mr_lock); 1202 status = IBT_INVALID_PARAM; 1203 goto mrsync_fail; 1204 } 1205 1206 (void) ddi_dma_sync(mrhdl->mr_bindinfo.bi_dmahdl, 1207 (off_t)(seg_vaddr - mr_start), (size_t)seg_len, type); 1208 1209 mutex_exit(&mrhdl->mr_lock); 1210 } 1211 1212 return (DDI_SUCCESS); 1213 1214 mrsync_fail: 1215 return (status); 1216 } 1217 1218 1219 /* 1220 * hermon_mw_alloc() 1221 * Context: Can be called from interrupt or base context. 1222 */ 1223 int 1224 hermon_mw_alloc(hermon_state_t *state, hermon_pdhdl_t pd, ibt_mw_flags_t flags, 1225 hermon_mwhdl_t *mwhdl) 1226 { 1227 hermon_rsrc_t *mpt, *rsrc; 1228 hermon_hw_dmpt_t mpt_entry; 1229 hermon_mwhdl_t mw; 1230 uint_t sleep; 1231 int status; 1232 1233 if (state != NULL) /* XXX - bogus test that is always TRUE */ 1234 return (IBT_INSUFF_RESOURCE); 1235 1236 /* 1237 * Check the sleep flag. Ensure that it is consistent with the 1238 * current thread context (i.e. 
 if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MW_NOSLEEP) ? HERMON_NOSLEEP : HERMON_SLEEP;
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		goto mwalloc_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	hermon_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry (for use as a memory window).  Since the
	 * Hermon hardware uses the MPT entry for memory regions and for
	 * memory windows, we will fill in this MPT with all the necessary
	 * parameters for the memory window.  And then (just as we do for
	 * memory regions) ownership will be passed to the hardware in the
	 * final step below.  If we fail here, we must undo the protection
	 * domain reference count.
	 */
	status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mwalloc_fail1;
	}

	/*
	 * Allocate the software structure for tracking the memory window (i.e.
	 * the Hermon Memory Window handle).  Note: This is actually the same
	 * software structure used for tracking memory regions, but since many
	 * of the same properties are needed, only a single structure is
	 * necessary.  If we fail here, we must undo the protection domain
	 * reference count and the previous resource allocation.
	 */
	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mwalloc_fail2;
	}
	mw = (hermon_mwhdl_t)rsrc->hr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))

	/*
	 * Calculate an "unbound" RKey from MPT index.  In much the same way
	 * as we do for memory regions (above), this key is constructed from
	 * a "constrained" (which depends on the MPT index) and an
	 * "unconstrained" portion (which may be arbitrarily chosen).
	 */
	mw->mr_rkey = hermon_mr_keycalc(mpt->hr_indx);

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Hermon hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.  Note: fewer entries in the MPT
	 * entry are necessary to allocate a memory window.
	 */
	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
	mpt_entry.reg_win	= HERMON_MPT_IS_WINDOW;
	mpt_entry.mem_key	= mw->mr_rkey;
	mpt_entry.pd		= pd->pd_pdnum;
	mpt_entry.lr		= 1;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		status = ibc_get_ci_failure(0);
		goto mwalloc_fail3;
	}

	/*
	 * Fill in the rest of the Hermon Memory Window handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MW.
	 */
	mw->mr_mptrsrcp	= mpt;
	mw->mr_pdhdl	= pd;
	mw->mr_rsrcp	= rsrc;
	mw->mr_rkey	= hermon_mr_key_swap(mw->mr_rkey);
	*mwhdl = mw;

	return (DDI_SUCCESS);

/* Unwind in reverse order of acquisition (see goto-cleanup chain above) */
mwalloc_fail3:
	hermon_rsrc_free(state, &rsrc);
mwalloc_fail2:
	hermon_rsrc_free(state, &mpt);
mwalloc_fail1:
	hermon_pd_refcnt_dec(pd);
mwalloc_fail:
	return (status);
}


/*
 * hermon_mw_free()
 *    Context: Can be called from interrupt or base context.
 *
 * Free the memory window "*mwhdl": reclaim its MPT entry from
 * hardware, free the handle and MPT resources, and drop the PD
 * reference.  "*mwhdl" is NULLed on success.
 */
int
hermon_mw_free(hermon_state_t *state, hermon_mwhdl_t *mwhdl, uint_t sleep)
{
	hermon_rsrc_t		*mpt, *rsrc;
	hermon_mwhdl_t		mw;
	int			status;
	hermon_pdhdl_t		pd;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		return (status);
	}

	/*
	 * Pull all the necessary information from the Hermon Memory Window
	 * handle.  This is necessary here because the resource for the
	 * MW handle is going to be freed up as part of the this operation.
	 */
	mw = *mwhdl;
	mutex_enter(&mw->mr_lock);
	mpt	= mw->mr_mptrsrcp;
	rsrc	= mw->mr_rsrcp;
	pd	= mw->mr_pdhdl;
	mutex_exit(&mw->mr_lock);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))

	/*
	 * Reclaim the MPT entry from hardware.  Note: in general, it is
	 * unexpected for this operation to return an error.
	 */
	status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL,
	    0, mpt->hr_indx, sleep);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/* Free the Hermon Memory Window handle */
	hermon_rsrc_free(state, &rsrc);

	/* Free up the MPT entry resource */
	hermon_rsrc_free(state, &mpt);

	/* Decrement the reference count on the protection domain (PD) */
	hermon_pd_refcnt_dec(pd);

	/* Set the mwhdl pointer to NULL and return success */
	*mwhdl = NULL;

	return (DDI_SUCCESS);
}


/*
 * hermon_mr_keycalc()
 *    Context: Can be called from interrupt or base context.
 *    NOTE:  Produces a key in the form of
 *	KKKKKKKK IIIIIIII IIIIIIII IIIIIIIII
 *    where K == the arbitrary bits and I == the index
 */
uint32_t
hermon_mr_keycalc(uint32_t indx)
{
	uint32_t tmp_key, tmp_indx;

	/*
	 * Generate a simple key from counter.  Note:  We increment this
	 * static variable _intentionally_ without any kind of mutex around
	 * it.  First, single-threading all operations through a single lock
	 * would be a bad idea (from a performance point-of-view).  Second,
	 * the upper "unconstrained" bits don't really have to be unique
	 * because the lower bits are guaranteed to be (although we do make a
	 * best effort to ensure that they are).  Third, the window for the
	 * race (where both threads read and update the counter at the same
	 * time) is incredibly small.
	 * And, lastly, we'd like to make this into a "random" key
	 */
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(hermon_memkey_cnt))
	tmp_key = (hermon_memkey_cnt++) << HERMON_MEMKEY_SHIFT;
	tmp_indx = indx & 0xffffff;
	return (tmp_key | tmp_indx);
}


/*
 * hermon_mr_key_swap()
 *    Context: Can be called from interrupt or base context.
 *    NOTE:  Produces a key in the form of
 *	IIIIIIII IIIIIIII IIIIIIIII KKKKKKKK
 *    where K == the arbitrary bits and I == the index
 */
uint32_t
hermon_mr_key_swap(uint32_t indx)
{
	/*
	 * The memory key format to pass down to the hardware is
	 * (key[7:0],index[23:0]), which defines the index to the
	 * hardware resource.  When the driver passes this as a memory
	 * key, (i.e. to retrieve a resource) the format is
	 * (index[23:0],key[7:0]).
	 */
	return (((indx >> 24) & 0x000000ff) | ((indx << 8) & 0xffffff00));
}

/*
 * hermon_mr_common_reg()
 *    Context: Can be called from interrupt or base context.
 *
 * Common work-horse for the memory registration entry points:
 * allocates the MPT (when mpt_type is HERMON_MPT_DMPT) and MR handle,
 * pins userland memory when needed, binds the memory and writes the
 * MTT entries, then hands the MPT to the hardware.  All failure paths
 * unwind through the mrcommon_fail* goto chain at the bottom.
 */
static int
hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd,
    hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
    hermon_mpt_rsrc_type_t mpt_type)
{
	hermon_rsrc_t		*mpt, *mtt, *rsrc, *mtt_refcnt;
	hermon_umap_db_entry_t	*umapdb;
	hermon_sw_refcnt_t	*swrc_tmp;
	hermon_hw_dmpt_t	mpt_entry;
	hermon_mrhdl_t		mr;
	ibt_mr_flags_t		flags;
	hermon_bind_info_t	*bh;
	ddi_dma_handle_t	bind_dmahdl;
	ddi_umem_cookie_t	umem_cookie;
	size_t			umem_len;
	caddr_t			umem_addr;
	uint64_t		mtt_addr, max_sz;
	uint_t			sleep, mtt_pgsize_bits, bind_type, mr_is_umem;
	int			status, umem_flags, bind_override_addr;

	/*
	 * Check the "options" flag.  Currently this flag tells the driver
	 * whether or not the region should be bound normally (i.e.
 with
	 * entries written into the PCI IOMMU), whether it should be
	 * registered to bypass the IOMMU, and whether or not the resulting
	 * address should be "zero-based" (to aid the alignment restrictions
	 * for QPs).
	 */
	if (op == NULL) {
		bind_type	   = HERMON_BINDMEM_NORMAL;
		bind_dmahdl	   = NULL;
		bind_override_addr = 0;
	} else {
		bind_type	   = op->mro_bind_type;
		bind_dmahdl	   = op->mro_bind_dmahdl;
		bind_override_addr = op->mro_bind_override_addr;
	}

	/* check what kind of mpt to use */

	/* Extract the flags field from the hermon_bind_info_t */
	flags = bind->bi_flags;

	/*
	 * Check for invalid length.  Check is the length is zero or if the
	 * length is larger than the maximum configured value.  Return error
	 * if it is.
	 */
	max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz);
	if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
		status = IBT_MR_LEN_INVALID;
		goto mrcommon_fail;
	}

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		goto mrcommon_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	hermon_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry.  This will be filled in with all the
	 * necessary parameters to define the memory region.  And then
	 * ownership will be passed to the hardware in the final step
	 * below.  If we fail here, we must undo the protection domain
	 * reference count.
	 */
	if (mpt_type == HERMON_MPT_DMPT) {
		status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
		if (status != DDI_SUCCESS) {
			status = IBT_INSUFF_RESOURCE;
			goto mrcommon_fail1;
		}
	} else {
		mpt = NULL;
	}

	/*
	 * Allocate the software structure for tracking the memory region (i.e.
	 * the Hermon Memory Region handle).  If we fail here, we must undo
	 * the protection domain reference count and the previous resource
	 * allocation.
	 */
	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrcommon_fail2;
	}
	mr = (hermon_mrhdl_t)rsrc->hr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

	/*
	 * Setup and validate the memory region access flags.  This means
	 * translating the IBTF's enable flags into the access flags that
	 * will be used in later operations.
	 */
	mr->mr_accflag = 0;
	if (flags & IBT_MR_ENABLE_WINDOW_BIND)
		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
	if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
	if (flags & IBT_MR_ENABLE_REMOTE_READ)
		mr->mr_accflag |= IBT_MR_REMOTE_READ;
	if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
	if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	if (mpt)
		mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);

	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	}

	/*
	 * Determine if the memory is from userland and pin the pages
	 * with umem_lockmemory() if necessary.
	 * Then, if this is userland memory, allocate an entry in the
	 * "userland resources database".  This will later be added to
	 * the database (after all further memory registration operations are
	 * successful).  If we fail here, we must undo the reference counts
	 * and the previous resource allocations.
	 */
	mr_is_umem = (((bind->bi_as != NULL) && (bind->bi_as != &kas)) ? 1 : 0);
	if (mr_is_umem) {
		/* Round the pinned range out to whole pages */
		umem_len   = ptob(btopr(bind->bi_len +
		    ((uintptr_t)bind->bi_addr & PAGEOFFSET)));
		umem_addr  = (caddr_t)((uintptr_t)bind->bi_addr & ~PAGEOFFSET);
		umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
		    DDI_UMEMLOCK_LONGTERM);
		status = umem_lockmemory(umem_addr, umem_len, umem_flags,
		    &umem_cookie, &hermon_umem_cbops, NULL);
		if (status != 0) {
			status = IBT_INSUFF_RESOURCE;
			goto mrcommon_fail3;
		}

		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind->bi_buf))

		bind->bi_buf = ddi_umem_iosetup(umem_cookie, 0, umem_len,
		    B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
		if (bind->bi_buf == NULL) {
			status = IBT_INSUFF_RESOURCE;
			goto mrcommon_fail3;
		}
		bind->bi_type = HERMON_BINDHDL_UBUF;
		bind->bi_buf->b_flags |= B_READ;

		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))

		umapdb = hermon_umap_db_alloc(state->hs_instance,
		    (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
		    (uint64_t)(uintptr_t)rsrc);
		if (umapdb == NULL) {
			status = IBT_INSUFF_RESOURCE;
			goto mrcommon_fail4;
		}
	}

	/*
	 * Setup the bindinfo for the mtt bind call
	 */
	bh = &mr->mr_bindinfo;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bh))
	bcopy(bind, bh, sizeof (hermon_bind_info_t));
	bh->bi_bypass = bind_type;
	status = hermon_mr_mtt_bind(state, bh, bind_dmahdl, &mtt,
	    &mtt_pgsize_bits, mpt != NULL);
	if (status != DDI_SUCCESS) {
		/*
		 * When mtt_bind fails, freerbuf has already been done,
		 * so make sure not to call it again.
		 */
		bind->bi_type = bh->bi_type;
		goto mrcommon_fail5;
	}
	mr->mr_logmttpgsz = mtt_pgsize_bits;

	/*
	 * Allocate MTT reference count (to track shared memory regions).
	 * This reference count resource may never be used on the given
	 * memory region, but if it is ever later registered as "shared"
	 * memory region then this resource will be necessary.  If we fail
	 * here, we do pretty much the same as above to clean up.
	 */
	status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1, sleep,
	    &mtt_refcnt);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrcommon_fail6;
	}
	mr->mr_mttrefcntp = mtt_refcnt;
	swrc_tmp = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp))
	HERMON_MTT_REFCNT_INIT(swrc_tmp);

	mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Hermon hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.  Do this ONLY for DMPTs.
	 */
	if (mpt == NULL)
		goto no_passown;

	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));

	mpt_entry.status  = HERMON_MPT_SW_OWNERSHIP;
	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
	mpt_entry.lr	  = 1;
	mpt_entry.phys_addr = 0;
	mpt_entry.reg_win = HERMON_MPT_IS_REGION;

	mpt_entry.entity_sz	= mr->mr_logmttpgsz;
	mpt_entry.mem_key	= mr->mr_lkey;
	mpt_entry.pd		= pd->pd_pdnum;
	mpt_entry.rem_acc_en	= 0;
	mpt_entry.fast_reg_en	= 0;
	mpt_entry.en_inval	= 0;
	mpt_entry.lkey		= 0;
	mpt_entry.win_cnt	= 0;

	if (bind_override_addr == 0) {
		mpt_entry.start_addr = bh->bi_addr;
	} else {
		/* Zero-based: keep only the in-page offset of the address */
		bh->bi_addr = bh->bi_addr & ((1 << mr->mr_logmttpgsz) - 1);
		mpt_entry.start_addr = bh->bi_addr;
	}
	mpt_entry.reg_win_len	= bh->bi_len;

	mpt_entry.mtt_addr_h = mtt_addr >> 32;	/* only 8 more bits */
	mpt_entry.mtt_addr_l = mtt_addr >> 3;	/* only 29 bits */

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware if needed.  Note: in general, this
	 * operation shouldn't fail.  But if it does, we have to undo
	 * everything we've done above before returning error.
	 *
	 * For Hermon, this routine (which is common to the contexts) will only
	 * set the ownership if needed - the process of passing the context
	 * itself to HW will take care of setting up the MPT (based on type
	 * and index).
	 */

	mpt_entry.bnd_qp = 0;	/* dMPT for a qp, check for window */
	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		status = ibc_get_ci_failure(0);
		goto mrcommon_fail7;
	}
no_passown:

	/*
	 * Fill in the rest of the Hermon Memory Region handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MR.
	 */
	mr->mr_mttaddr = mtt_addr;

	mr->mr_log2_pgsz	= (mr->mr_logmttpgsz - HERMON_PAGESHIFT);
	mr->mr_mptrsrcp		= mpt;
	mr->mr_mttrsrcp		= mtt;
	mr->mr_pdhdl		= pd;
	mr->mr_rsrcp		= rsrc;
	mr->mr_is_umem		= mr_is_umem;
	mr->mr_is_fmr		= 0;
	mr->mr_umemcookie	= (mr_is_umem != 0) ? umem_cookie : NULL;
	mr->mr_umem_cbfunc	= NULL;
	mr->mr_umem_cbarg1	= NULL;
	mr->mr_umem_cbarg2	= NULL;
	mr->mr_lkey		= hermon_mr_key_swap(mr->mr_lkey);
	mr->mr_rkey		= hermon_mr_key_swap(mr->mr_rkey);
	mr->mr_mpt_type		= mpt_type;

	/*
	 * If this is userland memory, then we need to insert the previously
	 * allocated entry into the "userland resources database".  This will
	 * allow for later coordination between the hermon_umap_umemlock_cb()
	 * callback and hermon_mr_deregister().
	 */
	if (mr_is_umem) {
		hermon_umap_db_add(umapdb);
	}

	*mrhdl = mr;

	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrcommon_fail7:
	hermon_rsrc_free(state, &mtt_refcnt);
mrcommon_fail6:
	hermon_mr_mem_unbind(state, bh);
	bind->bi_type = bh->bi_type;
mrcommon_fail5:
	if (mr_is_umem) {
		hermon_umap_db_free(umapdb);
	}
mrcommon_fail4:
	if (mr_is_umem) {
		/*
		 * Free up the memory ddi_umem_iosetup() allocates
		 * internally.
		 */
		if (bind->bi_type == HERMON_BINDHDL_UBUF) {
			freerbuf(bind->bi_buf);
			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
			bind->bi_type = HERMON_BINDHDL_NONE;
			_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
		}
		ddi_umem_unlock(umem_cookie);
	}
mrcommon_fail3:
	hermon_rsrc_free(state, &rsrc);
mrcommon_fail2:
	if (mpt != NULL)
		hermon_rsrc_free(state, &mpt);
mrcommon_fail1:
	hermon_pd_refcnt_dec(pd);
mrcommon_fail:
	return (status);
}

/*
 * hermon_mr_mtt_bind()
 *    Context: Can be called from interrupt or base context.
 *
 * Bind the memory described by "bind", allocate the required MTT
 * entries (returned through "mtt"), and write the mapped addresses
 * into them.  The chosen MTT page size (log2) is returned through
 * "mtt_pgsize_bits".
 */
int
hermon_mr_mtt_bind(hermon_state_t *state, hermon_bind_info_t *bind,
    ddi_dma_handle_t bind_dmahdl, hermon_rsrc_t **mtt, uint_t *mtt_pgsize_bits,
    uint_t is_buffer)
{
	uint64_t		nummtt;
	uint_t			sleep;
	int			status;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (bind->bi_flags & IBT_MR_NOSLEEP) ?
	    HERMON_NOSLEEP : HERMON_SLEEP;
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		goto mrmttbind_fail;
	}

	/*
	 * Bind the memory and determine the mapped addresses.
This is
     * the first of two routines that do all the "heavy lifting" for
     * the Hermon memory registration routines.  The hermon_mr_mem_bind()
     * routine takes the "bind" struct with all its fields filled
     * in and returns a list of DMA cookies (for the PCI mapped addresses
     * corresponding to the specified address region) which are used by
     * the hermon_mr_fast_mtt_write() routine below.  If we fail here, we
     * must undo all the previous resource allocation (and PD reference
     * count).
     */
    status = hermon_mr_mem_bind(state, bind, bind_dmahdl, sleep, is_buffer);
    if (status != DDI_SUCCESS) {
        status = IBT_INSUFF_RESOURCE;
        goto mrmttbind_fail;
    }

    /*
     * Determine number of pages spanned.  This routine uses the
     * information in the "bind" struct to determine the required
     * number of MTT entries needed (and returns the suggested page size -
     * as a "power-of-2" - for each MTT entry).
     */
    nummtt = hermon_mr_nummtt_needed(state, bind, mtt_pgsize_bits);

    /*
     * Allocate the MTT entries.  Use the calculations performed above to
     * allocate the required number of MTT entries.  If we fail here, we
     * must not only undo all the previous resource allocation (and PD
     * reference count), but we must also unbind the memory.
     */
    status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, mtt);
    if (status != DDI_SUCCESS) {
        status = IBT_INSUFF_RESOURCE;
        goto mrmttbind_fail2;
    }

    /*
     * Write the mapped addresses into the MTT entries.  This is part two
     * of the "heavy lifting" routines that we talked about above.  Note:
     * we pass the suggested page size from the earlier operation here.
     * And if we fail here, we again do pretty much the same huge clean up.
     */
    status = hermon_mr_fast_mtt_write(state, *mtt, bind, *mtt_pgsize_bits);
    if (status != DDI_SUCCESS) {
        /*
         * hermon_mr_fast_mtt_write() returns DDI_FAILURE
         * only if it detects a HW error during DMA.
         */
        hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        status = ibc_get_ci_failure(0);
        goto mrmttbind_fail3;
    }
    return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine.
 * Labels fall through intentionally, unwinding in reverse order.
 */
mrmttbind_fail3:
    hermon_rsrc_free(state, mtt);
mrmttbind_fail2:
    hermon_mr_mem_unbind(state, bind);
mrmttbind_fail:
    return (status);
}


/*
 * hermon_mr_mtt_unbind()
 *    Context: Can be called from interrupt or base context.
 *
 * Undo hermon_mr_mtt_bind(): unbind the DMA mapping and release the MTT
 * resource.  Always returns DDI_SUCCESS (neither teardown step reports
 * failure to the caller), so callers may safely ignore the return value.
 */
int
hermon_mr_mtt_unbind(hermon_state_t *state, hermon_bind_info_t *bind,
    hermon_rsrc_t *mtt)
{
    /*
     * Free up the MTT entries and unbind the memory.  Here, as above, we
     * attempt to free these resources only if it is appropriate to do so.
     */
    hermon_mr_mem_unbind(state, bind);
    hermon_rsrc_free(state, &mtt);

    return (DDI_SUCCESS);
}


/*
 * hermon_mr_common_rereg()
 *    Context: Can be called from interrupt or base context.
 *
 * Common code for reregistering an existing memory region: regain MPT
 * ownership from hardware, optionally change PD, access flags, and/or
 * translation, then hand the rebuilt MPT back to hardware.  On most
 * failures the region is deregistered before returning (per the IBTF
 * reregistration contract); *mrhdl_new is set only on success.
 */
static int
hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new,
    hermon_mr_options_t *op)
{
    hermon_rsrc_t *mpt;
    ibt_mr_attr_flags_t acc_flags_to_use;
    ibt_mr_flags_t flags;
    hermon_pdhdl_t pd_to_use;
    hermon_hw_dmpt_t mpt_entry;
    uint64_t mtt_addr_to_use, vaddr_to_use, len_to_use;
    uint_t sleep, dereg_level;
    int status;

    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))

    /*
     * Check here to see if the memory region corresponds to a userland
     * mapping.  Reregistration of userland memory regions is not
     * currently supported.
Return failure.
     */
    if (mr->mr_is_umem) {
        status = IBT_MR_HDL_INVALID;
        goto mrrereg_fail;
    }

    mutex_enter(&mr->mr_lock);

    /* Pull MPT resource pointer from the Hermon Memory Region handle */
    mpt = mr->mr_mptrsrcp;

    /* Extract the flags field from the hermon_bind_info_t */
    flags = bind->bi_flags;

    /*
     * Check the sleep flag.  Ensure that it is consistent with the
     * current thread context (i.e. if we are currently in the interrupt
     * context, then we shouldn't be attempting to sleep).
     */
    sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
    if ((sleep == HERMON_SLEEP) &&
        (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
        mutex_exit(&mr->mr_lock);
        status = IBT_INVALID_PARAM;
        goto mrrereg_fail;
    }

    /*
     * First step is to temporarily invalidate the MPT entry.  This
     * regains ownership from the hardware, and gives us the opportunity
     * to modify the entry.  Note: The HW2SW_MPT command returns the
     * current MPT entry contents into "mpt_entry".
     *
     * NOTE(review): the original comment claims these saved contents
     * "will be reused in a later step below", but mpt_entry is bzero'd
     * and rebuilt from scratch before the SW2HW_MPT below -- the saved
     * contents appear unused; confirm and update this comment upstream.
     *
     * If the region has bound memory windows then we fail, returning an
     * "in use" error code.  Otherwise, this is an unexpected error and
     * we deregister the memory region and return error.
     *
     * We use HERMON_CMD_NOSLEEP_SPIN here always because we must protect
     * against holding the lock around this rereg call in all contexts.
     */
    status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, &mpt_entry,
        sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN);
    if (status != HERMON_CMD_SUCCESS) {
        mutex_exit(&mr->mr_lock);
        if (status == HERMON_CMD_REG_BOUND) {
            return (IBT_MR_IN_USE);
        } else {
            cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: "
                "%08x\n", status);
            if (status == HERMON_CMD_INVALID_STATUS) {
                hermon_fm_ereport(state, HCA_SYS_ERR,
                    HCA_ERR_SRV_LOST);
            }
            /*
             * Call deregister and ensure that all current
             * resources get freed up
             */
            if (hermon_mr_deregister(state, &mr,
                HERMON_MR_DEREG_ALL, sleep) != DDI_SUCCESS) {
                HERMON_WARNING(state, "failed to deregister "
                    "memory region");
            }
            return (ibc_get_ci_failure(0));
        }
    }

    /*
     * If we're changing the protection domain, then validate the new one
     */
    if (flags & IBT_MR_CHANGE_PD) {

        /* Check for valid PD handle pointer */
        if (pd == NULL) {
            mutex_exit(&mr->mr_lock);
            /*
             * Call deregister and ensure that all current
             * resources get properly freed up.  Unnecessary
             * here to attempt to regain software ownership
             * of the MPT entry as that has already been
             * done above.
             */
            if (hermon_mr_deregister(state, &mr,
                HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) !=
                DDI_SUCCESS) {
                HERMON_WARNING(state, "failed to deregister "
                    "memory region");
            }
            status = IBT_PD_HDL_INVALID;
            goto mrrereg_fail;
        }

        /* Use the new PD handle in all operations below */
        pd_to_use = pd;

    } else {
        /* Use the current PD handle in all operations below */
        pd_to_use = mr->mr_pdhdl;
    }

    /*
     * If we're changing access permissions, then validate the new ones
     */
    if (flags & IBT_MR_CHANGE_ACCESS) {
        /*
         * Validate the access flags.  Both remote write and remote
         * atomic require the local write flag to be set
         */
        if (((flags & IBT_MR_ENABLE_REMOTE_WRITE) ||
            (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)) &&
            !(flags & IBT_MR_ENABLE_LOCAL_WRITE)) {
            mutex_exit(&mr->mr_lock);
            /*
             * Call deregister and ensure that all current
             * resources get properly freed up.  Unnecessary
             * here to attempt to regain software ownership
             * of the MPT entry as that has already been
             * done above.
             */
            if (hermon_mr_deregister(state, &mr,
                HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) !=
                DDI_SUCCESS) {
                HERMON_WARNING(state, "failed to deregister "
                    "memory region");
            }
            status = IBT_MR_ACCESS_REQ_INVALID;
            goto mrrereg_fail;
        }

        /*
         * Setup and validate the memory region access flags.  This
         * means translating the IBTF's enable flags into the access
         * flags that will be used in later operations.
         */
        acc_flags_to_use = 0;
        if (flags & IBT_MR_ENABLE_WINDOW_BIND)
            acc_flags_to_use |= IBT_MR_WINDOW_BIND;
        if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
            acc_flags_to_use |= IBT_MR_LOCAL_WRITE;
        if (flags & IBT_MR_ENABLE_REMOTE_READ)
            acc_flags_to_use |= IBT_MR_REMOTE_READ;
        if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
            acc_flags_to_use |= IBT_MR_REMOTE_WRITE;
        if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
            acc_flags_to_use |= IBT_MR_REMOTE_ATOMIC;

    } else {
        /* Keep the region's current access flags */
        acc_flags_to_use = mr->mr_accflag;
    }

    /*
     * If we're modifying the translation, then figure out whether
     * we can reuse the current MTT resources.  This means calling
     * hermon_mr_rereg_xlat_helper() which does most of the heavy lifting
     * for the reregistration.  If the current memory region contains
     * sufficient MTT entries for the new regions, then it will be
     * reused and filled in.
Otherwise, new entries will be allocated,
     * the old ones will be freed, and the new entries will be filled
     * in.  Note: If we're not modifying the translation, then we
     * should already have all the information we need to update the MPT.
     * Also note: If hermon_mr_rereg_xlat_helper() fails, it will return
     * a "dereg_level" which is the level of cleanup that needs to be
     * passed to hermon_mr_deregister() to finish the cleanup.
     */
    if (flags & IBT_MR_CHANGE_TRANSLATION) {
        status = hermon_mr_rereg_xlat_helper(state, mr, bind, op,
            &mtt_addr_to_use, sleep, &dereg_level);
        if (status != DDI_SUCCESS) {
            mutex_exit(&mr->mr_lock);
            /*
             * Call deregister and ensure that all resources get
             * properly freed up.
             */
            if (hermon_mr_deregister(state, &mr, dereg_level,
                sleep) != DDI_SUCCESS) {
                HERMON_WARNING(state, "failed to deregister "
                    "memory region");
            }
            goto mrrereg_fail;
        }
        /* The helper updated mr_bindinfo; pick up the new vaddr/len */
        vaddr_to_use = mr->mr_bindinfo.bi_addr;
        len_to_use = mr->mr_bindinfo.bi_len;
    } else {
        /* Translation unchanged: reuse the existing MTT address/region */
        mtt_addr_to_use = mr->mr_mttaddr;
        vaddr_to_use = mr->mr_bindinfo.bi_addr;
        len_to_use = mr->mr_bindinfo.bi_len;
    }

    /*
     * Calculate new keys (Lkey, Rkey) from MPT index.  Just like they were
     * when the region was first registered, each key is formed from
     * "constrained" bits and "unconstrained" bits.  Note: If no remote
     * access is required, then the RKey value is not filled in.  Otherwise
     * both Rkey and LKey are given the same value.
     */
    mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
    if ((acc_flags_to_use & IBT_MR_REMOTE_READ) ||
        (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ||
        (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC)) {
        mr->mr_rkey = mr->mr_lkey;
    } else
        mr->mr_rkey = 0;

    /*
     * Fill in the MPT entry.  This is the final step before passing
     * ownership of the MPT entry to the Hermon hardware.  We use all of
     * the information collected/calculated above to fill in the
     * requisite portions of the MPT.  The entry is rebuilt from scratch
     * (bzero), not patched from the HW2SW_MPT output.
     */
    bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));

    mpt_entry.status = HERMON_MPT_SW_OWNERSHIP;
    mpt_entry.en_bind = (acc_flags_to_use & IBT_MR_WINDOW_BIND) ? 1 : 0;
    mpt_entry.atomic = (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
    mpt_entry.rw = (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ? 1 : 0;
    mpt_entry.rr = (acc_flags_to_use & IBT_MR_REMOTE_READ) ? 1 : 0;
    mpt_entry.lw = (acc_flags_to_use & IBT_MR_LOCAL_WRITE) ? 1 : 0;
    mpt_entry.lr = 1;	/* local read is always enabled */
    mpt_entry.phys_addr = 0;
    mpt_entry.reg_win = HERMON_MPT_IS_REGION;

    mpt_entry.entity_sz = mr->mr_logmttpgsz;
    mpt_entry.mem_key = mr->mr_lkey;
    mpt_entry.pd = pd_to_use->pd_pdnum;

    mpt_entry.start_addr = vaddr_to_use;
    mpt_entry.reg_win_len = len_to_use;
    /*
     * Split the MTT address across the high/low MPT fields; the low field
     * drops the bottom 3 bits (MTT entries are 8-byte aligned -- presumed
     * from the >> 3; confirm against the hardware dMPT layout).
     */
    mpt_entry.mtt_addr_h = mtt_addr_to_use >> 32;
    mpt_entry.mtt_addr_l = mtt_addr_to_use >> 3;

    /*
     * Write the updated MPT entry to hardware
     *
     * We use HERMON_CMD_NOSLEEP_SPIN here always because we must protect
     * against holding the lock around this rereg call in all contexts.
     */
    status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
        sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN);
    if (status != HERMON_CMD_SUCCESS) {
        mutex_exit(&mr->mr_lock);
        cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        /*
         * Call deregister and ensure that all current resources get
         * properly freed up.  Unnecessary here to attempt to regain
         * software ownership of the MPT entry as that has already
         * been done above.
         */
        if (hermon_mr_deregister(state, &mr,
            HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) != DDI_SUCCESS) {
            HERMON_WARNING(state, "failed to deregister memory "
                "region");
        }
        return (ibc_get_ci_failure(0));
    }

    /*
     * If we're changing PD, then update their reference counts now.
     * This means decrementing the reference count on the old PD and
     * incrementing the reference count on the new PD.
     */
    if (flags & IBT_MR_CHANGE_PD) {
        hermon_pd_refcnt_dec(mr->mr_pdhdl);
        hermon_pd_refcnt_inc(pd);
    }

    /*
     * Update the contents of the Hermon Memory Region handle to reflect
     * what has been changed.
     */
    mr->mr_pdhdl = pd_to_use;
    mr->mr_accflag = acc_flags_to_use;
    mr->mr_is_umem = 0;
    mr->mr_is_fmr = 0;
    mr->mr_umemcookie = NULL;
    mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey);
    mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey);

    /* New MR handle is same as the old */
    *mrhdl_new = mr;
    mutex_exit(&mr->mr_lock);

    return (DDI_SUCCESS);

mrrereg_fail:
    return (status);
}


/*
 * hermon_mr_rereg_xlat_helper
 *    Context: Can be called from interrupt or base context.
 *    Note: This routine expects the "mr_lock" to be held when it
 *    is called.  Upon returning failure, this routine passes information
 *    about what "dereg_level" should be passed to hermon_mr_deregister().
 */
static int
hermon_mr_rereg_xlat_helper(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_bind_info_t *bind, hermon_mr_options_t *op, uint64_t *mtt_addr,
    uint_t sleep, uint_t *dereg_level)
{
    hermon_rsrc_t *mtt, *mtt_refcnt;
    hermon_sw_refcnt_t *swrc_old, *swrc_new;
    ddi_dma_handle_t dmahdl;
    uint64_t nummtt_needed, nummtt_in_currrsrc, max_sz;
    uint_t mtt_pgsize_bits, bind_type, reuse_dmahdl;
    int status;

    ASSERT(MUTEX_HELD(&mr->mr_lock));

    /*
     * Check the "options" flag.  Currently this flag tells the driver
     * whether or not the region should be bound normally (i.e. with
     * entries written into the PCI IOMMU) or whether it should be
     * registered to bypass the IOMMU.
     */
    if (op == NULL) {
        bind_type = HERMON_BINDMEM_NORMAL;
    } else {
        bind_type = op->mro_bind_type;
    }

    /*
     * Check for invalid length.  Check is the length is zero or if the
     * length is larger than the maximum configured value.  Return error
     * if it is.
     */
    max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz);
    if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
        /*
         * Deregister will be called upon returning failure from this
         * routine.  This will ensure that all current resources get
         * properly freed up.  Unnecessary to attempt to regain
         * software ownership of the MPT entry as that has already
         * been done above (in hermon_mr_reregister())
         */
        *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT;

        status = IBT_MR_LEN_INVALID;
        goto mrrereghelp_fail;
    }

    /*
     * Determine the number of pages necessary for new region and the
     * number of pages supported by the current MTT resources
     */
    nummtt_needed = hermon_mr_nummtt_needed(state, bind, &mtt_pgsize_bits);
    nummtt_in_currrsrc = mr->mr_mttrsrcp->hr_len >> HERMON_MTT_SIZE_SHIFT;

    /*
     * Depending on whether we have enough pages or not, the next step is
     * to fill in a set of MTT entries that reflect the new mapping.  In
     * the first case below, we already have enough entries.  This means
     * we need to unbind the memory from the previous mapping, bind the
     * memory for the new mapping, write the new MTT entries, and update
     * the mr to reflect the changes.
     * In the second case below, we do not have enough entries in the
     * current mapping.  So, in this case, we need not only to unbind the
     * current mapping, but we need to free up the MTT resources associated
     * with that mapping.  After we've successfully done that, we continue
     * by binding the new memory, allocating new MTT entries, writing the
     * new MTT entries, and updating the mr to reflect the changes.
     */

    /*
     * If this region is being shared (i.e. MTT refcount != 1), then we
     * can't reuse the current MTT resources regardless of their size.
     * Instead we'll need to alloc new ones (below) just as if there
     * hadn't been enough room in the current entries.
     */
    swrc_old = (hermon_sw_refcnt_t *)mr->mr_mttrefcntp->hr_addr;
    if (HERMON_MTT_IS_NOT_SHARED(swrc_old) &&
        (nummtt_needed <= nummtt_in_currrsrc)) {

        /*
         * Unbind the old mapping for this memory region, but retain
         * the ddi_dma_handle_t (if possible) for reuse in the bind
         * operation below.  Note: If original memory region was
         * bound for IOMMU bypass and the new region can not use
         * bypass, then a new DMA handle will be necessary.
         */
        if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
            /* Keep the handle alive across the unbind */
            mr->mr_bindinfo.bi_free_dmahdl = 0;
            hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
            dmahdl = mr->mr_bindinfo.bi_dmahdl;
            reuse_dmahdl = 1;
        } else {
            hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
            dmahdl = NULL;
            reuse_dmahdl = 0;
        }

        /*
         * Bind the new memory and determine the mapped addresses.
         * As described, this routine and hermon_mr_fast_mtt_write()
         * do the majority of the work for the memory registration
         * operations.  Note: When we successfully finish the binding,
         * we will set the "bi_free_dmahdl" flag to indicate that
         * even though we may have reused the ddi_dma_handle_t we do
         * wish it to be freed up at some later time.  Note also that
         * if we fail, we may need to cleanup the ddi_dma_handle_t.
         */
        bind->bi_bypass = bind_type;
        status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1);
        if (status != DDI_SUCCESS) {
            if (reuse_dmahdl) {
                ddi_dma_free_handle(&dmahdl);
            }

            /*
             * Deregister will be called upon returning failure
             * from this routine.  This will ensure that all
             * current resources get properly freed up.
             * Unnecessary to attempt to regain software ownership
             * of the MPT entry as that has already been done
             * above (in hermon_mr_reregister()).  Also unnecessary
             * to attempt to unbind the memory.
             */
            *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

            status = IBT_INSUFF_RESOURCE;
            goto mrrereghelp_fail;
        }
        if (reuse_dmahdl) {
            bind->bi_free_dmahdl = 1;
        }

        /*
         * Using the new mapping, but reusing the current MTT
         * resources, write the updated entries to MTT
         */
        mtt = mr->mr_mttrsrcp;
        status = hermon_mr_fast_mtt_write(state, mtt, bind,
            mtt_pgsize_bits);
        if (status != DDI_SUCCESS) {
            /*
             * Deregister will be called upon returning failure
             * from this routine.  This will ensure that all
             * current resources get properly freed up.
             * Unnecessary to attempt to regain software ownership
             * of the MPT entry as that has already been done
             * above (in hermon_mr_reregister()).  Also unnecessary
             * to attempt to unbind the memory.
             *
             * But we do need to unbind the newly bound memory
             * before returning.
             */
            hermon_mr_mem_unbind(state, bind);
            *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

            /*
             * hermon_mr_fast_mtt_write() returns DDI_FAILURE
             * only if it detects a HW error during DMA.
             */
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
            status = ibc_get_ci_failure(0);
            goto mrrereghelp_fail;
        }

        /* Put the updated information into the Mem Region handle */
        mr->mr_bindinfo = *bind;
        mr->mr_logmttpgsz = mtt_pgsize_bits;

    } else {
        /*
         * Check if the memory region MTT is shared by any other MRs.
         * Since the resource may be shared between multiple memory
         * regions (as a result of a "RegisterSharedMR()" verb) it is
         * important that we not unbind any resources prematurely.
         */
        if (!HERMON_MTT_IS_SHARED(swrc_old)) {
            /*
             * Unbind the old mapping for this memory region, but
             * retain the ddi_dma_handle_t for reuse in the bind
             * operation below.
Note: This can only be done here
             * because the region being reregistered is not
             * currently shared.  Also if original memory region
             * was bound for IOMMU bypass and the new region can
             * not use bypass, then a new DMA handle will be
             * necessary.
             */
            if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
                mr->mr_bindinfo.bi_free_dmahdl = 0;
                hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
                dmahdl = mr->mr_bindinfo.bi_dmahdl;
                reuse_dmahdl = 1;
            } else {
                hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
                dmahdl = NULL;
                reuse_dmahdl = 0;
            }
        } else {
            /* Shared MTT: leave the old mapping untouched */
            dmahdl = NULL;
            reuse_dmahdl = 0;
        }

        /*
         * Bind the new memory and determine the mapped addresses.
         * As described, this routine and hermon_mr_fast_mtt_write()
         * do the majority of the work for the memory registration
         * operations.  Note: When we successfully finish the binding,
         * we will set the "bi_free_dmahdl" flag to indicate that
         * even though we may have reused the ddi_dma_handle_t we do
         * wish it to be freed up at some later time.  Note also that
         * if we fail, we may need to cleanup the ddi_dma_handle_t.
         */
        bind->bi_bypass = bind_type;
        status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1);
        if (status != DDI_SUCCESS) {
            if (reuse_dmahdl) {
                ddi_dma_free_handle(&dmahdl);
            }

            /*
             * Deregister will be called upon returning failure
             * from this routine.  This will ensure that all
             * current resources get properly freed up.
             * Unnecessary to attempt to regain software ownership
             * of the MPT entry as that has already been done
             * above (in hermon_mr_reregister()).  Also unnecessary
             * to attempt to unbind the memory.
             */
            *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

            status = IBT_INSUFF_RESOURCE;
            goto mrrereghelp_fail;
        }
        if (reuse_dmahdl) {
            bind->bi_free_dmahdl = 1;
        }

        /*
         * Allocate the new MTT entries resource
         */
        status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt_needed,
            sleep, &mtt);
        if (status != DDI_SUCCESS) {
            /*
             * Deregister will be called upon returning failure
             * from this routine.  This will ensure that all
             * current resources get properly freed up.
             * Unnecessary to attempt to regain software ownership
             * of the MPT entry as that has already been done
             * above (in hermon_mr_reregister()).  Also unnecessary
             * to attempt to unbind the memory.
             *
             * But we do need to unbind the newly bound memory
             * before returning.
             */
            hermon_mr_mem_unbind(state, bind);
            *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

            status = IBT_INSUFF_RESOURCE;
            goto mrrereghelp_fail;
        }

        /*
         * Allocate MTT reference count (to track shared memory
         * regions).  As mentioned elsewhere above, this reference
         * count resource may never be used on the given memory region,
         * but if it is ever later registered as a "shared" memory
         * region then this resource will be necessary.  Note: This
         * is only necessary here if the existing memory region is
         * already being shared (because otherwise we already have
         * a useable reference count resource).
         */
        if (HERMON_MTT_IS_SHARED(swrc_old)) {
            status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1,
                sleep, &mtt_refcnt);
            if (status != DDI_SUCCESS) {
                /*
                 * Deregister will be called upon returning
                 * failure from this routine.  This will ensure
                 * that all current resources get properly
                 * freed up.  Unnecessary to attempt to regain
                 * software ownership of the MPT entry as that
                 * has already been done above (in
                 * hermon_mr_reregister()).  Also unnecessary
                 * to attempt to unbind the memory.
                 *
                 * But we need to unbind the newly bound
                 * memory and free up the newly allocated MTT
                 * entries before returning.
                 */
                hermon_mr_mem_unbind(state, bind);
                hermon_rsrc_free(state, &mtt);
                *dereg_level =
                    HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

                status = IBT_INSUFF_RESOURCE;
                goto mrrereghelp_fail;
            }
            swrc_new = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
            _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_new))
            HERMON_MTT_REFCNT_INIT(swrc_new);
        } else {
            /* Reuse the region's existing (unshared) refcount */
            mtt_refcnt = mr->mr_mttrefcntp;
        }

        /*
         * Using the new mapping and the new MTT resources, write the
         * updated entries to MTT
         */
        status = hermon_mr_fast_mtt_write(state, mtt, bind,
            mtt_pgsize_bits);
        if (status != DDI_SUCCESS) {
            /*
             * Deregister will be called upon returning failure
             * from this routine.  This will ensure that all
             * current resources get properly freed up.
             * Unnecessary to attempt to regain software ownership
             * of the MPT entry as that has already been done
             * above (in hermon_mr_reregister()).  Also unnecessary
             * to attempt to unbind the memory.
             *
             * But we need to unbind the newly bound memory,
             * free up the newly allocated MTT entries, and
             * (possibly) free the new MTT reference count
             * resource before returning.
             */
            if (HERMON_MTT_IS_SHARED(swrc_old)) {
                hermon_rsrc_free(state, &mtt_refcnt);
            }
            hermon_mr_mem_unbind(state, bind);
            hermon_rsrc_free(state, &mtt);
            *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

            status = IBT_INSUFF_RESOURCE;
            goto mrrereghelp_fail;
        }

        /*
         * Check if the memory region MTT is shared by any other MRs.
         * Since the resource may be shared between multiple memory
         * regions (as a result of a "RegisterSharedMR()" verb) it is
         * important that we not free up any resources prematurely.
         */
        if (HERMON_MTT_IS_SHARED(swrc_old)) {
            /* Decrement MTT reference count for "old" region */
            (void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp);
        } else {
            /* Free up the old MTT entries resource */
            hermon_rsrc_free(state, &mr->mr_mttrsrcp);
        }

        /* Put the updated information into the mrhdl */
        mr->mr_bindinfo = *bind;
        mr->mr_logmttpgsz = mtt_pgsize_bits;
        mr->mr_mttrsrcp = mtt;
        mr->mr_mttrefcntp = mtt_refcnt;
    }

    /*
     * Calculate and return the updated MTT address (in the DDR address
     * space).  This will be used by the caller (hermon_mr_reregister) in
     * the updated MPT entry
     */
    *mtt_addr = mtt->hr_indx << HERMON_MTT_SIZE_SHIFT;

    return (DDI_SUCCESS);

mrrereghelp_fail:
    return (status);
}


/*
 * hermon_mr_nummtt_needed()
 *    Context: Can be called from interrupt or base context.
 *
 * Compute the number of MTT entries required to cover [bi_addr, bi_addr +
 * bi_len) at the page size reported through "mtt_pgsize_bits" (currently
 * always the native PAGESHIFT).  Accounts for the offset of bi_addr within
 * its first page.
 */
/* ARGSUSED */
static uint64_t
hermon_mr_nummtt_needed(hermon_state_t *state, hermon_bind_info_t *bind,
    uint_t *mtt_pgsize_bits)
{
    uint64_t pg_offset_mask;
    uint64_t pg_offset, tmp_length;

    /*
     * For now we specify the page size as 8Kb (the default page size for
     * the sun4u architecture), or 4Kb for x86.  Figure out optimal page
     * size by examining the dmacookies
     */
    *mtt_pgsize_bits = PAGESHIFT;

    pg_offset_mask = ((uint64_t)1 << *mtt_pgsize_bits) - 1;
    pg_offset = bind->bi_addr & pg_offset_mask;
    tmp_length = pg_offset + (bind->bi_len - 1);
    return ((tmp_length >> *mtt_pgsize_bits) + 1);
}


/*
 * hermon_mr_mem_bind()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind,
    ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer)
{
    ddi_dma_attr_t dma_attr;
    int (*callback)(caddr_t);	/* DDI_DMA_SLEEP or DDI_DMA_DONTWAIT */
    int status;

    /* bi_type must be set to a meaningful value to get a bind handle */
    ASSERT(bind->bi_type == HERMON_BINDHDL_VADDR ||
        bind->bi_type == HERMON_BINDHDL_BUF ||
        bind->bi_type == HERMON_BINDHDL_UBUF);

    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))

    /* Set the callback flag appropriately */
    callback = (sleep == HERMON_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;

    /*
     * Initialize many of the default DMA attributes.  Then, if we're
     * bypassing the IOMMU, set the DDI_DMA_FORCE_PHYSICAL flag.
     */
    if (dmahdl == NULL) {
        hermon_dma_attr_init(state, &dma_attr);
#ifdef __sparc
        if (bind->bi_bypass == HERMON_BINDMEM_BYPASS) {
            dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
        }
#endif

        /* set RO if needed - tunable set and 'is_buffer' is non-0 */
        if (is_buffer) {
            if (! (bind->bi_flags & IBT_MR_DISABLE_RO)) {
                /* Kernel buffers honor the kernel RO tunable */
                if ((bind->bi_type != HERMON_BINDHDL_UBUF) &&
                    (hermon_kernel_data_ro ==
                    HERMON_RO_ENABLED)) {
                    dma_attr.dma_attr_flags |=
                        DDI_DMA_RELAXED_ORDERING;
                }
                /* Userland buffers honor the user RO tunable */
                if (((bind->bi_type == HERMON_BINDHDL_UBUF) &&
                    (hermon_user_data_ro ==
                    HERMON_RO_ENABLED))) {
                    dma_attr.dma_attr_flags |=
                        DDI_DMA_RELAXED_ORDERING;
                }
            }
        }

        /* Allocate a DMA handle for the binding */
        status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
            callback, NULL, &bind->bi_dmahdl);
        if (status != DDI_SUCCESS) {
            return (status);
        }
        /* We own this handle; mark it for freeing at unbind time */
        bind->bi_free_dmahdl = 1;

    } else {
        /* Caller supplied a handle; do not free it at unbind time */
        bind->bi_dmahdl = dmahdl;
        bind->bi_free_dmahdl = 0;
    }


    /*
     * Bind the memory to get the PCI mapped addresses.  The decision
     * to call ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle()
     * is determined by the "bi_type" flag.  Note: if the bind operation
     * fails then we have to free up the DMA handle and return error.
     */
    if (bind->bi_type == HERMON_BINDHDL_VADDR) {
        status = ddi_dma_addr_bind_handle(bind->bi_dmahdl, NULL,
            (caddr_t)(uintptr_t)bind->bi_addr, bind->bi_len,
            (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), callback, NULL,
            &bind->bi_dmacookie, &bind->bi_cookiecnt);

    } else {  /* HERMON_BINDHDL_BUF or HERMON_BINDHDL_UBUF */

        status = ddi_dma_buf_bind_handle(bind->bi_dmahdl,
            bind->bi_buf, (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), callback,
            NULL, &bind->bi_dmacookie, &bind->bi_cookiecnt);
    }
    if (status != DDI_DMA_MAPPED) {
        if (bind->bi_free_dmahdl != 0) {
            ddi_dma_free_handle(&bind->bi_dmahdl);
        }
        return (status);
    }

    return (DDI_SUCCESS);
}


/*
 * hermon_mr_mem_unbind()
 *    Context: Can be called from interrupt or base context.
 *
 * Undo hermon_mr_mem_bind(): release any ddi_umem_iosetup() buf, unbind the
 * DMA mapping, and free the DMA handle if this module allocated it.
 */
static void
hermon_mr_mem_unbind(hermon_state_t *state, hermon_bind_info_t *bind)
{
    int status;

    /*
     * In case of HERMON_BINDHDL_UBUF, the memory bi_buf points to
     * is actually allocated by ddi_umem_iosetup() internally, then
     * it's required to free it here.  Reset bi_type to HERMON_BINDHDL_NONE
     * not to free it again later.
     */
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
    if (bind->bi_type == HERMON_BINDHDL_UBUF) {
        freerbuf(bind->bi_buf);
        bind->bi_type = HERMON_BINDHDL_NONE;
    }
    _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))

    /*
     * Unbind the DMA memory for the region
     *
     * Note: The only way ddi_dma_unbind_handle() currently
     * can return an error is if the handle passed in is invalid.
     * Since this should never happen, we choose to return void
     * from this function!
If this does return an error, however, 2782 * then we print a warning message to the console. 2783 */ 2784 status = ddi_dma_unbind_handle(bind->bi_dmahdl); 2785 if (status != DDI_SUCCESS) { 2786 HERMON_WARNING(state, "failed to unbind DMA mapping"); 2787 return; 2788 } 2789 2790 /* Free up the DMA handle */ 2791 if (bind->bi_free_dmahdl != 0) { 2792 ddi_dma_free_handle(&bind->bi_dmahdl); 2793 } 2794 } 2795 2796 2797 /* 2798 * hermon_mr_fast_mtt_write() 2799 * Context: Can be called from interrupt or base context. 2800 */ 2801 static int 2802 hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt, 2803 hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits) 2804 { 2805 hermon_icm_table_t *icm_table; 2806 hermon_dma_info_t *dma_info; 2807 uint32_t index1, index2, rindx; 2808 ddi_dma_cookie_t dmacookie; 2809 uint_t cookie_cnt; 2810 uint64_t *mtt_table; 2811 uint64_t mtt_entry; 2812 uint64_t addr, endaddr; 2813 uint64_t pagesize; 2814 offset_t i, start; 2815 uint_t per_span; 2816 int sync_needed; 2817 2818 /* 2819 * XXX According to the PRM, we are to use the WRITE_MTT 2820 * command to write out MTTs. Tavor does not do this, 2821 * instead taking advantage of direct access to the MTTs, 2822 * and knowledge that Mellanox FMR relies on our ability 2823 * to write directly to the MTTs without any further 2824 * notification to the firmware. Likewise, we will choose 2825 * to not use the WRITE_MTT command, but to simply write 2826 * out the MTTs. 
2827 */ 2828 2829 /* Calculate page size from the suggested value passed in */ 2830 pagesize = ((uint64_t)1 << mtt_pgsize_bits); 2831 2832 /* Walk the "cookie list" and fill in the MTT table entries */ 2833 dmacookie = bind->bi_dmacookie; 2834 cookie_cnt = bind->bi_cookiecnt; 2835 2836 icm_table = &state->hs_icm[HERMON_MTT]; 2837 rindx = mtt->hr_indx; 2838 hermon_index(index1, index2, rindx, icm_table, i); 2839 start = i; 2840 2841 per_span = icm_table->span; 2842 dma_info = icm_table->icm_dma[index1] + index2; 2843 mtt_table = (uint64_t *)(uintptr_t)dma_info->vaddr; 2844 2845 sync_needed = 0; 2846 while (cookie_cnt-- > 0) { 2847 addr = dmacookie.dmac_laddress; 2848 endaddr = addr + (dmacookie.dmac_size - 1); 2849 addr = addr & ~((uint64_t)pagesize - 1); 2850 2851 while (addr <= endaddr) { 2852 2853 /* 2854 * Fill in the mapped addresses (calculated above) and 2855 * set HERMON_MTT_ENTRY_PRESENT flag for each MTT entry. 2856 */ 2857 mtt_entry = addr | HERMON_MTT_ENTRY_PRESENT; 2858 mtt_table[i] = htonll(mtt_entry); 2859 i++; 2860 rindx++; 2861 2862 if (i == per_span) { 2863 2864 (void) ddi_dma_sync(dma_info->dma_hdl, 2865 start * sizeof (hermon_hw_mtt_t), 2866 (i - start) * sizeof (hermon_hw_mtt_t), 2867 DDI_DMA_SYNC_FORDEV); 2868 2869 if ((addr + pagesize > endaddr) && 2870 (cookie_cnt == 0)) 2871 return (DDI_SUCCESS); 2872 2873 hermon_index(index1, index2, rindx, icm_table, 2874 i); 2875 start = i * sizeof (hermon_hw_mtt_t); 2876 dma_info = icm_table->icm_dma[index1] + index2; 2877 mtt_table = 2878 (uint64_t *)(uintptr_t)dma_info->vaddr; 2879 2880 sync_needed = 0; 2881 } else { 2882 sync_needed = 1; 2883 } 2884 2885 addr += pagesize; 2886 if (addr == 0) { 2887 static int do_once = 1; 2888 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", 2889 do_once)) 2890 if (do_once) { 2891 do_once = 0; 2892 cmn_err(CE_NOTE, "probable error in " 2893 "dma_cookie address from caller\n"); 2894 } 2895 break; 2896 } 2897 } 2898 2899 /* 2900 * When we've reached the end of the current DMA 
cookie, 2901 * jump to the next cookie (if there are more) 2902 */ 2903 if (cookie_cnt != 0) { 2904 ddi_dma_nextcookie(bind->bi_dmahdl, &dmacookie); 2905 } 2906 } 2907 2908 /* done all the cookies, now sync the memory for the device */ 2909 if (sync_needed) 2910 (void) ddi_dma_sync(dma_info->dma_hdl, 2911 start * sizeof (hermon_hw_mtt_t), 2912 (i - start) * sizeof (hermon_hw_mtt_t), 2913 DDI_DMA_SYNC_FORDEV); 2914 2915 return (DDI_SUCCESS); 2916 } 2917 2918 /* 2919 * hermon_mr_fast_mtt_write_fmr() 2920 * Context: Can be called from interrupt or base context. 2921 */ 2922 static int 2923 hermon_mr_fast_mtt_write_fmr(hermon_rsrc_t *mtt, ibt_pmr_attr_t *mem_pattr, 2924 uint32_t mtt_pgsize_bits) 2925 { 2926 uint64_t *mtt_table; 2927 ibt_phys_addr_t *buf; 2928 uint64_t mtt_entry; 2929 uint64_t addr, first_addr, endaddr; 2930 uint64_t pagesize; 2931 int i; 2932 2933 /* Calculate page size from the suggested value passed in */ 2934 pagesize = ((uint64_t)1 << mtt_pgsize_bits); 2935 2936 /* 2937 * Walk the "addr list" and fill in the MTT table entries 2938 */ 2939 mtt_table = (uint64_t *)mtt->hr_addr; 2940 for (i = 0; i < mem_pattr->pmr_num_buf; i++) { 2941 buf = &mem_pattr->pmr_addr_list[i]; 2942 2943 /* 2944 * For first cookie, use the offset field to determine where 2945 * the buffer starts. The end addr is then calculated with the 2946 * offset in mind. 2947 */ 2948 if (i == 0) { 2949 first_addr = addr = buf->p_laddr + 2950 mem_pattr->pmr_offset; 2951 endaddr = addr + (mem_pattr->pmr_buf_sz - 1) - 2952 mem_pattr->pmr_offset; 2953 /* 2954 * For last cookie, determine end addr based on starting 2955 * address and size of the total buffer 2956 */ 2957 } else if (i == mem_pattr->pmr_num_buf - 1) { 2958 addr = buf->p_laddr; 2959 endaddr = addr + (first_addr + mem_pattr->pmr_len & 2960 (mem_pattr->pmr_buf_sz - 1)); 2961 /* 2962 * For the middle cookies case, start and end addr are 2963 * straightforward. 
Just use the laddr, and the size, as all 2964 * middle cookies are a set size. 2965 */ 2966 } else { 2967 addr = buf->p_laddr; 2968 endaddr = addr + (mem_pattr->pmr_buf_sz - 1); 2969 } 2970 2971 addr = addr & ~((uint64_t)pagesize - 1); 2972 while (addr <= endaddr) { 2973 /* 2974 * Fill in the mapped addresses (calculated above) and 2975 * set HERMON_MTT_ENTRY_PRESENT flag for each MTT entry. 2976 */ 2977 mtt_entry = addr | HERMON_MTT_ENTRY_PRESENT; 2978 mtt_table[i] = htonll(mtt_entry); 2979 addr += pagesize; 2980 } 2981 } 2982 2983 return (DDI_SUCCESS); 2984 } 2985 2986 2987 /* 2988 * hermon_mtt_refcnt_inc() 2989 * Context: Can be called from interrupt or base context. 2990 */ 2991 static uint_t 2992 hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc) 2993 { 2994 hermon_sw_refcnt_t *rc; 2995 2996 rc = (hermon_sw_refcnt_t *)rsrc->hr_addr; 2997 return (atomic_inc_uint_nv(&rc->swrc_refcnt)); 2998 } 2999 3000 3001 /* 3002 * hermon_mtt_refcnt_dec() 3003 * Context: Can be called from interrupt or base context. 3004 */ 3005 static uint_t 3006 hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc) 3007 { 3008 hermon_sw_refcnt_t *rc; 3009 3010 rc = (hermon_sw_refcnt_t *)rsrc->hr_addr; 3011 return (atomic_dec_uint_nv(&rc->swrc_refcnt)); 3012 } 3013