1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * tavor_mr.c 29 * Tavor Memory Region/Window Routines 30 * 31 * Implements all the routines necessary to provide the requisite memory 32 * registration verbs. These include operations like RegisterMemRegion(), 33 * DeregisterMemRegion(), ReregisterMemRegion, RegisterSharedMemRegion, 34 * etc., that affect Memory Regions. It also includes the verbs that 35 * affect Memory Windows, including AllocMemWindow(), FreeMemWindow(), 36 * and QueryMemWindow(). 
37 */ 38 39 #include <sys/types.h> 40 #include <sys/conf.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/modctl.h> 44 #include <sys/esunddi.h> 45 46 #include <sys/ib/adapters/tavor/tavor.h> 47 48 49 /* 50 * Used by tavor_mr_keycalc() below to fill in the "unconstrained" portion 51 * of Tavor memory keys (LKeys and RKeys) 52 */ 53 static uint_t tavor_debug_memkey_cnt = 0x00000000; 54 55 static int tavor_mr_common_reg(tavor_state_t *state, tavor_pdhdl_t pd, 56 tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op); 57 static int tavor_mr_common_rereg(tavor_state_t *state, tavor_mrhdl_t mr, 58 tavor_pdhdl_t pd, tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl_new, 59 tavor_mr_options_t *op); 60 static int tavor_mr_rereg_xlat_helper(tavor_state_t *state, tavor_mrhdl_t mr, 61 tavor_bind_info_t *bind, tavor_mr_options_t *op, uint64_t *mtt_addr, 62 uint_t sleep, uint_t *dereg_level); 63 static uint64_t tavor_mr_nummtt_needed(tavor_state_t *state, 64 tavor_bind_info_t *bind, uint_t *mtt_pgsize); 65 static int tavor_mr_mem_bind(tavor_state_t *state, tavor_bind_info_t *bind, 66 ddi_dma_handle_t dmahdl, uint_t sleep); 67 static void tavor_mr_mem_unbind(tavor_state_t *state, 68 tavor_bind_info_t *bind); 69 static int tavor_mr_fast_mtt_write(tavor_rsrc_t *mtt, tavor_bind_info_t *bind, 70 uint32_t mtt_pgsize_bits); 71 static int tavor_mr_fast_mtt_write_fmr(tavor_rsrc_t *mtt, 72 ibt_pmr_attr_t *mem_pattr, uint32_t mtt_pgsize_bits); 73 static int tavor_mtt_refcnt_inc(tavor_rsrc_t *rsrc); 74 static int tavor_mtt_refcnt_dec(tavor_rsrc_t *rsrc); 75 76 /* 77 * The Tavor umem_lockmemory() callback ops. When userland memory is 78 * registered, these callback ops are specified. The tavor_umap_umemlock_cb() 79 * callback will be called whenever the memory for the corresponding 80 * ddi_umem_cookie_t is being freed. 
 */
static struct umem_callback_ops tavor_umem_cbops = {
    UMEM_CALLBACK_VERSION,
    tavor_umap_umemlock_cb,
};


/*
 * tavor_mr_register()
 *    Context: Can be called from interrupt or base context.
 *
 *    Register a virtually-addressed memory region ("addr" binding) against
 *    the protection domain "pd".  On success a new MR handle is returned
 *    through "mrhdl".  All of the real work (binding, MTT/MPT setup) is
 *    done by tavor_mr_common_reg(); this routine only packages the
 *    caller's ibt_mr_attr_t into a tavor_bind_info_t.
 */
int
tavor_mr_register(tavor_state_t *state, tavor_pdhdl_t pd,
    ibt_mr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op)
{
    tavor_bind_info_t bind;
    int status;

    TAVOR_TNF_ENTER(tavor_mr_register);

    /*
     * Fill in the "bind" struct.  This struct provides the majority
     * of the information that will be used to distinguish between an
     * "addr" binding (as is the case here) and a "buf" binding (see
     * below).  The "bind" struct is later passed to tavor_mr_mem_bind()
     * which does most of the "heavy lifting" for the Tavor memory
     * registration routines.
     */
    bind.bi_type = TAVOR_BINDHDL_VADDR;
    bind.bi_addr = mr_attr->mr_vaddr;
    bind.bi_len = mr_attr->mr_len;
    bind.bi_as = mr_attr->mr_as;
    bind.bi_flags = mr_attr->mr_flags;
    status = tavor_mr_common_reg(state, pd, &bind, mrhdl, op);
    if (status != DDI_SUCCESS) {
        TNF_PROBE_0(tavor_mr_register_cmnreg_fail,
            TAVOR_TNF_ERROR, "");
        TAVOR_TNF_EXIT(tavor_mr_register);
        return (status);
    }

    TAVOR_TNF_EXIT(tavor_mr_register);
    return (DDI_SUCCESS);
}


/*
 * tavor_mr_register_buf()
 *    Context: Can be called from interrupt or base context.
 *
 *    Register the memory described by a struct buf ("buf" binding).  The
 *    IB virtual address is taken from mr_attr->mr_vaddr when
 *    IBT_MR_PHYS_IOVA is set, otherwise from b_un.b_addr (see comment
 *    below on why the exact choice is not critical).
 */
int
tavor_mr_register_buf(tavor_state_t *state, tavor_pdhdl_t pd,
    ibt_smr_attr_t *mr_attr, struct buf *buf, tavor_mrhdl_t *mrhdl,
    tavor_mr_options_t *op)
{
    tavor_bind_info_t bind;
    int status;

    TAVOR_TNF_ENTER(tavor_mr_register_buf);

    /*
     * Fill in the "bind" struct.  This struct provides the majority
     * of the information that will be used to distinguish between an
     * "addr" binding (see above) and a "buf" binding (as is the case
     * here).  The "bind" struct is later passed to tavor_mr_mem_bind()
     * which does most of the "heavy lifting" for the Tavor memory
     * registration routines.  Note: We have chosen to provide
     * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
     * not set).  It is not critical what value we choose here as it need
     * only be unique for the given RKey (which will happen by default),
     * so the choice here is somewhat arbitrary.
     */
    bind.bi_type = TAVOR_BINDHDL_BUF;
    bind.bi_buf = buf;
    if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
        bind.bi_addr = mr_attr->mr_vaddr;
    } else {
        bind.bi_addr = (uint64_t)(uintptr_t)buf->b_un.b_addr;
    }
    /* A buf binding carries no address space pointer */
    bind.bi_as = NULL;
    bind.bi_len = (uint64_t)buf->b_bcount;
    bind.bi_flags = mr_attr->mr_flags;
    status = tavor_mr_common_reg(state, pd, &bind, mrhdl, op);
    if (status != DDI_SUCCESS) {
        TNF_PROBE_0(tavor_mr_register_buf_cmnreg_fail,
            TAVOR_TNF_ERROR, "");
        TAVOR_TNF_EXIT(tavor_mr_register_buf);
        return (status);
    }

    TAVOR_TNF_EXIT(tavor_mr_register_buf);
    return (DDI_SUCCESS);
}


/*
 * tavor_mr_register_shared()
 *    Context: Can be called from interrupt or base context.
 *
 *    Create a new memory region ("mrhdl_new") that shares the MTT entries
 *    of an existing region ("mrhdl").  A new MPT entry is allocated and
 *    pointed at the existing (already bound and filled-in) MTTs, and the
 *    shared MTT reference count is incremented.  Userland regions are
 *    re-pinned so each shared handle gets its own umem callback.
 */
int
tavor_mr_register_shared(tavor_state_t *state, tavor_mrhdl_t mrhdl,
    tavor_pdhdl_t pd, ibt_smr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl_new)
{
    tavor_rsrc_pool_info_t *rsrc_pool;
    tavor_rsrc_t *mpt, *mtt, *rsrc;
    tavor_umap_db_entry_t *umapdb;
    tavor_hw_mpt_t mpt_entry;
    tavor_mrhdl_t mr;
    tavor_bind_info_t *bind;
    ddi_umem_cookie_t umem_cookie;
    size_t umem_len;
    caddr_t umem_addr;
    uint64_t mtt_addr, mtt_ddrbaseaddr, pgsize_msk;
    uint_t sleep, mr_is_umem;
    int status, umem_flags;
    char *errormsg;     /* set by the TAVOR_TNF_FAIL() macro on failure */

    TAVOR_TNF_ENTER(tavor_mr_register_shared);

    /*
     * Check the sleep flag.  Ensure that it is consistent with the
     * current thread context (i.e. if we are currently in the interrupt
     * context, then we shouldn't be attempting to sleep).
     */
    sleep = (mr_attr->mr_flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP :
        TAVOR_SLEEP;
    if ((sleep == TAVOR_SLEEP) &&
        (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
        goto mrshared_fail;
    }

    /* Increment the reference count on the protection domain (PD) */
    tavor_pd_refcnt_inc(pd);

    /*
     * Allocate an MPT entry.  This will be filled in with all the
     * necessary parameters to define the shared memory region.
     * Specifically, it will be made to reference the currently existing
     * MTT entries and ownership of the MPT will be passed to the hardware
     * in the last step below.  If we fail here, we must undo the
     * protection domain reference count.
     */
    status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
    if (status != DDI_SUCCESS) {
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
        goto mrshared_fail1;
    }

    /*
     * Allocate the software structure for tracking the shared memory
     * region (i.e. the Tavor Memory Region handle).  If we fail here, we
     * must undo the protection domain reference count and the previous
     * resource allocation.
     */
    status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
    if (status != DDI_SUCCESS) {
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
        goto mrshared_fail2;
    }
    mr = (tavor_mrhdl_t)rsrc->tr_addr;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

    /*
     * Setup and validate the memory region access flags.  This means
     * translating the IBTF's enable flags into the access flags that
     * will be used in later operations.
     */
    mr->mr_accflag = 0;
    if (mr_attr->mr_flags & IBT_MR_ENABLE_WINDOW_BIND)
        mr->mr_accflag |= IBT_MR_WINDOW_BIND;
    if (mr_attr->mr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
        mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
    if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_READ)
        mr->mr_accflag |= IBT_MR_REMOTE_READ;
    if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
        mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
    if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
        mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

    /*
     * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
     * from a certain number of "constrained" bits (the least significant
     * bits) and some number of "unconstrained" bits.  The constrained
     * bits must be set to the index of the entry in the MPT table, but
     * the unconstrained bits can be set to any value we wish.  Note:
     * if no remote access is required, then the RKey value is not filled
     * in.  Otherwise both Rkey and LKey are given the same value.
     */
    tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
    if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
        (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
        (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
        mr->mr_rkey = mr->mr_lkey;
    }

    /* Grab the MR lock for the current memory region */
    mutex_enter(&mrhdl->mr_lock);

    /*
     * Check here to see if the memory region has already been partially
     * deregistered as a result of a tavor_umap_umemlock_cb() callback.
     * If so, this is an error, return failure.
     */
    if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
        mutex_exit(&mrhdl->mr_lock);
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl");
        goto mrshared_fail3;
    }

    /*
     * Determine if the original memory was from userland and, if so, pin
     * the pages (again) with umem_lockmemory().  This will guarantee a
     * separate callback for each of this shared region's MR handles.
     * If this is userland memory, then allocate an entry in the
     * "userland resources database".  This will later be added to
     * the database (after all further memory registration operations are
     * successful).  If we fail here, we must undo all the above setup.
     */
    mr_is_umem = mrhdl->mr_is_umem;
    if (mr_is_umem) {
        /* Round the pin range out to whole pages */
        umem_len = ptob(btopr(mrhdl->mr_bindinfo.bi_len +
            ((uintptr_t)mrhdl->mr_bindinfo.bi_addr & PAGEOFFSET)));
        umem_addr = (caddr_t)((uintptr_t)mrhdl->mr_bindinfo.bi_addr &
            ~PAGEOFFSET);
        umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
            DDI_UMEMLOCK_LONGTERM);
        status = umem_lockmemory(umem_addr, umem_len, umem_flags,
            &umem_cookie, &tavor_umem_cbops, NULL);
        if (status != 0) {
            mutex_exit(&mrhdl->mr_lock);
            /* Set "status" and "errormsg" and goto failure */
            TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umem pin");
            goto mrshared_fail3;
        }

        umapdb = tavor_umap_db_alloc(state->ts_instance,
            (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
            (uint64_t)(uintptr_t)rsrc);
        if (umapdb == NULL) {
            mutex_exit(&mrhdl->mr_lock);
            /* Set "status" and "errormsg" and goto failure */
            TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
            goto mrshared_fail4;
        }
    }

    /*
     * Copy the MTT resource pointer (and additional parameters) from
     * the original Tavor Memory Region handle.  Note: this is normally
     * where the tavor_mr_mem_bind() routine would be called, but because
     * we already have bound and filled-in MTT entries it is simply a
     * matter here of managing the MTT reference count and grabbing the
     * address of the MTT table entries (for filling in the shared region's
     * MPT entry).
     */
    mr->mr_mttrsrcp = mrhdl->mr_mttrsrcp;
    mr->mr_logmttpgsz = mrhdl->mr_logmttpgsz;
    mr->mr_bindinfo = mrhdl->mr_bindinfo;
    mr->mr_mttrefcntp = mrhdl->mr_mttrefcntp;
    mutex_exit(&mrhdl->mr_lock);
    bind = &mr->mr_bindinfo;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
    mtt = mr->mr_mttrsrcp;

    /*
     * Increment the MTT reference count (to reflect the fact that
     * the MTT is now shared)
     */
    (void) tavor_mtt_refcnt_inc(mr->mr_mttrefcntp);

    /*
     * Update the new "bind" virtual address.  Do some extra work here
     * to ensure proper alignment.  That is, make sure that the page
     * offset for the beginning of the old range is the same as the
     * offset for this new mapping
     */
    pgsize_msk = (((uint64_t)1 << mr->mr_logmttpgsz) - 1);
    bind->bi_addr = ((mr_attr->mr_vaddr & ~pgsize_msk) |
        (mr->mr_bindinfo.bi_addr & pgsize_msk));

    /*
     * Get the base address for the MTT table.  This will be necessary
     * in the next step when we are setting up the MPT entry.
     */
    rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
    mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;

    /*
     * Fill in the MPT entry.  This is the final step before passing
     * ownership of the MPT entry to the Tavor hardware.  We use all of
     * the information collected/calculated above to fill in the
     * requisite portions of the MPT.
     */
    bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
    mpt_entry.m_io = TAVOR_MEM_CYCLE_GENERATE;
    mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0;
    mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
    mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0;
    mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0;
    mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0;
    mpt_entry.lr = 1;    /* local read is always enabled */
    mpt_entry.reg_win = TAVOR_MPT_IS_REGION;
    /*
     * page_sz is encoded as log2(pagesize) - 0xC, i.e. relative to the
     * hardware's 4KB base page size (presumably per the Tavor PRM MPT
     * format -- confirm against the PRM).
     */
    mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC;
    mpt_entry.mem_key = mr->mr_lkey;
    mpt_entry.pd = pd->pd_pdnum;
    mpt_entry.start_addr = bind->bi_addr;
    mpt_entry.reg_win_len = bind->bi_len;
    mpt_entry.win_cnt_limit = TAVOR_UNLIMITED_WIN_BIND;
    mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT);
    mpt_entry.mttseg_addr_h = mtt_addr >> 32;
    /* low word is stored in 64-byte (MTT segment) units, hence >> 6 */
    mpt_entry.mttseg_addr_l = mtt_addr >> 6;

    /*
     * Write the MPT entry to hardware.  Lastly, we pass ownership of
     * the entry to the hardware.  Note: in general, this operation
     * shouldn't fail.  But if it does, we have to undo everything we've
     * done above before returning error.
     */
    status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
        sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
    if (status != TAVOR_CMD_SUCCESS) {
        cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
            status);
        TNF_PROBE_1(tavor_mr_register_shared_sw2hw_mpt_cmd_fail,
            TAVOR_TNF_ERROR, "", tnf_uint, status, status);
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
            "tavor SW2HW_MPT command");
        goto mrshared_fail5;
    }

    /*
     * Fill in the rest of the Tavor Memory Region handle.  Having
     * successfully transferred ownership of the MPT, we can update the
     * following fields for use in further operations on the MR.
     */
    mr->mr_mptrsrcp = mpt;
    mr->mr_mttrsrcp = mtt;
    mr->mr_pdhdl = pd;
    mr->mr_rsrcp = rsrc;
    mr->mr_is_umem = mr_is_umem;
    mr->mr_is_fmr = 0;
    mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
    mr->mr_umem_cbfunc = NULL;
    mr->mr_umem_cbarg1 = NULL;
    mr->mr_umem_cbarg2 = NULL;

    /*
     * If this is userland memory, then we need to insert the previously
     * allocated entry into the "userland resources database".  This will
     * allow for later coordination between the tavor_umap_umemlock_cb()
     * callback and tavor_mr_deregister().
     */
    if (mr_is_umem) {
        tavor_umap_db_add(umapdb);
    }

    *mrhdl_new = mr;

    TAVOR_TNF_EXIT(tavor_mr_register_shared);
    return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine.
 * Each label undoes exactly the work done before the corresponding goto,
 * in reverse order of acquisition.
 */
mrshared_fail5:
    (void) tavor_mtt_refcnt_dec(mr->mr_mttrefcntp);
    if (mr_is_umem) {
        tavor_umap_db_free(umapdb);
    }
mrshared_fail4:
    if (mr_is_umem) {
        ddi_umem_unlock(umem_cookie);
    }
mrshared_fail3:
    tavor_rsrc_free(state, &rsrc);
mrshared_fail2:
    tavor_rsrc_free(state, &mpt);
mrshared_fail1:
    tavor_pd_refcnt_dec(pd);
mrshared_fail:
    TNF_PROBE_1(tavor_mr_register_shared_fail, TAVOR_TNF_ERROR, "",
        tnf_string, msg, errormsg);
    TAVOR_TNF_EXIT(tavor_mr_register_shared);
    return (status);
}

/*
 * tavor_mr_alloc_fmr()
 *    Context: Can be called from interrupt or base context.
 *
 *    Allocate the MPT/MTT resources for a Fast Memory Region (FMR) from
 *    the given FMR pool.  The MPT is sized for fmr_pool->fmr_max_pages
 *    pages and handed to the hardware with a zero start address/length;
 *    the actual mapping is filled in later by
 *    tavor_mr_register_physical_fmr().
 */
int
tavor_mr_alloc_fmr(tavor_state_t *state, tavor_pdhdl_t pd,
    tavor_fmrhdl_t fmr_pool, tavor_mrhdl_t *mrhdl)
{
    tavor_rsrc_pool_info_t *rsrc_pool;
    tavor_rsrc_t *mpt, *mtt, *rsrc;
    tavor_hw_mpt_t mpt_entry;
    tavor_mrhdl_t mr;
    tavor_bind_info_t bind;
    uint64_t mtt_addr, mtt_ddrbaseaddr;
    uint64_t nummtt;
    uint_t sleep, mtt_pgsize_bits;
    int status;
    char *errormsg;     /* set by the TAVOR_TNF_FAIL() macro on failure */

    TAVOR_TNF_ENTER(tavor_mr_alloc_fmr);

    /*
     * Check the sleep flag.  Ensure that it is consistent with the
     * current thread context (i.e. if we are currently in the interrupt
     * context, then we shouldn't be attempting to sleep).
     */
    sleep = (fmr_pool->fmr_flags & IBT_MR_SLEEP) ? TAVOR_SLEEP :
        TAVOR_NOSLEEP;
    if ((sleep == TAVOR_SLEEP) &&
        (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
        TNF_PROBE_0(tavor_mr_alloc_fmr, TAVOR_TNF_ERROR, "");
        TAVOR_TNF_EXIT(tavor_mr_alloc_fmr);
        return (IBT_INVALID_PARAM);
    }

    /* Increment the reference count on the protection domain (PD) */
    tavor_pd_refcnt_inc(pd);

    /*
     * Allocate an MPT entry.  This will be filled in with all the
     * necessary parameters to define the FMR.  Specifically, it will be
     * made to reference the currently existing MTT entries and ownership
     * of the MPT will be passed to the hardware in the last step below.
     * If we fail here, we must undo the protection domain reference count.
     */

    status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
    if (status != DDI_SUCCESS) {
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
        goto fmralloc_fail1;
    }

    /*
     * Allocate the software structure for tracking the fmr memory
     * region (i.e. the Tavor Memory Region handle).  If we fail here, we
     * must undo the protection domain reference count and the previous
     * resource allocation.
     */
    status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
    if (status != DDI_SUCCESS) {
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
        goto fmralloc_fail2;
    }
    mr = (tavor_mrhdl_t)rsrc->tr_addr;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

    /*
     * Setup and validate the memory region access flags.  This means
     * translating the IBTF's enable flags into the access flags that
     * will be used in later operations.  (Note: unlike the regular
     * registration paths above, no window-bind flag is honored here.)
     */
    mr->mr_accflag = 0;
    if (fmr_pool->fmr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
        mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
    if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_READ)
        mr->mr_accflag |= IBT_MR_REMOTE_READ;
    if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
        mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
    if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
        mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

    /*
     * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
     * from a certain number of "constrained" bits (the least significant
     * bits) and some number of "unconstrained" bits.  The constrained
     * bits must be set to the index of the entry in the MPT table, but
     * the unconstrained bits can be set to any value we wish.  Note:
     * if no remote access is required, then the RKey value is not filled
     * in.  Otherwise both Rkey and LKey are given the same value.
     */
    tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
    if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
        (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
        (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
        mr->mr_rkey = mr->mr_lkey;
    }

    /*
     * Determine number of pages spanned.  This routine uses the
     * information in the "bind" struct to determine the required
     * number of MTT entries needed (and returns the suggested page size -
     * as a "power-of-2" - for each MTT entry).
     *
     * NOTE(review): only bi_addr and bi_len are initialized here; the
     * rest of "bind" is stack garbage at this point.  Presumably
     * tavor_mr_nummtt_needed() reads only these two fields -- confirm
     * against its implementation.
     */
    /* Assume address will be page aligned later */
    bind.bi_addr = 0;
    /* Calculate size based on given max pages */
    bind.bi_len = fmr_pool->fmr_max_pages << PAGESHIFT;
    nummtt = tavor_mr_nummtt_needed(state, &bind, &mtt_pgsize_bits);

    /*
     * Allocate the MTT entries.  Use the calculations performed above to
     * allocate the required number of MTT entries.  Note: MTT entries are
     * allocated in "MTT segments" which consist of complete cachelines
     * (i.e. 8 entries, 16 entries, etc.)  So the TAVOR_NUMMTT_TO_MTTSEG()
     * macro is used to do the proper conversion.  If we fail here, we
     * must not only undo all the previous resource allocation (and PD
     * reference count), but we must also unbind the memory.
     */
    status = tavor_rsrc_alloc(state, TAVOR_MTT,
        TAVOR_NUMMTT_TO_MTTSEG(nummtt), sleep, &mtt);
    if (status != DDI_SUCCESS) {
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MTT");
        goto fmralloc_fail3;
    }
    mr->mr_logmttpgsz = mtt_pgsize_bits;

    /*
     * Get the base address for the MTT table.  This will be necessary
     * in the next step when we are setting up the MPT entry.
     */
    rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
    mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;

    /*
     * Fill in the MPT entry.  This is the final step before passing
     * ownership of the MPT entry to the Tavor hardware.  We use all of
     * the information collected/calculated above to fill in the
     * requisite portions of the MPT.
     */
    bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
    mpt_entry.m_io = TAVOR_MEM_CYCLE_GENERATE;
    mpt_entry.en_bind = 0;    /* FMRs never allow window binding */
    mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
    mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0;
    mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0;
    mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0;
    mpt_entry.lr = 1;    /* local read is always enabled */
    mpt_entry.reg_win = TAVOR_MPT_IS_REGION;
    mpt_entry.pd = pd->pd_pdnum;
    /* log2(pagesize) relative to the 4KB (2^0xC) hardware base */
    mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC;
    mpt_entry.win_cnt_limit = TAVOR_UNLIMITED_WIN_BIND;
    mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT);
    mpt_entry.mttseg_addr_h = mtt_addr >> 32;
    /* low word is stored in 64-byte (MTT segment) units, hence >> 6 */
    mpt_entry.mttseg_addr_l = mtt_addr >> 6;
    mpt_entry.mem_key = mr->mr_lkey;

    /*
     * FMR sets these to 0 for now.  Later during actual fmr registration
     * these values are filled in.
     */
    mpt_entry.start_addr = 0;
    mpt_entry.reg_win_len = 0;

    /*
     * Write the MPT entry to hardware.  Lastly, we pass ownership of
     * the entry to the hardware.  Note: in general, this operation
     * shouldn't fail.  But if it does, we have to undo everything we've
     * done above before returning error.
     */
    status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
        sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
    if (status != TAVOR_CMD_SUCCESS) {
        cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
            status);
        TNF_PROBE_1(tavor_mr_register_shared_sw2hw_mpt_cmd_fail,
            TAVOR_TNF_ERROR, "", tnf_uint, status, status);
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
            "tavor SW2HW_MPT command");
        goto fmralloc_fail4;
    }

    /*
     * Fill in the rest of the Tavor Memory Region handle.  Having
     * successfully transferred ownership of the MPT, we can update the
     * following fields for use in further operations on the MR.  Also, set
     * that this is an FMR region.
     */
    mr->mr_mptrsrcp = mpt;
    mr->mr_mttrsrcp = mtt;
    mr->mr_pdhdl = pd;
    mr->mr_rsrcp = rsrc;
    mr->mr_is_fmr = 1;
    (void) memcpy(&mr->mr_bindinfo, &bind, sizeof (tavor_bind_info_t));

    *mrhdl = mr;

    TAVOR_TNF_EXIT(tavor_mr_alloc_fmr);
    return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine.
 * Labels undo work in reverse order of acquisition; fmralloc_fail is
 * reached only by falling through fmralloc_fail1 (errormsg is set by
 * TAVOR_TNF_FAIL before every goto).
 */
fmralloc_fail4:
    tavor_rsrc_free(state, &mtt);
fmralloc_fail3:
    tavor_rsrc_free(state, &rsrc);
fmralloc_fail2:
    tavor_rsrc_free(state, &mpt);
fmralloc_fail1:
    tavor_pd_refcnt_dec(pd);
fmralloc_fail:
    TNF_PROBE_1(tavor_mr_alloc_fmr, TAVOR_TNF_ERROR, "",
        tnf_string, msg, errormsg);
    TAVOR_TNF_EXIT(tavor_mr_alloc_fmr);
    return (status);
}

/*
 * tavor_mr_register_physical_fmr()
 *    Context: Can be called from interrupt or base context.
 *
 *    Map a physical buffer list into a previously-allocated FMR (see
 *    tavor_mr_alloc_fmr()).  The MPT entry is updated in place through
 *    DDI register accesses: ownership is flipped to software, the MTTs
 *    and key/length/address words are rewritten, then ownership is
 *    returned to hardware.  Return parameters are filled into
 *    "mem_desc_p".
 */
int
tavor_mr_register_physical_fmr(tavor_state_t *state,
    ibt_pmr_attr_t *mem_pattr_p, tavor_mrhdl_t mr, ibt_pmr_desc_t *mem_desc_p)
{
    tavor_rsrc_t *mpt;
    uint64_t *mpt_table;
    int status;
    char *errormsg;     /* set by the TAVOR_TNF_FAIL() macro on failure */

    TAVOR_TNF_ENTER(tavor_mr_register_physical_fmr);

    mutex_enter(&mr->mr_lock);
    mpt = mr->mr_mptrsrcp;
    mpt_table = (uint64_t *)mpt->tr_addr;

    /*
     * Write MPT status to SW bit.  The MPT entry is accessed here as raw
     * 64-bit words; 0xF in the first byte marks the entry as
     * software-owned (0x0 below returns it to hardware) -- presumably
     * per the Tavor MPT layout; confirm offsets against the PRM.
     */
    ddi_put8(mpt->tr_acchdl, (uint8_t *)&mpt_table[0], 0xF);

    /*
     * Write the mapped addresses into the MTT entries.  FMR needs to do
     * this a little differently, so we call the fmr specific fast mtt
     * write here.
     */
    status = tavor_mr_fast_mtt_write_fmr(mr->mr_mttrsrcp, mem_pattr_p,
        mr->mr_logmttpgsz);
    if (status != DDI_SUCCESS) {
        mutex_exit(&mr->mr_lock);
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(ibc_get_ci_failure(0), "failed write mtt");
        goto fmr_reg_fail1;
    }

    /*
     * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
     * from a certain number of "constrained" bits (the least significant
     * bits) and some number of "unconstrained" bits.  The constrained
     * bits must be set to the index of the entry in the MPT table, but
     * the unconstrained bits can be set to any value we wish.  Note:
     * if no remote access is required, then the RKey value is not filled
     * in.  Otherwise both Rkey and LKey are given the same value.
     */
    tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
    if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
        (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
        (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
        mr->mr_rkey = mr->mr_lkey;
    }

    /* write mem key value */
    ddi_put32(mpt->tr_acchdl, (uint32_t *)&mpt_table[1], mr->mr_lkey);

    /* write length value */
    ddi_put64(mpt->tr_acchdl, &mpt_table[3], mem_pattr_p->pmr_len);

    /* write start addr value */
    ddi_put64(mpt->tr_acchdl, &mpt_table[2], mem_pattr_p->pmr_iova);

    /* write lkey value */
    ddi_put32(mpt->tr_acchdl, (uint32_t *)&mpt_table[4], mr->mr_lkey);

    /* Write MPT status to HW bit (return ownership to hardware) */
    ddi_put8(mpt->tr_acchdl, (uint8_t *)&mpt_table[0], 0x0);

    /* Fill in return parameters */
    mem_desc_p->pmd_lkey = mr->mr_lkey;
    mem_desc_p->pmd_rkey = mr->mr_rkey;
    mem_desc_p->pmd_iova = mem_pattr_p->pmr_iova;
    mem_desc_p->pmd_phys_buf_list_sz = mem_pattr_p->pmr_len;

    /* Fill in MR bindinfo struct for later sync or query operations */
    mr->mr_bindinfo.bi_addr = mem_pattr_p->pmr_iova;
    mr->mr_bindinfo.bi_flags = mem_pattr_p->pmr_flags & IBT_MR_NONCOHERENT;

    mutex_exit(&mr->mr_lock);

    TAVOR_TNF_EXIT(tavor_mr_register_physical_fmr);
    return (DDI_SUCCESS);

fmr_reg_fail1:
    /*
     * Note, we fail here, and purposely leave the memory ownership in
     * software.  The memory tables may be corrupt, so we leave the region
     * unregistered.
     */
    TNF_PROBE_1(tavor_mr_register_physical_fmr_fail, TAVOR_TNF_ERROR, "",
        tnf_string, msg, errormsg);
    TAVOR_TNF_EXIT(tavor_mr_register_physical_fmr);
    return (DDI_FAILURE);
}


/*
 * tavor_mr_deregister()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
tavor_mr_deregister(tavor_state_t *state, tavor_mrhdl_t *mrhdl, uint_t level,
    uint_t sleep)
{
    tavor_rsrc_t *mpt, *mtt, *rsrc, *mtt_refcnt;
    tavor_umap_db_entry_t *umapdb;
    tavor_pdhdl_t pd;
    tavor_mrhdl_t mr;
    tavor_bind_info_t *bind;
    uint64_t value;
    int status, shared_mtt;
    char *errormsg;     /* set by the TAVOR_TNF_FAIL() macro on failure */

    TAVOR_TNF_ENTER(tavor_mr_deregister);

    /*
     * Check the sleep flag.  Ensure that it is consistent with the
     * current thread context (i.e. if we are currently in the interrupt
     * context, then we shouldn't be attempting to sleep).
     */
    if ((sleep == TAVOR_SLEEP) &&
        (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid sleep flags");
        TNF_PROBE_1(tavor_mr_deregister_fail, TAVOR_TNF_ERROR, "",
            tnf_string, msg, errormsg);
        TAVOR_TNF_EXIT(tavor_mr_deregister);
        return (status);
    }

    /*
     * Pull all the necessary information from the Tavor Memory Region
     * handle.  This is necessary here because the resource for the
     * MR handle is going to be freed up as part of the this
     * deregistration
     */
    mr = *mrhdl;
    mutex_enter(&mr->mr_lock);
    mpt = mr->mr_mptrsrcp;
    mtt = mr->mr_mttrsrcp;
    mtt_refcnt = mr->mr_mttrefcntp;
    rsrc = mr->mr_rsrcp;
    pd = mr->mr_pdhdl;
    bind = &mr->mr_bindinfo;

    /*
     * Check here if the memory region is really an FMR.  If so, this is a
     * bad thing and we shouldn't be here.  Return failure.
     * (FMRs are torn down through tavor_mr_dealloc_fmr() instead.)
     */
    if (mr->mr_is_fmr) {
        mutex_exit(&mr->mr_lock);
        TNF_PROBE_0(tavor_mr_deregister_is_fmr, TAVOR_TNF_ERROR, "");
        TAVOR_TNF_EXIT(tavor_mr_deregister);
        return (IBT_INVALID_PARAM);
    }

    /*
     * Check here to see if the memory region has already been partially
     * deregistered as a result of the tavor_umap_umemlock_cb() callback.
     * If so, then jump to the end and free the remaining resources.
     */
    if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
        goto mrdereg_finish_cleanup;
    }

    /*
     * We must drop the "mr_lock" here to ensure that both SLEEP and
     * NOSLEEP calls into the firmware work as expected.  Also, if two
     * threads are attemping to access this MR (via de-register,
     * re-register, or otherwise), then we allow the firmware to enforce
     * the checking, that only one deregister is valid.
     */
    mutex_exit(&mr->mr_lock);

    /*
     * Reclaim MPT entry from hardware (if necessary).  Since the
     * tavor_mr_deregister() routine is used in the memory region
     * reregistration process as well, it is possible that we will
     * not always wish to reclaim ownership of the MPT.  Check the
     * "level" arg and, if necessary, attempt to reclaim it.  If
     * the ownership transfer fails for any reason, we check to see
     * what command status was returned from the hardware.  The only
     * "expected" error status is the one that indicates an attempt to
     * deregister a memory region that has memory windows bound to it
     */
    if (level >= TAVOR_MR_DEREG_ALL) {
        status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT,
            NULL, 0, mpt->tr_indx, sleep);
        if (status != TAVOR_CMD_SUCCESS) {
            if (status == TAVOR_CMD_REG_BOUND) {
                /* windows still bound: caller must unbind first */
                TAVOR_TNF_EXIT(tavor_mr_deregister);
                return (IBT_MR_IN_USE);
            } else {
                cmn_err(CE_CONT, "Tavor: HW2SW_MPT command "
                    "failed: %08x\n", status);
                TNF_PROBE_1(tavor_hw2sw_mpt_cmd_fail,
                    TAVOR_TNF_ERROR, "", tnf_uint, status,
                    status);
                TAVOR_TNF_EXIT(tavor_mr_deregister);
                return (IBT_INVALID_PARAM);
            }
        }
    }

    /*
     * Re-grab the mr_lock here.  Since further access to the protected
     * 'mr' structure is needed, and we would have returned previously for
     * the multiple deregistration case, we can safely grab the lock here.
     */
    mutex_enter(&mr->mr_lock);

    /*
     * If the memory had come from userland, then we do a lookup in the
     * "userland resources database".  On success, we free the entry, call
     * ddi_umem_unlock(), and continue the cleanup.  On failure (which is
     * an indication that the umem_lockmemory() callback has called
     * tavor_mr_deregister()), we call ddi_umem_unlock() and invalidate
     * the "mr_umemcookie" field in the MR handle (this will be used
     * later to detect that only partial cleaup still remains to be done
     * on the MR handle).
     */
    if (mr->mr_is_umem) {
        status = tavor_umap_db_find(state->ts_instance,
            (uint64_t)(uintptr_t)mr->mr_umemcookie,
            MLNX_UMAP_MRMEM_RSRC, &value, TAVOR_UMAP_DB_REMOVE,
            &umapdb);
        if (status == DDI_SUCCESS) {
            tavor_umap_db_free(umapdb);
            ddi_umem_unlock(mr->mr_umemcookie);
        } else {
            ddi_umem_unlock(mr->mr_umemcookie);
            mr->mr_umemcookie = NULL;
        }
    }

    /*
     * Decrement the MTT reference count.  Since the MTT resource
     * may be shared between multiple memory regions (as a result
     * of a "RegisterSharedMR" verb) it is important that we not
     * free up or unbind resources prematurely.  If it's not shared (as
     * indicated by the return status), then free the resource.
     */
    shared_mtt = tavor_mtt_refcnt_dec(mtt_refcnt);
    if (!shared_mtt) {
        tavor_rsrc_free(state, &mtt_refcnt);
    }

    /*
     * Free up the MTT entries and unbind the memory.  Here, as above, we
     * attempt to free these resources only if it is appropriate to do so.
     * (The unbind is additionally gated on "level", since a rereg caller
     * may want the binding preserved.)
     */
    if (!shared_mtt) {
        if (level >= TAVOR_MR_DEREG_NO_HW2SW_MPT) {
            tavor_mr_mem_unbind(state, bind);
        }
        tavor_rsrc_free(state, &mtt);
    }

    /*
     * If the MR handle has been invalidated, then drop the
     * lock and return success.  Note: This only happens because
     * the umem_lockmemory() callback has been triggered.  The
     * cleanup here is partial, and further cleanup (in a
     * subsequent tavor_mr_deregister() call) will be necessary.
     */
    if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
        mutex_exit(&mr->mr_lock);
        TAVOR_TNF_EXIT(tavor_mr_deregister);
        return (DDI_SUCCESS);
    }

mrdereg_finish_cleanup:
    mutex_exit(&mr->mr_lock);

    /* Free the Tavor Memory Region handle */
    tavor_rsrc_free(state, &rsrc);

    /* Free up the MPT entry resource */
    tavor_rsrc_free(state, &mpt);

    /* Decrement the reference count on the protection domain (PD) */
    tavor_pd_refcnt_dec(pd);

    /* Set the mrhdl pointer to NULL and return success */
    *mrhdl = NULL;

    TAVOR_TNF_EXIT(tavor_mr_deregister);
    return (DDI_SUCCESS);
}

/*
 * tavor_mr_dealloc_fmr()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
tavor_mr_dealloc_fmr(tavor_state_t *state, tavor_mrhdl_t *mrhdl)
{
    tavor_rsrc_t *mpt, *mtt, *rsrc;
    tavor_pdhdl_t pd;
    tavor_mrhdl_t mr;

    TAVOR_TNF_ENTER(tavor_mr_dealloc_fmr);

    /*
     * Pull all the necessary information from the Tavor Memory Region
     * handle.
This is necessary here because the resource for the 1001 * MR handle is going to be freed up as part of the this 1002 * deregistration 1003 */ 1004 mr = *mrhdl; 1005 mutex_enter(&mr->mr_lock); 1006 mpt = mr->mr_mptrsrcp; 1007 mtt = mr->mr_mttrsrcp; 1008 rsrc = mr->mr_rsrcp; 1009 pd = mr->mr_pdhdl; 1010 mutex_exit(&mr->mr_lock); 1011 1012 /* Free the MTT entries */ 1013 tavor_rsrc_free(state, &mtt); 1014 1015 /* Free the Tavor Memory Region handle */ 1016 tavor_rsrc_free(state, &rsrc); 1017 1018 /* Free up the MPT entry resource */ 1019 tavor_rsrc_free(state, &mpt); 1020 1021 /* Decrement the reference count on the protection domain (PD) */ 1022 tavor_pd_refcnt_dec(pd); 1023 1024 /* Set the mrhdl pointer to NULL and return success */ 1025 *mrhdl = NULL; 1026 1027 TAVOR_TNF_EXIT(tavor_mr_dealloc_fmr); 1028 return (DDI_SUCCESS); 1029 } 1030 1031 /* 1032 * tavor_mr_invalidate_fmr() 1033 * Context: Can be called from interrupt or base context. 1034 */ 1035 /* ARGSUSED */ 1036 int 1037 tavor_mr_invalidate_fmr(tavor_state_t *state, tavor_mrhdl_t mr) 1038 { 1039 tavor_rsrc_t *mpt; 1040 uint64_t *mpt_table; 1041 1042 TAVOR_TNF_ENTER(tavor_mr_invalidate_fmr); 1043 1044 mutex_enter(&mr->mr_lock); 1045 mpt = mr->mr_mptrsrcp; 1046 mpt_table = (uint64_t *)mpt->tr_addr; 1047 1048 /* Write MPT status to SW bit */ 1049 ddi_put8(mpt->tr_acchdl, (uint8_t *)&mpt_table[0], 0xF); 1050 1051 /* invalidate mem key value */ 1052 ddi_put32(mpt->tr_acchdl, (uint32_t *)&mpt_table[1], 0); 1053 1054 /* invalidate lkey value */ 1055 ddi_put32(mpt->tr_acchdl, (uint32_t *)&mpt_table[4], 0); 1056 1057 /* Write MPT status to HW bit */ 1058 ddi_put8(mpt->tr_acchdl, (uint8_t *)&mpt_table[0], 0x0); 1059 1060 mutex_exit(&mr->mr_lock); 1061 1062 TAVOR_TNF_EXIT(tavor_mr_invalidate_fmr); 1063 return (DDI_SUCCESS); 1064 } 1065 1066 /* 1067 * tavor_mr_deregister_fmr() 1068 * Context: Can be called from interrupt or base context. 
1069 */ 1070 /* ARGSUSED */ 1071 int 1072 tavor_mr_deregister_fmr(tavor_state_t *state, tavor_mrhdl_t mr) 1073 { 1074 tavor_rsrc_t *mpt; 1075 uint64_t *mpt_table; 1076 1077 TAVOR_TNF_ENTER(tavor_mr_deregister_fmr); 1078 1079 mutex_enter(&mr->mr_lock); 1080 mpt = mr->mr_mptrsrcp; 1081 mpt_table = (uint64_t *)mpt->tr_addr; 1082 1083 /* Write MPT status to SW bit */ 1084 ddi_put8(mpt->tr_acchdl, (uint8_t *)&mpt_table[0], 0xF); 1085 mutex_exit(&mr->mr_lock); 1086 1087 TAVOR_TNF_EXIT(tavor_mr_deregister_fmr); 1088 return (DDI_SUCCESS); 1089 } 1090 1091 1092 /* 1093 * tavor_mr_query() 1094 * Context: Can be called from interrupt or base context. 1095 */ 1096 /* ARGSUSED */ 1097 int 1098 tavor_mr_query(tavor_state_t *state, tavor_mrhdl_t mr, 1099 ibt_mr_query_attr_t *attr) 1100 { 1101 TAVOR_TNF_ENTER(tavor_mr_query); 1102 1103 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr)) 1104 1105 mutex_enter(&mr->mr_lock); 1106 1107 /* 1108 * Check here to see if the memory region has already been partially 1109 * deregistered as a result of a tavor_umap_umemlock_cb() callback. 1110 * If so, this is an error, return failure. 1111 */ 1112 if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) { 1113 mutex_exit(&mr->mr_lock); 1114 TNF_PROBE_0(tavor_mr_query_inv_mrhdl_fail, TAVOR_TNF_ERROR, ""); 1115 TAVOR_TNF_EXIT(tavor_mr_query); 1116 return (IBT_MR_HDL_INVALID); 1117 } 1118 1119 /* Fill in the queried attributes */ 1120 attr->mr_attr_flags = mr->mr_accflag; 1121 attr->mr_pd = (ibt_pd_hdl_t)mr->mr_pdhdl; 1122 1123 /* Fill in the "local" attributes */ 1124 attr->mr_lkey = (ibt_lkey_t)mr->mr_lkey; 1125 attr->mr_lbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr; 1126 attr->mr_lbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len; 1127 1128 /* 1129 * Fill in the "remote" attributes (if necessary). Note: the 1130 * remote attributes are only valid if the memory region has one 1131 * or more of the remote access flags set. 
1132 */ 1133 if ((mr->mr_accflag & IBT_MR_REMOTE_READ) || 1134 (mr->mr_accflag & IBT_MR_REMOTE_WRITE) || 1135 (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) { 1136 attr->mr_rkey = (ibt_rkey_t)mr->mr_rkey; 1137 attr->mr_rbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr; 1138 attr->mr_rbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len; 1139 } 1140 1141 /* 1142 * If region is mapped for streaming (i.e. noncoherent), then set sync 1143 * is required 1144 */ 1145 attr->mr_sync_required = (mr->mr_bindinfo.bi_flags & 1146 IBT_MR_NONCOHERENT) ? B_TRUE : B_FALSE; 1147 1148 mutex_exit(&mr->mr_lock); 1149 TAVOR_TNF_EXIT(tavor_mr_query); 1150 return (DDI_SUCCESS); 1151 } 1152 1153 1154 /* 1155 * tavor_mr_reregister() 1156 * Context: Can be called from interrupt or base context. 1157 */ 1158 int 1159 tavor_mr_reregister(tavor_state_t *state, tavor_mrhdl_t mr, 1160 tavor_pdhdl_t pd, ibt_mr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl_new, 1161 tavor_mr_options_t *op) 1162 { 1163 tavor_bind_info_t bind; 1164 int status; 1165 1166 TAVOR_TNF_ENTER(tavor_mr_reregister); 1167 1168 /* 1169 * Fill in the "bind" struct. This struct provides the majority 1170 * of the information that will be used to distinguish between an 1171 * "addr" binding (as is the case here) and a "buf" binding (see 1172 * below). The "bind" struct is later passed to tavor_mr_mem_bind() 1173 * which does most of the "heavy lifting" for the Tavor memory 1174 * registration (and reregistration) routines. 
1175 */ 1176 bind.bi_type = TAVOR_BINDHDL_VADDR; 1177 bind.bi_addr = mr_attr->mr_vaddr; 1178 bind.bi_len = mr_attr->mr_len; 1179 bind.bi_as = mr_attr->mr_as; 1180 bind.bi_flags = mr_attr->mr_flags; 1181 status = tavor_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op); 1182 if (status != DDI_SUCCESS) { 1183 TNF_PROBE_0(tavor_mr_reregister_cmnreg_fail, 1184 TAVOR_TNF_ERROR, ""); 1185 TAVOR_TNF_EXIT(tavor_mr_reregister); 1186 return (status); 1187 } 1188 1189 TAVOR_TNF_EXIT(tavor_mr_reregister); 1190 return (DDI_SUCCESS); 1191 } 1192 1193 1194 /* 1195 * tavor_mr_reregister_buf() 1196 * Context: Can be called from interrupt or base context. 1197 */ 1198 int 1199 tavor_mr_reregister_buf(tavor_state_t *state, tavor_mrhdl_t mr, 1200 tavor_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf, 1201 tavor_mrhdl_t *mrhdl_new, tavor_mr_options_t *op) 1202 { 1203 tavor_bind_info_t bind; 1204 int status; 1205 1206 TAVOR_TNF_ENTER(tavor_mr_reregister_buf); 1207 1208 /* 1209 * Fill in the "bind" struct. This struct provides the majority 1210 * of the information that will be used to distinguish between an 1211 * "addr" binding (see above) and a "buf" binding (as is the case 1212 * here). The "bind" struct is later passed to tavor_mr_mem_bind() 1213 * which does most of the "heavy lifting" for the Tavor memory 1214 * registration routines. Note: We have chosen to provide 1215 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is 1216 * not set). It is not critical what value we choose here as it need 1217 * only be unique for the given RKey (which will happen by default), 1218 * so the choice here is somewhat arbitrary. 
1219 */ 1220 bind.bi_type = TAVOR_BINDHDL_BUF; 1221 bind.bi_buf = buf; 1222 if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) { 1223 bind.bi_addr = mr_attr->mr_vaddr; 1224 } else { 1225 bind.bi_addr = (uint64_t)(uintptr_t)buf->b_un.b_addr; 1226 } 1227 bind.bi_len = (uint64_t)buf->b_bcount; 1228 bind.bi_flags = mr_attr->mr_flags; 1229 bind.bi_as = NULL; 1230 status = tavor_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op); 1231 if (status != DDI_SUCCESS) { 1232 TNF_PROBE_0(tavor_mr_reregister_buf_cmnreg_fail, 1233 TAVOR_TNF_ERROR, ""); 1234 TAVOR_TNF_EXIT(tavor_mr_reregister_buf); 1235 return (status); 1236 } 1237 1238 TAVOR_TNF_EXIT(tavor_mr_reregister_buf); 1239 return (DDI_SUCCESS); 1240 } 1241 1242 1243 /* 1244 * tavor_mr_sync() 1245 * Context: Can be called from interrupt or base context. 1246 */ 1247 /* ARGSUSED */ 1248 int 1249 tavor_mr_sync(tavor_state_t *state, ibt_mr_sync_t *mr_segs, size_t num_segs) 1250 { 1251 tavor_mrhdl_t mrhdl; 1252 uint64_t seg_vaddr, seg_len, seg_end; 1253 uint64_t mr_start, mr_end; 1254 uint_t type; 1255 int status, i; 1256 char *errormsg; 1257 1258 TAVOR_TNF_ENTER(tavor_mr_sync); 1259 1260 /* Process each of the ibt_mr_sync_t's */ 1261 for (i = 0; i < num_segs; i++) { 1262 mrhdl = (tavor_mrhdl_t)mr_segs[i].ms_handle; 1263 1264 /* Check for valid memory region handle */ 1265 if (mrhdl == NULL) { 1266 /* Set "status" and "errormsg" and goto failure */ 1267 TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl"); 1268 goto mrsync_fail; 1269 } 1270 1271 mutex_enter(&mrhdl->mr_lock); 1272 1273 /* 1274 * Check here to see if the memory region has already been 1275 * partially deregistered as a result of a 1276 * tavor_umap_umemlock_cb() callback. If so, this is an 1277 * error, return failure. 
1278 */ 1279 if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) { 1280 mutex_exit(&mrhdl->mr_lock); 1281 /* Set "status" and "errormsg" and goto failure */ 1282 TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl2"); 1283 goto mrsync_fail; 1284 } 1285 1286 /* Check for valid bounds on sync request */ 1287 seg_vaddr = mr_segs[i].ms_vaddr; 1288 seg_len = mr_segs[i].ms_len; 1289 seg_end = seg_vaddr + seg_len - 1; 1290 mr_start = mrhdl->mr_bindinfo.bi_addr; 1291 mr_end = mr_start + mrhdl->mr_bindinfo.bi_len - 1; 1292 if ((seg_vaddr < mr_start) || (seg_vaddr > mr_end)) { 1293 mutex_exit(&mrhdl->mr_lock); 1294 /* Set "status" and "errormsg" and goto failure */ 1295 TAVOR_TNF_FAIL(IBT_MR_VA_INVALID, "invalid vaddr"); 1296 goto mrsync_fail; 1297 } 1298 if ((seg_end < mr_start) || (seg_end > mr_end)) { 1299 mutex_exit(&mrhdl->mr_lock); 1300 /* Set "status" and "errormsg" and goto failure */ 1301 TAVOR_TNF_FAIL(IBT_MR_LEN_INVALID, "invalid length"); 1302 goto mrsync_fail; 1303 } 1304 1305 /* Determine what type (i.e. direction) for sync */ 1306 if (mr_segs[i].ms_flags & IBT_SYNC_READ) { 1307 type = DDI_DMA_SYNC_FORDEV; 1308 } else if (mr_segs[i].ms_flags & IBT_SYNC_WRITE) { 1309 type = DDI_DMA_SYNC_FORCPU; 1310 } else { 1311 mutex_exit(&mrhdl->mr_lock); 1312 /* Set "status" and "errormsg" and goto failure */ 1313 TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid sync type"); 1314 goto mrsync_fail; 1315 } 1316 1317 (void) ddi_dma_sync(mrhdl->mr_bindinfo.bi_dmahdl, 1318 (off_t)(seg_vaddr - mr_start), (size_t)seg_len, type); 1319 mutex_exit(&mrhdl->mr_lock); 1320 } 1321 1322 TAVOR_TNF_EXIT(tavor_mr_sync); 1323 return (DDI_SUCCESS); 1324 1325 mrsync_fail: 1326 TNF_PROBE_1(tavor_mr_sync_fail, TAVOR_TNF_ERROR, "", tnf_string, msg, 1327 errormsg); 1328 TAVOR_TNF_EXIT(tavor_mr_sync); 1329 return (status); 1330 } 1331 1332 1333 /* 1334 * tavor_mw_alloc() 1335 * Context: Can be called from interrupt or base context. 
1336 */ 1337 int 1338 tavor_mw_alloc(tavor_state_t *state, tavor_pdhdl_t pd, ibt_mw_flags_t flags, 1339 tavor_mwhdl_t *mwhdl) 1340 { 1341 tavor_rsrc_t *mpt, *rsrc; 1342 tavor_hw_mpt_t mpt_entry; 1343 tavor_mwhdl_t mw; 1344 uint_t sleep; 1345 int status; 1346 char *errormsg; 1347 1348 TAVOR_TNF_ENTER(tavor_mw_alloc); 1349 1350 /* 1351 * Check the sleep flag. Ensure that it is consistent with the 1352 * current thread context (i.e. if we are currently in the interrupt 1353 * context, then we shouldn't be attempting to sleep). 1354 */ 1355 sleep = (flags & IBT_MW_NOSLEEP) ? TAVOR_NOSLEEP : TAVOR_SLEEP; 1356 if ((sleep == TAVOR_SLEEP) && 1357 (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) { 1358 /* Set "status" and "errormsg" and goto failure */ 1359 TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags"); 1360 goto mwalloc_fail; 1361 } 1362 1363 /* Increment the reference count on the protection domain (PD) */ 1364 tavor_pd_refcnt_inc(pd); 1365 1366 /* 1367 * Allocate an MPT entry (for use as a memory window). Since the 1368 * Tavor hardware uses the MPT entry for memory regions and for 1369 * memory windows, we will fill in this MPT with all the necessary 1370 * parameters for the memory window. And then (just as we do for 1371 * memory regions) ownership will be passed to the hardware in the 1372 * final step below. If we fail here, we must undo the protection 1373 * domain reference count. 1374 */ 1375 status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt); 1376 if (status != DDI_SUCCESS) { 1377 /* Set "status" and "errormsg" and goto failure */ 1378 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT"); 1379 goto mwalloc_fail1; 1380 } 1381 1382 /* 1383 * Allocate the software structure for tracking the memory window (i.e. 1384 * the Tavor Memory Window handle). Note: This is actually the same 1385 * software structure used for tracking memory regions, but since many 1386 * of the same properties are needed, only a single structure is 1387 * necessary. 
If we fail here, we must undo the protection domain 1388 * reference count and the previous resource allocation. 1389 */ 1390 status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc); 1391 if (status != DDI_SUCCESS) { 1392 /* Set "status" and "errormsg" and goto failure */ 1393 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle"); 1394 goto mwalloc_fail2; 1395 } 1396 mw = (tavor_mwhdl_t)rsrc->tr_addr; 1397 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw)) 1398 1399 /* 1400 * Calculate an "unbound" RKey from MPT index. In much the same way 1401 * as we do for memory regions (above), this key is constructed from 1402 * a "constrained" (which depends on the MPT index) and an 1403 * "unconstrained" portion (which may be arbitrarily chosen). 1404 */ 1405 tavor_mr_keycalc(state, mpt->tr_indx, &mw->mr_rkey); 1406 1407 /* 1408 * Fill in the MPT entry. This is the final step before passing 1409 * ownership of the MPT entry to the Tavor hardware. We use all of 1410 * the information collected/calculated above to fill in the 1411 * requisite portions of the MPT. Note: fewer entries in the MPT 1412 * entry are necessary to allocate a memory window. 1413 */ 1414 bzero(&mpt_entry, sizeof (tavor_hw_mpt_t)); 1415 mpt_entry.reg_win = TAVOR_MPT_IS_WINDOW; 1416 mpt_entry.mem_key = mw->mr_rkey; 1417 mpt_entry.pd = pd->pd_pdnum; 1418 1419 /* 1420 * Write the MPT entry to hardware. Lastly, we pass ownership of 1421 * the entry to the hardware. Note: in general, this operation 1422 * shouldn't fail. But if it does, we have to undo everything we've 1423 * done above before returning error. 
1424 */ 1425 status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry, 1426 sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep); 1427 if (status != TAVOR_CMD_SUCCESS) { 1428 cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n", 1429 status); 1430 TNF_PROBE_1(tavor_mw_alloc_sw2hw_mpt_cmd_fail, 1431 TAVOR_TNF_ERROR, "", tnf_uint, status, status); 1432 /* Set "status" and "errormsg" and goto failure */ 1433 TAVOR_TNF_FAIL(ibc_get_ci_failure(0), 1434 "tavor SW2HW_MPT command"); 1435 goto mwalloc_fail3; 1436 } 1437 1438 /* 1439 * Fill in the rest of the Tavor Memory Window handle. Having 1440 * successfully transferred ownership of the MPT, we can update the 1441 * following fields for use in further operations on the MW. 1442 */ 1443 mw->mr_mptrsrcp = mpt; 1444 mw->mr_pdhdl = pd; 1445 mw->mr_rsrcp = rsrc; 1446 *mwhdl = mw; 1447 1448 TAVOR_TNF_EXIT(tavor_mw_alloc); 1449 return (DDI_SUCCESS); 1450 1451 mwalloc_fail3: 1452 tavor_rsrc_free(state, &rsrc); 1453 mwalloc_fail2: 1454 tavor_rsrc_free(state, &mpt); 1455 mwalloc_fail1: 1456 tavor_pd_refcnt_dec(pd); 1457 mwalloc_fail: 1458 TNF_PROBE_1(tavor_mw_alloc_fail, TAVOR_TNF_ERROR, "", 1459 tnf_string, msg, errormsg); 1460 TAVOR_TNF_EXIT(tavor_mw_alloc); 1461 return (status); 1462 } 1463 1464 1465 /* 1466 * tavor_mw_free() 1467 * Context: Can be called from interrupt or base context. 1468 */ 1469 int 1470 tavor_mw_free(tavor_state_t *state, tavor_mwhdl_t *mwhdl, uint_t sleep) 1471 { 1472 tavor_rsrc_t *mpt, *rsrc; 1473 tavor_mwhdl_t mw; 1474 int status; 1475 char *errormsg; 1476 tavor_pdhdl_t pd; 1477 1478 TAVOR_TNF_ENTER(tavor_mw_free); 1479 1480 /* 1481 * Check the sleep flag. Ensure that it is consistent with the 1482 * current thread context (i.e. if we are currently in the interrupt 1483 * context, then we shouldn't be attempting to sleep). 
1484 */ 1485 if ((sleep == TAVOR_SLEEP) && 1486 (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) { 1487 /* Set "status" and "errormsg" and goto failure */ 1488 TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid sleep flags"); 1489 TNF_PROBE_1(tavor_mw_free_fail, TAVOR_TNF_ERROR, "", 1490 tnf_string, msg, errormsg); 1491 TAVOR_TNF_EXIT(tavor_mw_free); 1492 return (status); 1493 } 1494 1495 /* 1496 * Pull all the necessary information from the Tavor Memory Window 1497 * handle. This is necessary here because the resource for the 1498 * MW handle is going to be freed up as part of the this operation. 1499 */ 1500 mw = *mwhdl; 1501 mutex_enter(&mw->mr_lock); 1502 mpt = mw->mr_mptrsrcp; 1503 rsrc = mw->mr_rsrcp; 1504 pd = mw->mr_pdhdl; 1505 mutex_exit(&mw->mr_lock); 1506 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw)) 1507 1508 /* 1509 * Reclaim the MPT entry from hardware. Note: in general, it is 1510 * unexpected for this operation to return an error. 1511 */ 1512 status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL, 1513 0, mpt->tr_indx, sleep); 1514 if (status != TAVOR_CMD_SUCCESS) { 1515 cmn_err(CE_CONT, "Tavor: HW2SW_MPT command failed: %08x\n", 1516 status); 1517 TNF_PROBE_1(tavor_hw2sw_mpt_cmd_fail, TAVOR_TNF_ERROR, "", 1518 tnf_uint, status, status); 1519 TAVOR_TNF_EXIT(tavor_mw_free); 1520 return (IBT_INVALID_PARAM); 1521 } 1522 1523 /* Free the Tavor Memory Window handle */ 1524 tavor_rsrc_free(state, &rsrc); 1525 1526 /* Free up the MPT entry resource */ 1527 tavor_rsrc_free(state, &mpt); 1528 1529 /* Decrement the reference count on the protection domain (PD) */ 1530 tavor_pd_refcnt_dec(pd); 1531 1532 /* Set the mwhdl pointer to NULL and return success */ 1533 *mwhdl = NULL; 1534 1535 TAVOR_TNF_EXIT(tavor_mw_free); 1536 return (DDI_SUCCESS); 1537 } 1538 1539 1540 /* 1541 * tavor_mr_keycalc() 1542 * Context: Can be called from interrupt or base context. 
1543 */ 1544 void 1545 tavor_mr_keycalc(tavor_state_t *state, uint32_t indx, uint32_t *key) 1546 { 1547 uint32_t tmp, log_num_mpt; 1548 1549 /* 1550 * Generate a simple key from counter. Note: We increment this 1551 * static variable _intentionally_ without any kind of mutex around 1552 * it. First, single-threading all operations through a single lock 1553 * would be a bad idea (from a performance point-of-view). Second, 1554 * the upper "unconstrained" bits don't really have to be unique 1555 * because the lower bits are guaranteed to be (although we do make a 1556 * best effort to ensure that they are). Third, the window for the 1557 * race (where both threads read and update the counter at the same 1558 * time) is incredibly small. 1559 * And, lastly, we'd like to make this into a "random" key XXX 1560 */ 1561 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(tavor_debug_memkey_cnt)) 1562 log_num_mpt = state->ts_cfg_profile->cp_log_num_mpt; 1563 tmp = (tavor_debug_memkey_cnt++) << log_num_mpt; 1564 *key = tmp | indx; 1565 } 1566 1567 1568 /* 1569 * tavor_mr_common_reg() 1570 * Context: Can be called from interrupt or base context. 1571 */ 1572 static int 1573 tavor_mr_common_reg(tavor_state_t *state, tavor_pdhdl_t pd, 1574 tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op) 1575 { 1576 tavor_rsrc_pool_info_t *rsrc_pool; 1577 tavor_rsrc_t *mpt, *mtt, *rsrc, *mtt_refcnt; 1578 tavor_umap_db_entry_t *umapdb; 1579 tavor_sw_refcnt_t *swrc_tmp; 1580 tavor_hw_mpt_t mpt_entry; 1581 tavor_mrhdl_t mr; 1582 ibt_mr_flags_t flags; 1583 tavor_bind_info_t *bh; 1584 ddi_dma_handle_t bind_dmahdl; 1585 ddi_umem_cookie_t umem_cookie; 1586 size_t umem_len; 1587 caddr_t umem_addr; 1588 uint64_t mtt_addr, mtt_ddrbaseaddr, max_sz; 1589 uint_t sleep, mtt_pgsize_bits, bind_type, mr_is_umem; 1590 int status, umem_flags, bind_override_addr; 1591 char *errormsg; 1592 1593 TAVOR_TNF_ENTER(tavor_mr_common_reg); 1594 1595 /* 1596 * Check the "options" flag. 
Currently this flag tells the driver 1597 * whether or not the region should be bound normally (i.e. with 1598 * entries written into the PCI IOMMU), whether it should be 1599 * registered to bypass the IOMMU, and whether or not the resulting 1600 * address should be "zero-based" (to aid the alignment restrictions 1601 * for QPs). 1602 */ 1603 if (op == NULL) { 1604 bind_type = TAVOR_BINDMEM_NORMAL; 1605 bind_dmahdl = NULL; 1606 bind_override_addr = 0; 1607 } else { 1608 bind_type = op->mro_bind_type; 1609 bind_dmahdl = op->mro_bind_dmahdl; 1610 bind_override_addr = op->mro_bind_override_addr; 1611 } 1612 1613 /* Extract the flags field from the tavor_bind_info_t */ 1614 flags = bind->bi_flags; 1615 1616 /* 1617 * Check for invalid length. Check is the length is zero or if the 1618 * length is larger than the maximum configured value. Return error 1619 * if it is. 1620 */ 1621 max_sz = ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_mrw_sz); 1622 if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) { 1623 /* Set "status" and "errormsg" and goto failure */ 1624 TAVOR_TNF_FAIL(IBT_MR_LEN_INVALID, "invalid length"); 1625 goto mrcommon_fail; 1626 } 1627 1628 /* 1629 * Check the sleep flag. Ensure that it is consistent with the 1630 * current thread context (i.e. if we are currently in the interrupt 1631 * context, then we shouldn't be attempting to sleep). 1632 */ 1633 sleep = (flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP: TAVOR_SLEEP; 1634 if ((sleep == TAVOR_SLEEP) && 1635 (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) { 1636 /* Set "status" and "errormsg" and goto failure */ 1637 TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags"); 1638 goto mrcommon_fail; 1639 } 1640 1641 /* 1642 * Get the base address for the MTT table. This will be necessary 1643 * below when we are setting up the MPT entry. 
1644 */ 1645 rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT]; 1646 mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset; 1647 1648 /* Increment the reference count on the protection domain (PD) */ 1649 tavor_pd_refcnt_inc(pd); 1650 1651 /* 1652 * Allocate an MPT entry. This will be filled in with all the 1653 * necessary parameters to define the memory region. And then 1654 * ownership will be passed to the hardware in the final step 1655 * below. If we fail here, we must undo the protection domain 1656 * reference count. 1657 */ 1658 status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt); 1659 if (status != DDI_SUCCESS) { 1660 /* Set "status" and "errormsg" and goto failure */ 1661 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT"); 1662 goto mrcommon_fail1; 1663 } 1664 1665 /* 1666 * Allocate the software structure for tracking the memory region (i.e. 1667 * the Tavor Memory Region handle). If we fail here, we must undo 1668 * the protection domain reference count and the previous resource 1669 * allocation. 1670 */ 1671 status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc); 1672 if (status != DDI_SUCCESS) { 1673 /* Set "status" and "errormsg" and goto failure */ 1674 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle"); 1675 goto mrcommon_fail2; 1676 } 1677 mr = (tavor_mrhdl_t)rsrc->tr_addr; 1678 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) 1679 1680 /* 1681 * Setup and validate the memory region access flags. This means 1682 * translating the IBTF's enable flags into the access flags that 1683 * will be used in later operations. 
1684 */ 1685 mr->mr_accflag = 0; 1686 if (flags & IBT_MR_ENABLE_WINDOW_BIND) 1687 mr->mr_accflag |= IBT_MR_WINDOW_BIND; 1688 if (flags & IBT_MR_ENABLE_LOCAL_WRITE) 1689 mr->mr_accflag |= IBT_MR_LOCAL_WRITE; 1690 if (flags & IBT_MR_ENABLE_REMOTE_READ) 1691 mr->mr_accflag |= IBT_MR_REMOTE_READ; 1692 if (flags & IBT_MR_ENABLE_REMOTE_WRITE) 1693 mr->mr_accflag |= IBT_MR_REMOTE_WRITE; 1694 if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC) 1695 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC; 1696 1697 /* 1698 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed 1699 * from a certain number of "constrained" bits (the least significant 1700 * bits) and some number of "unconstrained" bits. The constrained 1701 * bits must be set to the index of the entry in the MPT table, but 1702 * the unconstrained bits can be set to any value we wish. Note: 1703 * if no remote access is required, then the RKey value is not filled 1704 * in. Otherwise both Rkey and LKey are given the same value. 1705 */ 1706 tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey); 1707 if ((mr->mr_accflag & IBT_MR_REMOTE_READ) || 1708 (mr->mr_accflag & IBT_MR_REMOTE_WRITE) || 1709 (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) { 1710 mr->mr_rkey = mr->mr_lkey; 1711 } 1712 1713 /* 1714 * Determine if the memory is from userland and pin the pages 1715 * with umem_lockmemory() if necessary. 1716 * Then, if this is userland memory, allocate an entry in the 1717 * "userland resources database". This will later be added to 1718 * the database (after all further memory registration operations are 1719 * successful). If we fail here, we must undo the reference counts 1720 * and the previous resource allocations. 1721 */ 1722 mr_is_umem = (((bind->bi_as != NULL) && (bind->bi_as != &kas)) ? 
1 : 0); 1723 if (mr_is_umem) { 1724 umem_len = ptob(btopr(bind->bi_len + 1725 ((uintptr_t)bind->bi_addr & PAGEOFFSET))); 1726 umem_addr = (caddr_t)((uintptr_t)bind->bi_addr & ~PAGEOFFSET); 1727 umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ | 1728 DDI_UMEMLOCK_LONGTERM); 1729 status = umem_lockmemory(umem_addr, umem_len, umem_flags, 1730 &umem_cookie, &tavor_umem_cbops, NULL); 1731 if (status != 0) { 1732 /* Set "status" and "errormsg" and goto failure */ 1733 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umem pin"); 1734 goto mrcommon_fail3; 1735 } 1736 1737 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind)) 1738 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind->bi_buf)) 1739 1740 bind->bi_buf = ddi_umem_iosetup(umem_cookie, 0, umem_len, 1741 B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP); 1742 if (bind->bi_buf == NULL) { 1743 /* Set "status" and "errormsg" and goto failure */ 1744 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed iosetup"); 1745 goto mrcommon_fail3; 1746 } 1747 bind->bi_type = TAVOR_BINDHDL_UBUF; 1748 bind->bi_buf->b_flags |= B_READ; 1749 1750 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind->bi_buf)) 1751 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind)) 1752 1753 umapdb = tavor_umap_db_alloc(state->ts_instance, 1754 (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC, 1755 (uint64_t)(uintptr_t)rsrc); 1756 if (umapdb == NULL) { 1757 /* Set "status" and "errormsg" and goto failure */ 1758 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add"); 1759 goto mrcommon_fail4; 1760 } 1761 } 1762 1763 /* 1764 * Setup the bindinfo for the mtt bind call 1765 */ 1766 bh = &mr->mr_bindinfo; 1767 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bh)) 1768 bcopy(bind, bh, sizeof (tavor_bind_info_t)); 1769 bh->bi_bypass = bind_type; 1770 status = tavor_mr_mtt_bind(state, bh, bind_dmahdl, &mtt, 1771 &mtt_pgsize_bits); 1772 if (status != DDI_SUCCESS) { 1773 /* Set "status" and "errormsg" and goto failure */ 1774 TAVOR_TNF_FAIL(status, "failed mtt bind"); 1775 /* 1776 * When mtt_bind fails, freerbuf has already 
been done, 1777 * so make sure not to call it again. 1778 */ 1779 bind->bi_type = bh->bi_type; 1780 goto mrcommon_fail5; 1781 } 1782 mr->mr_logmttpgsz = mtt_pgsize_bits; 1783 1784 /* 1785 * Allocate MTT reference count (to track shared memory regions). 1786 * This reference count resource may never be used on the given 1787 * memory region, but if it is ever later registered as "shared" 1788 * memory region then this resource will be necessary. If we fail 1789 * here, we do pretty much the same as above to clean up. 1790 */ 1791 status = tavor_rsrc_alloc(state, TAVOR_REFCNT, 1, sleep, 1792 &mtt_refcnt); 1793 if (status != DDI_SUCCESS) { 1794 /* Set "status" and "errormsg" and goto failure */ 1795 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed refence count"); 1796 goto mrcommon_fail6; 1797 } 1798 mr->mr_mttrefcntp = mtt_refcnt; 1799 swrc_tmp = (tavor_sw_refcnt_t *)mtt_refcnt->tr_addr; 1800 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp)) 1801 TAVOR_MTT_REFCNT_INIT(swrc_tmp); 1802 1803 /* 1804 * Fill in the MPT entry. This is the final step before passing 1805 * ownership of the MPT entry to the Tavor hardware. We use all of 1806 * the information collected/calculated above to fill in the 1807 * requisite portions of the MPT. 1808 */ 1809 bzero(&mpt_entry, sizeof (tavor_hw_mpt_t)); 1810 mpt_entry.m_io = TAVOR_MEM_CYCLE_GENERATE; 1811 mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0; 1812 mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0; 1813 mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0; 1814 mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0; 1815 mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 
1 : 0; 1816 mpt_entry.lr = 1; 1817 mpt_entry.reg_win = TAVOR_MPT_IS_REGION; 1818 mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC; 1819 mpt_entry.mem_key = mr->mr_lkey; 1820 mpt_entry.pd = pd->pd_pdnum; 1821 if (bind_override_addr == 0) { 1822 mpt_entry.start_addr = bh->bi_addr; 1823 } else { 1824 bh->bi_addr = bh->bi_addr & ((1 << mr->mr_logmttpgsz) - 1); 1825 mpt_entry.start_addr = bh->bi_addr; 1826 } 1827 mpt_entry.reg_win_len = bh->bi_len; 1828 mpt_entry.win_cnt_limit = TAVOR_UNLIMITED_WIN_BIND; 1829 mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT); 1830 mpt_entry.mttseg_addr_h = mtt_addr >> 32; 1831 mpt_entry.mttseg_addr_l = mtt_addr >> 6; 1832 1833 /* 1834 * Write the MPT entry to hardware. Lastly, we pass ownership of 1835 * the entry to the hardware. Note: in general, this operation 1836 * shouldn't fail. But if it does, we have to undo everything we've 1837 * done above before returning error. 1838 */ 1839 status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry, 1840 sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep); 1841 if (status != TAVOR_CMD_SUCCESS) { 1842 cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n", 1843 status); 1844 TNF_PROBE_1(tavor_mr_common_reg_sw2hw_mpt_cmd_fail, 1845 TAVOR_TNF_ERROR, "", tnf_uint, status, status); 1846 /* Set "status" and "errormsg" and goto failure */ 1847 TAVOR_TNF_FAIL(ibc_get_ci_failure(0), 1848 "tavor SW2HW_MPT command"); 1849 goto mrcommon_fail7; 1850 } 1851 1852 /* 1853 * Fill in the rest of the Tavor Memory Region handle. Having 1854 * successfully transferred ownership of the MPT, we can update the 1855 * following fields for use in further operations on the MR. 1856 */ 1857 mr->mr_mptrsrcp = mpt; 1858 mr->mr_mttrsrcp = mtt; 1859 mr->mr_pdhdl = pd; 1860 mr->mr_rsrcp = rsrc; 1861 mr->mr_is_umem = mr_is_umem; 1862 mr->mr_is_fmr = 0; 1863 mr->mr_umemcookie = (mr_is_umem != 0) ? 
umem_cookie : NULL; 1864 mr->mr_umem_cbfunc = NULL; 1865 mr->mr_umem_cbarg1 = NULL; 1866 mr->mr_umem_cbarg2 = NULL; 1867 1868 /* 1869 * If this is userland memory, then we need to insert the previously 1870 * allocated entry into the "userland resources database". This will 1871 * allow for later coordination between the tavor_umap_umemlock_cb() 1872 * callback and tavor_mr_deregister(). 1873 */ 1874 if (mr_is_umem) { 1875 tavor_umap_db_add(umapdb); 1876 } 1877 1878 *mrhdl = mr; 1879 1880 TAVOR_TNF_EXIT(tavor_mr_common_reg); 1881 return (DDI_SUCCESS); 1882 1883 /* 1884 * The following is cleanup for all possible failure cases in this routine 1885 */ 1886 mrcommon_fail7: 1887 tavor_rsrc_free(state, &mtt_refcnt); 1888 mrcommon_fail6: 1889 tavor_rsrc_free(state, &mtt); 1890 tavor_mr_mem_unbind(state, bh); 1891 bind->bi_type = bh->bi_type; 1892 mrcommon_fail5: 1893 if (mr_is_umem) { 1894 tavor_umap_db_free(umapdb); 1895 } 1896 mrcommon_fail4: 1897 if (mr_is_umem) { 1898 /* 1899 * Free up the memory ddi_umem_iosetup() allocates 1900 * internally. 1901 */ 1902 if (bind->bi_type == TAVOR_BINDHDL_UBUF) { 1903 freerbuf(bind->bi_buf); 1904 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind)) 1905 bind->bi_type = TAVOR_BINDHDL_NONE; 1906 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind)) 1907 } 1908 ddi_umem_unlock(umem_cookie); 1909 } 1910 mrcommon_fail3: 1911 tavor_rsrc_free(state, &rsrc); 1912 mrcommon_fail2: 1913 tavor_rsrc_free(state, &mpt); 1914 mrcommon_fail1: 1915 tavor_pd_refcnt_dec(pd); 1916 mrcommon_fail: 1917 TNF_PROBE_1(tavor_mr_common_reg_fail, TAVOR_TNF_ERROR, "", 1918 tnf_string, msg, errormsg); 1919 TAVOR_TNF_EXIT(tavor_mr_common_reg); 1920 return (status); 1921 } 1922 1923 /* 1924 * tavor_mr_mtt_bind() 1925 * Context: Can be called from interrupt or base context. 
1926 */ 1927 int 1928 tavor_mr_mtt_bind(tavor_state_t *state, tavor_bind_info_t *bind, 1929 ddi_dma_handle_t bind_dmahdl, tavor_rsrc_t **mtt, uint_t *mtt_pgsize_bits) 1930 { 1931 uint64_t nummtt; 1932 uint_t sleep; 1933 int status; 1934 char *errormsg; 1935 1936 TAVOR_TNF_ENTER(tavor_mr_common_reg); 1937 1938 /* 1939 * Check the sleep flag. Ensure that it is consistent with the 1940 * current thread context (i.e. if we are currently in the interrupt 1941 * context, then we shouldn't be attempting to sleep). 1942 */ 1943 sleep = (bind->bi_flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP: TAVOR_SLEEP; 1944 if ((sleep == TAVOR_SLEEP) && 1945 (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) { 1946 /* Set "status" and "errormsg" and goto failure */ 1947 TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags"); 1948 goto mrmttbind_fail; 1949 } 1950 1951 /* 1952 * Bind the memory and determine the mapped addresses. This is 1953 * the first of two routines that do all the "heavy lifting" for 1954 * the Tavor memory registration routines. The tavor_mr_mem_bind() 1955 * routine takes the "bind" struct with all its fields filled 1956 * in and returns a list of DMA cookies (for the PCI mapped addresses 1957 * corresponding to the specified address region) which are used by 1958 * the tavor_mr_fast_mtt_write() routine below. If we fail here, we 1959 * must undo all the previous resource allocation (and PD reference 1960 * count). 1961 */ 1962 status = tavor_mr_mem_bind(state, bind, bind_dmahdl, sleep); 1963 if (status != DDI_SUCCESS) { 1964 /* Set "status" and "errormsg" and goto failure */ 1965 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed mem bind"); 1966 goto mrmttbind_fail; 1967 } 1968 1969 /* 1970 * Determine number of pages spanned. This routine uses the 1971 * information in the "bind" struct to determine the required 1972 * number of MTT entries needed (and returns the suggested page size - 1973 * as a "power-of-2" - for each MTT entry). 
1974 */ 1975 nummtt = tavor_mr_nummtt_needed(state, bind, mtt_pgsize_bits); 1976 1977 /* 1978 * Allocate the MTT entries. Use the calculations performed above to 1979 * allocate the required number of MTT entries. Note: MTT entries are 1980 * allocated in "MTT segments" which consist of complete cachelines 1981 * (i.e. 8 entries, 16 entries, etc.) So the TAVOR_NUMMTT_TO_MTTSEG() 1982 * macro is used to do the proper conversion. If we fail here, we 1983 * must not only undo all the previous resource allocation (and PD 1984 * reference count), but we must also unbind the memory. 1985 */ 1986 status = tavor_rsrc_alloc(state, TAVOR_MTT, 1987 TAVOR_NUMMTT_TO_MTTSEG(nummtt), sleep, mtt); 1988 if (status != DDI_SUCCESS) { 1989 /* Set "status" and "errormsg" and goto failure */ 1990 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MTT"); 1991 goto mrmttbind_fail2; 1992 } 1993 1994 /* 1995 * Write the mapped addresses into the MTT entries. This is part two 1996 * of the "heavy lifting" routines that we talked about above. Note: 1997 * we pass the suggested page size from the earlier operation here. 1998 * And if we fail here, we again do pretty much the same huge clean up. 
1999 */ 2000 status = tavor_mr_fast_mtt_write(*mtt, bind, *mtt_pgsize_bits); 2001 if (status != DDI_SUCCESS) { 2002 /* Set "status" and "errormsg" and goto failure */ 2003 TAVOR_TNF_FAIL(ibc_get_ci_failure(0), "failed write mtt"); 2004 goto mrmttbind_fail3; 2005 } 2006 TAVOR_TNF_EXIT(tavor_mr_mtt_bind); 2007 return (DDI_SUCCESS); 2008 2009 /* 2010 * The following is cleanup for all possible failure cases in this routine 2011 */ 2012 mrmttbind_fail3: 2013 tavor_rsrc_free(state, mtt); 2014 mrmttbind_fail2: 2015 tavor_mr_mem_unbind(state, bind); 2016 mrmttbind_fail: 2017 TNF_PROBE_1(tavor_mr_mtt_bind_fail, TAVOR_TNF_ERROR, "", 2018 tnf_string, msg, errormsg); 2019 TAVOR_TNF_EXIT(tavor_mr_mtt_bind); 2020 return (status); 2021 } 2022 2023 2024 /* 2025 * tavor_mr_mtt_unbind() 2026 * Context: Can be called from interrupt or base context. 2027 */ 2028 int 2029 tavor_mr_mtt_unbind(tavor_state_t *state, tavor_bind_info_t *bind, 2030 tavor_rsrc_t *mtt) 2031 { 2032 TAVOR_TNF_ENTER(tavor_mr_mtt_unbind); 2033 2034 /* 2035 * Free up the MTT entries and unbind the memory. Here, as above, we 2036 * attempt to free these resources only if it is appropriate to do so. 2037 */ 2038 tavor_mr_mem_unbind(state, bind); 2039 tavor_rsrc_free(state, &mtt); 2040 2041 TAVOR_TNF_EXIT(tavor_mr_mtt_unbind); 2042 return (DDI_SUCCESS); 2043 } 2044 2045 2046 /* 2047 * tavor_mr_common_rereg() 2048 * Context: Can be called from interrupt or base context. 
 */
static int
tavor_mr_common_rereg(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_pdhdl_t pd, tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl_new,
    tavor_mr_options_t *op)
{
	tavor_rsrc_t		*mpt;
	ibt_mr_attr_flags_t	acc_flags_to_use;
	ibt_mr_flags_t		flags;
	tavor_pdhdl_t		pd_to_use;
	tavor_hw_mpt_t		mpt_entry;
	uint64_t		mtt_addr_to_use, vaddr_to_use, len_to_use;
	uint_t			sleep, dereg_level;
	int			status;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_common_rereg);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))

	/*
	 * Check here to see if the memory region corresponds to a userland
	 * mapping.  Reregistration of userland memory regions is not
	 * currently supported.  Return failure. XXX
	 */
	if (mr->mr_is_umem) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl");
		goto mrrereg_fail;
	}

	mutex_enter(&mr->mr_lock);

	/* Pull MPT resource pointer from the Tavor Memory Region handle */
	mpt = mr->mr_mptrsrcp;

	/* Extract the flags field from the tavor_bind_info_t */
	flags = bind->bi_flags;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP: TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		mutex_exit(&mr->mr_lock);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
		goto mrrereg_fail;
	}

	/*
	 * First step is to temporarily invalidate the MPT entry.  This
	 * regains ownership from the hardware, and gives us the opportunity
	 * to modify the entry.  Note: The HW2SW_MPT command returns the
	 * current MPT entry contents.  These are saved away here because
	 * they will be reused in a later step below.  If the region has
	 * bound memory windows then we fail, returning an "in use" error
	 * code.  Otherwise, this is an unexpected error and we deregister
	 * the memory region and return error.
	 *
	 * We use TAVOR_CMD_NOSLEEP_SPIN here always because we must protect
	 * against holding the lock around this rereg call in all contexts.
	 */
	status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, TAVOR_CMD_NOSLEEP_SPIN);
	if (status != TAVOR_CMD_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		if (status == TAVOR_CMD_REG_BOUND) {
			TAVOR_TNF_EXIT(tavor_mr_common_rereg);
			return (IBT_MR_IN_USE);
		} else {
			cmn_err(CE_CONT, "Tavor: HW2SW_MPT command failed: "
			    "%08x\n", status);

			/*
			 * Call deregister and ensure that all current
			 * resources get freed up
			 */
			if (tavor_mr_deregister(state, &mr,
			    TAVOR_MR_DEREG_ALL, sleep) != DDI_SUCCESS) {
				TAVOR_WARNING(state, "failed to deregister "
				    "memory region");
			}
			TNF_PROBE_1(tavor_mr_common_rereg_hw2sw_mpt_cmd_fail,
			    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
			TAVOR_TNF_EXIT(tavor_mr_common_rereg);
			return (ibc_get_ci_failure(0));
		}
	}

	/*
	 * If we're changing the protection domain, then validate the new one
	 */
	if (flags & IBT_MR_CHANGE_PD) {

		/* Check for valid PD handle pointer */
		if (pd == NULL) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all current
			 * resources get properly freed up.  Unnecessary
			 * here to attempt to regain software ownership
			 * of the MPT entry as that has already been
			 * done above.
			 */
			if (tavor_mr_deregister(state, &mr,
			    TAVOR_MR_DEREG_NO_HW2SW_MPT, sleep) !=
			    DDI_SUCCESS) {
				TAVOR_WARNING(state, "failed to deregister "
				    "memory region");
			}
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_PD_HDL_INVALID, "invalid PD handle");
			goto mrrereg_fail;
		}

		/* Use the new PD handle in all operations below */
		pd_to_use = pd;

	} else {
		/* Use the current PD handle in all operations below */
		pd_to_use = mr->mr_pdhdl;
	}

	/*
	 * If we're changing access permissions, then validate the new ones
	 */
	if (flags & IBT_MR_CHANGE_ACCESS) {
		/*
		 * Validate the access flags.  Both remote write and remote
		 * atomic require the local write flag to be set
		 */
		if (((flags & IBT_MR_ENABLE_REMOTE_WRITE) ||
		    (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)) &&
		    !(flags & IBT_MR_ENABLE_LOCAL_WRITE)) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all current
			 * resources get properly freed up.  Unnecessary
			 * here to attempt to regain software ownership
			 * of the MPT entry as that has already been
			 * done above.
			 */
			if (tavor_mr_deregister(state, &mr,
			    TAVOR_MR_DEREG_NO_HW2SW_MPT, sleep) !=
			    DDI_SUCCESS) {
				TAVOR_WARNING(state, "failed to deregister "
				    "memory region");
			}
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_MR_ACCESS_REQ_INVALID,
			    "invalid access flags");
			goto mrrereg_fail;
		}

		/*
		 * Setup and validate the memory region access flags.  This
		 * means translating the IBTF's enable flags into the access
		 * flags that will be used in later operations.
		 */
		acc_flags_to_use = 0;
		if (flags & IBT_MR_ENABLE_WINDOW_BIND)
			acc_flags_to_use |= IBT_MR_WINDOW_BIND;
		if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
			acc_flags_to_use |= IBT_MR_LOCAL_WRITE;
		if (flags & IBT_MR_ENABLE_REMOTE_READ)
			acc_flags_to_use |= IBT_MR_REMOTE_READ;
		if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
			acc_flags_to_use |= IBT_MR_REMOTE_WRITE;
		if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
			acc_flags_to_use |= IBT_MR_REMOTE_ATOMIC;

	} else {
		/* Keep the access flags from the original registration */
		acc_flags_to_use = mr->mr_accflag;
	}

	/*
	 * If we're modifying the translation, then figure out whether
	 * we can reuse the current MTT resources.  This means calling
	 * tavor_mr_rereg_xlat_helper() which does most of the heavy lifting
	 * for the reregistration.  If the current memory region contains
	 * sufficient MTT entries for the new regions, then it will be
	 * reused and filled in.  Otherwise, new entries will be allocated,
	 * the old ones will be freed, and the new entries will be filled
	 * in.  Note: If we're not modifying the translation, then we
	 * should already have all the information we need to update the MPT.
	 * Also note: If tavor_mr_rereg_xlat_helper() fails, it will return
	 * a "dereg_level" which is the level of cleanup that needs to be
	 * passed to tavor_mr_deregister() to finish the cleanup.
	 */
	if (flags & IBT_MR_CHANGE_TRANSLATION) {
		status = tavor_mr_rereg_xlat_helper(state, mr, bind, op,
		    &mtt_addr_to_use, sleep, &dereg_level);
		if (status != DDI_SUCCESS) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all resources get
			 * properly freed up.
			 */
			if (tavor_mr_deregister(state, &mr, dereg_level,
			    sleep) != DDI_SUCCESS) {
				TAVOR_WARNING(state, "failed to deregister "
				    "memory region");
			}

			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(status, "failed rereg helper");
			goto mrrereg_fail;
		}
		vaddr_to_use = mr->mr_bindinfo.bi_addr;
		len_to_use = mr->mr_bindinfo.bi_len;
	} else {
		/* Reconstruct the MTT address from the saved MPT contents */
		mtt_addr_to_use = (((uint64_t)mpt_entry.mttseg_addr_h << 32) |
		    ((uint64_t)mpt_entry.mttseg_addr_l << 6));
		vaddr_to_use = mr->mr_bindinfo.bi_addr;
		len_to_use = mr->mr_bindinfo.bi_len;
	}

	/*
	 * Calculate new keys (Lkey, Rkey) from MPT index.  Just like they were
	 * when the region was first registered, each key is formed from
	 * "constrained" bits and "unconstrained" bits.  Note: If no remote
	 * access is required, then the RKey value is not filled in.  Otherwise
	 * both Rkey and LKey are given the same value.
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
	if ((acc_flags_to_use & IBT_MR_REMOTE_READ) ||
	    (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ||
	    (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	}

	/*
	 * Update the MPT entry with the new information.  Some of this
	 * information is retained from the previous operation, some of
	 * it is new based on request.
	 */
	mpt_entry.en_bind = (acc_flags_to_use & IBT_MR_WINDOW_BIND)	? 1 : 0;
	mpt_entry.atomic  = (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw	  = (acc_flags_to_use & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
	mpt_entry.rr	  = (acc_flags_to_use & IBT_MR_REMOTE_READ)   ? 1 : 0;
	mpt_entry.lw	  = (acc_flags_to_use & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
	mpt_entry.page_sz	= mr->mr_logmttpgsz - 0xC;
	mpt_entry.mem_key	= mr->mr_lkey;
	mpt_entry.pd		= pd_to_use->pd_pdnum;
	mpt_entry.start_addr	= vaddr_to_use;
	mpt_entry.reg_win_len	= len_to_use;
	mpt_entry.mttseg_addr_h = mtt_addr_to_use >> 32;
	mpt_entry.mttseg_addr_l = mtt_addr_to_use >> 6;

	/*
	 * Write the updated MPT entry to hardware
	 *
	 * We use TAVOR_CMD_NOSLEEP_SPIN here always because we must protect
	 * against holding the lock around this rereg call in all contexts.
	 */
	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, TAVOR_CMD_NOSLEEP_SPIN);
	if (status != TAVOR_CMD_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
		    status);
		/*
		 * Call deregister and ensure that all current resources get
		 * properly freed up.  Unnecessary here to attempt to regain
		 * software ownership of the MPT entry as that has already
		 * been done above.
		 */
		if (tavor_mr_deregister(state, &mr,
		    TAVOR_MR_DEREG_NO_HW2SW_MPT, sleep) != DDI_SUCCESS) {
			TAVOR_WARNING(state, "failed to deregister memory "
			    "region");
		}
		TNF_PROBE_1(tavor_mr_common_rereg_sw2hw_mpt_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
		TAVOR_TNF_EXIT(tavor_mr_common_rereg);
		return (ibc_get_ci_failure(0));
	}

	/*
	 * If we're changing PD, then update their reference counts now.
	 * This means decrementing the reference count on the old PD and
	 * incrementing the reference count on the new PD.
	 */
	if (flags & IBT_MR_CHANGE_PD) {
		tavor_pd_refcnt_dec(mr->mr_pdhdl);
		tavor_pd_refcnt_inc(pd);
	}

	/*
	 * Update the contents of the Tavor Memory Region handle to reflect
	 * what has been changed.
	 */
	mr->mr_pdhdl	  = pd_to_use;
	mr->mr_accflag	  = acc_flags_to_use;
	mr->mr_is_umem	  = 0;
	mr->mr_is_fmr	  = 0;
	mr->mr_umemcookie = NULL;

	/* New MR handle is same as the old */
	*mrhdl_new = mr;
	mutex_exit(&mr->mr_lock);

	TAVOR_TNF_EXIT(tavor_mr_common_rereg);
	return (DDI_SUCCESS);

mrrereg_fail:
	TNF_PROBE_1(tavor_mr_common_rereg_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mr_common_rereg);
	return (status);
}


/*
 * tavor_mr_rereg_xlat_helper
 *    Context: Can be called from interrupt or base context.
 *    Note: This routine expects the "mr_lock" to be held when it
 *    is called.  Upon returning failure, this routine passes information
 *    about what "dereg_level" should be passed to tavor_mr_deregister().
 */
static int
tavor_mr_rereg_xlat_helper(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_bind_info_t *bind, tavor_mr_options_t *op, uint64_t *mtt_addr,
    uint_t sleep, uint_t *dereg_level)
{
	tavor_rsrc_pool_info_t	*rsrc_pool;
	tavor_rsrc_t		*mtt, *mtt_refcnt;
	tavor_sw_refcnt_t	*swrc_old, *swrc_new;
	ddi_dma_handle_t	dmahdl;
	uint64_t		nummtt_needed, nummtt_in_currrsrc, max_sz;
	uint64_t		mtt_ddrbaseaddr;
	uint_t			mtt_pgsize_bits, bind_type, reuse_dmahdl;
	int			status;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_rereg_xlat_helper);

	ASSERT(MUTEX_HELD(&mr->mr_lock));

	/*
	 * Check the "options" flag.  Currently this flag tells the driver
	 * whether or not the region should be bound normally (i.e. with
	 * entries written into the PCI IOMMU) or whether it should be
	 * registered to bypass the IOMMU.
	 */
	if (op == NULL) {
		bind_type = TAVOR_BINDMEM_NORMAL;
	} else {
		bind_type = op->mro_bind_type;
	}

	/*
	 * Check for invalid length.  Check if the length is zero or if the
	 * length is larger than the maximum configured value.  Return error
	 * if it is.
	 */
	max_sz = ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_mrw_sz);
	if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
		/*
		 * Deregister will be called upon returning failure from this
		 * routine.  This will ensure that all current resources get
		 * properly freed up.  Unnecessary to attempt to regain
		 * software ownership of the MPT entry as that has already
		 * been done above (in tavor_mr_reregister())
		 */
		*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT;

		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_MR_LEN_INVALID, "invalid length");
		goto mrrereghelp_fail;
	}

	/*
	 * Determine the number of pages necessary for new region and the
	 * number of pages supported by the current MTT resources
	 */
	nummtt_needed = tavor_mr_nummtt_needed(state, bind, &mtt_pgsize_bits);
	nummtt_in_currrsrc = mr->mr_mttrsrcp->tr_len >> TAVOR_MTT_SIZE_SHIFT;

	/*
	 * Depending on whether we have enough pages or not, the next step is
	 * to fill in a set of MTT entries that reflect the new mapping.  In
	 * the first case below, we already have enough entries.  This means
	 * we need to unbind the memory from the previous mapping, bind the
	 * memory for the new mapping, write the new MTT entries, and update
	 * the mr to reflect the changes.
	 * In the second case below, we do not have enough entries in the
	 * current mapping.  So, in this case, we need not only to unbind the
	 * current mapping, but we need to free up the MTT resources associated
	 * with that mapping.  After we've successfully done that, we continue
	 * by binding the new memory, allocating new MTT entries, writing the
	 * new MTT entries, and updating the mr to reflect the changes.
	 */

	/*
	 * If this region is being shared (i.e. MTT refcount != 1), then we
	 * can't reuse the current MTT resources regardless of their size.
	 * Instead we'll need to alloc new ones (below) just as if there
	 * hadn't been enough room in the current entries.
	 */
	swrc_old = (tavor_sw_refcnt_t *)mr->mr_mttrefcntp->tr_addr;
	if (TAVOR_MTT_IS_NOT_SHARED(swrc_old) &&
	    (nummtt_needed <= nummtt_in_currrsrc)) {

		/*
		 * Unbind the old mapping for this memory region, but retain
		 * the ddi_dma_handle_t (if possible) for reuse in the bind
		 * operation below.  Note: If original memory region was
		 * bound for IOMMU bypass and the new region can not use
		 * bypass, then a new DMA handle will be necessary.
		 */
		if (TAVOR_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
			mr->mr_bindinfo.bi_free_dmahdl = 0;
			tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
			dmahdl = mr->mr_bindinfo.bi_dmahdl;
			reuse_dmahdl = 1;
		} else {
			tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
			dmahdl = NULL;
			reuse_dmahdl = 0;
		}

		/*
		 * Bind the new memory and determine the mapped addresses.
		 * As described, this routine and tavor_mr_fast_mtt_write()
		 * do the majority of the work for the memory registration
		 * operations.  Note: When we successfully finish the binding,
		 * we will set the "bi_free_dmahdl" flag to indicate that
		 * even though we may have reused the ddi_dma_handle_t we do
		 * wish it to be freed up at some later time.  Note also that
		 * if we fail, we may need to cleanup the ddi_dma_handle_t.
		 */
		bind->bi_bypass	= bind_type;
		status = tavor_mr_mem_bind(state, bind, dmahdl, sleep);
		if (status != DDI_SUCCESS) {
			if (reuse_dmahdl) {
				ddi_dma_free_handle(&dmahdl);
			}

			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in tavor_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 */
			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed mem bind");
			goto mrrereghelp_fail;
		}
		if (reuse_dmahdl) {
			/* Reused handle must still be freed at dereg time */
			bind->bi_free_dmahdl = 1;
		}

		/*
		 * Using the new mapping, but reusing the current MTT
		 * resources, write the updated entries to MTT
		 */
		mtt    = mr->mr_mttrsrcp;
		status = tavor_mr_fast_mtt_write(mtt, bind, mtt_pgsize_bits);
		if (status != DDI_SUCCESS) {
			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in tavor_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 *
			 * But we do need to unbind the newly bound memory
			 * before returning.
			 */
			tavor_mr_mem_unbind(state, bind);
			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
			    "failed write mtt");
			goto mrrereghelp_fail;
		}

		/* Put the updated information into the Mem Region handle */
		mr->mr_bindinfo	  = *bind;
		mr->mr_logmttpgsz = mtt_pgsize_bits;

	} else {
		/*
		 * Check if the memory region MTT is shared by any other MRs.
		 * Since the resource may be shared between multiple memory
		 * regions (as a result of a "RegisterSharedMR()" verb) it is
		 * important that we not unbind any resources prematurely.
		 */
		if (!TAVOR_MTT_IS_SHARED(swrc_old)) {
			/*
			 * Unbind the old mapping for this memory region, but
			 * retain the ddi_dma_handle_t for reuse in the bind
			 * operation below. Note: This can only be done here
			 * because the region being reregistered is not
			 * currently shared.  Also if original memory region
			 * was bound for IOMMU bypass and the new region can
			 * not use bypass, then a new DMA handle will be
			 * necessary.
			 */
			if (TAVOR_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
				mr->mr_bindinfo.bi_free_dmahdl = 0;
				tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
				dmahdl = mr->mr_bindinfo.bi_dmahdl;
				reuse_dmahdl = 1;
			} else {
				tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
				dmahdl = NULL;
				reuse_dmahdl = 0;
			}
		} else {
			/* Shared: leave the old binding untouched */
			dmahdl = NULL;
			reuse_dmahdl = 0;
		}

		/*
		 * Bind the new memory and determine the mapped addresses.
		 * As described, this routine and tavor_mr_fast_mtt_write()
		 * do the majority of the work for the memory registration
		 * operations.  Note: When we successfully finish the binding,
		 * we will set the "bi_free_dmahdl" flag to indicate that
		 * even though we may have reused the ddi_dma_handle_t we do
		 * wish it to be freed up at some later time.  Note also that
		 * if we fail, we may need to cleanup the ddi_dma_handle_t.
		 */
		bind->bi_bypass	= bind_type;
		status = tavor_mr_mem_bind(state, bind, dmahdl, sleep);
		if (status != DDI_SUCCESS) {
			if (reuse_dmahdl) {
				ddi_dma_free_handle(&dmahdl);
			}

			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in tavor_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 */
			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed mem bind");
			goto mrrereghelp_fail;
		}
		if (reuse_dmahdl) {
			/* Reused handle must still be freed at dereg time */
			bind->bi_free_dmahdl = 1;
		}

		/*
		 * Allocate the new MTT entries resource
		 */
		status = tavor_rsrc_alloc(state, TAVOR_MTT,
		    TAVOR_NUMMTT_TO_MTTSEG(nummtt_needed), sleep, &mtt);
		if (status != DDI_SUCCESS) {
			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in tavor_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 *
			 * But we do need to unbind the newly bound memory
			 * before returning.
			 */
			tavor_mr_mem_unbind(state, bind);
			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MTT");
			goto mrrereghelp_fail;
		}

		/*
		 * Allocate MTT reference count (to track shared memory
		 * regions).  As mentioned elsewhere above, this reference
		 * count resource may never be used on the given memory region,
		 * but if it is ever later registered as a "shared" memory
		 * region then this resource will be necessary.  Note: This
		 * is only necessary here if the existing memory region is
		 * already being shared (because otherwise we already have
		 * a useable reference count resource).
		 */
		if (TAVOR_MTT_IS_SHARED(swrc_old)) {
			status = tavor_rsrc_alloc(state, TAVOR_REFCNT, 1,
			    sleep, &mtt_refcnt);
			if (status != DDI_SUCCESS) {
				/*
				 * Deregister will be called upon returning
				 * failure from this routine.  This will ensure
				 * that all current resources get properly
				 * freed up.  Unnecessary to attempt to regain
				 * software ownership of the MPT entry as that
				 * has already been done above (in
				 * tavor_mr_reregister()).  Also unnecessary
				 * to attempt to unbind the memory.
				 *
				 * But we need to unbind the newly bound
				 * memory and free up the newly allocated MTT
				 * entries before returning.
				 */
				tavor_mr_mem_unbind(state, bind);
				tavor_rsrc_free(state, &mtt);
				*dereg_level =
				    TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

				/* Set "status"/"errormsg", goto failure */
				TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE,
				    "failed reference count");
				goto mrrereghelp_fail;
			}
			swrc_new = (tavor_sw_refcnt_t *)mtt_refcnt->tr_addr;
			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_new))
			TAVOR_MTT_REFCNT_INIT(swrc_new);
		} else {
			mtt_refcnt = mr->mr_mttrefcntp;
		}

		/*
		 * Using the new mapping and the new MTT resources, write the
		 * updated entries to MTT
		 */
		status = tavor_mr_fast_mtt_write(mtt, bind, mtt_pgsize_bits);
		if (status != DDI_SUCCESS) {
			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in tavor_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 *
			 * But we need to unbind the newly bound memory,
			 * free up the newly allocated MTT entries, and
			 * (possibly) free the new MTT reference count
			 * resource before returning.
			 */
			if (TAVOR_MTT_IS_SHARED(swrc_old)) {
				tavor_rsrc_free(state, &mtt_refcnt);
			}
			tavor_mr_mem_unbind(state, bind);
			tavor_rsrc_free(state, &mtt);
			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed write mtt");
			goto mrrereghelp_fail;
		}

		/*
		 * Check if the memory region MTT is shared by any other MRs.
		 * Since the resource may be shared between multiple memory
		 * regions (as a result of a "RegisterSharedMR()" verb) it is
		 * important that we not free up any resources prematurely.
		 */
		if (TAVOR_MTT_IS_SHARED(swrc_old)) {
			/* Decrement MTT reference count for "old" region */
			(void) tavor_mtt_refcnt_dec(mr->mr_mttrefcntp);
		} else {
			/* Free up the old MTT entries resource */
			tavor_rsrc_free(state, &mr->mr_mttrsrcp);
		}

		/* Put the updated information into the mrhdl */
		mr->mr_bindinfo	  = *bind;
		mr->mr_logmttpgsz = mtt_pgsize_bits;
		mr->mr_mttrsrcp   = mtt;
		mr->mr_mttrefcntp = mtt_refcnt;
	}

	/*
	 * Calculate and return the updated MTT address (in the DDR address
	 * space).  This will be used by the caller (tavor_mr_reregister) in
	 * the updated MPT entry
	 */
	rsrc_pool	= &state->ts_rsrc_hdl[TAVOR_MTT];
	mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;
	*mtt_addr	= mtt_ddrbaseaddr + (mtt->tr_indx <<
	    TAVOR_MTT_SIZE_SHIFT);

	TAVOR_TNF_EXIT(tavor_mr_rereg_xlat_helper);
	return (DDI_SUCCESS);

mrrereghelp_fail:
	TNF_PROBE_1(tavor_mr_rereg_xlat_helper_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mr_rereg_xlat_helper);
	return (status);
}


/*
 * tavor_mr_nummtt_needed()
 *    Context: Can be called from interrupt or base context.
2760 */ 2761 /* ARGSUSED */ 2762 static uint64_t 2763 tavor_mr_nummtt_needed(tavor_state_t *state, tavor_bind_info_t *bind, 2764 uint_t *mtt_pgsize_bits) 2765 { 2766 uint64_t pg_offset_mask; 2767 uint64_t pg_offset, tmp_length; 2768 2769 /* 2770 * For now we specify the page size as 8Kb (the default page size for 2771 * the sun4u architecture), or 4Kb for x86. Figure out optimal page 2772 * size by examining the dmacookies XXX 2773 */ 2774 *mtt_pgsize_bits = PAGESHIFT; 2775 2776 pg_offset_mask = ((uint64_t)1 << *mtt_pgsize_bits) - 1; 2777 pg_offset = bind->bi_addr & pg_offset_mask; 2778 tmp_length = pg_offset + (bind->bi_len - 1); 2779 return ((tmp_length >> *mtt_pgsize_bits) + 1); 2780 } 2781 2782 2783 /* 2784 * tavor_mr_mem_bind() 2785 * Context: Can be called from interrupt or base context. 2786 */ 2787 static int 2788 tavor_mr_mem_bind(tavor_state_t *state, tavor_bind_info_t *bind, 2789 ddi_dma_handle_t dmahdl, uint_t sleep) 2790 { 2791 ddi_dma_attr_t dma_attr; 2792 int (*callback)(caddr_t); 2793 uint_t dma_xfer_mode; 2794 int status; 2795 2796 /* bi_type must be set to a meaningful value to get a bind handle */ 2797 ASSERT(bind->bi_type == TAVOR_BINDHDL_VADDR || 2798 bind->bi_type == TAVOR_BINDHDL_BUF || 2799 bind->bi_type == TAVOR_BINDHDL_UBUF); 2800 2801 TAVOR_TNF_ENTER(tavor_mr_mem_bind); 2802 2803 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind)) 2804 2805 /* Set the callback flag appropriately */ 2806 callback = (sleep == TAVOR_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT; 2807 2808 /* Determine whether to map STREAMING or CONSISTENT */ 2809 dma_xfer_mode = (bind->bi_flags & IBT_MR_NONCOHERENT) ? 2810 DDI_DMA_STREAMING : DDI_DMA_CONSISTENT; 2811 2812 /* 2813 * Initialize many of the default DMA attributes. Then, if we're 2814 * bypassing the IOMMU, set the DDI_DMA_FORCE_PHYSICAL flag. 
2815 */ 2816 if (dmahdl == NULL) { 2817 tavor_dma_attr_init(&dma_attr); 2818 #ifdef __sparc 2819 /* 2820 * First, disable streaming and switch to consistent if 2821 * configured to do so and IOMMU BYPASS is enabled. 2822 */ 2823 if (state->ts_cfg_profile->cp_disable_streaming_on_bypass && 2824 dma_xfer_mode == DDI_DMA_STREAMING && 2825 bind->bi_bypass == TAVOR_BINDMEM_BYPASS) { 2826 dma_xfer_mode = DDI_DMA_CONSISTENT; 2827 } 2828 2829 /* 2830 * Then, if streaming is still specified, then "bypass" is not 2831 * allowed. 2832 */ 2833 if ((dma_xfer_mode == DDI_DMA_CONSISTENT) && 2834 (bind->bi_bypass == TAVOR_BINDMEM_BYPASS)) { 2835 dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL; 2836 } 2837 #endif 2838 /* Allocate a DMA handle for the binding */ 2839 status = ddi_dma_alloc_handle(state->ts_dip, &dma_attr, 2840 callback, NULL, &bind->bi_dmahdl); 2841 if (status != DDI_SUCCESS) { 2842 TNF_PROBE_0(tavor_mr_mem_bind_dmahdl_fail, 2843 TAVOR_TNF_ERROR, ""); 2844 TAVOR_TNF_EXIT(tavor_mr_mem_bind); 2845 return (status); 2846 } 2847 bind->bi_free_dmahdl = 1; 2848 2849 } else { 2850 bind->bi_dmahdl = dmahdl; 2851 bind->bi_free_dmahdl = 0; 2852 } 2853 2854 /* 2855 * Bind the memory to get the PCI mapped addresses. The decision 2856 * to call ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle() 2857 * is determined by the "bi_type" flag. Note: if the bind operation 2858 * fails then we have to free up the DMA handle and return error. 
2859 */ 2860 if (bind->bi_type == TAVOR_BINDHDL_VADDR) { 2861 status = ddi_dma_addr_bind_handle(bind->bi_dmahdl, NULL, 2862 (caddr_t)(uintptr_t)bind->bi_addr, bind->bi_len, 2863 (DDI_DMA_RDWR | dma_xfer_mode), callback, NULL, 2864 &bind->bi_dmacookie, &bind->bi_cookiecnt); 2865 } else { /* TAVOR_BINDHDL_BUF || TAVOR_BINDHDL_UBUF */ 2866 status = ddi_dma_buf_bind_handle(bind->bi_dmahdl, 2867 bind->bi_buf, (DDI_DMA_RDWR | dma_xfer_mode), callback, 2868 NULL, &bind->bi_dmacookie, &bind->bi_cookiecnt); 2869 } 2870 2871 if (status != DDI_DMA_MAPPED) { 2872 if (bind->bi_free_dmahdl != 0) { 2873 ddi_dma_free_handle(&bind->bi_dmahdl); 2874 } 2875 TNF_PROBE_0(tavor_mr_mem_bind_dmabind_fail, TAVOR_TNF_ERROR, 2876 ""); 2877 TAVOR_TNF_EXIT(tavor_mr_mem_bind); 2878 return (status); 2879 } 2880 2881 TAVOR_TNF_EXIT(tavor_mr_mem_bind); 2882 return (DDI_SUCCESS); 2883 } 2884 2885 2886 /* 2887 * tavor_mr_mem_unbind() 2888 * Context: Can be called from interrupt or base context. 2889 */ 2890 static void 2891 tavor_mr_mem_unbind(tavor_state_t *state, tavor_bind_info_t *bind) 2892 { 2893 int status; 2894 2895 TAVOR_TNF_ENTER(tavor_mr_mem_unbind); 2896 2897 /* 2898 * In case of TAVOR_BINDHDL_UBUF, the memory bi_buf points to 2899 * is actually allocated by ddi_umem_iosetup() internally, then 2900 * it's required to free it here. Reset bi_type to TAVOR_BINDHDL_NONE 2901 * not to free it again later. 2902 */ 2903 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind)) 2904 if (bind->bi_type == TAVOR_BINDHDL_UBUF) { 2905 freerbuf(bind->bi_buf); 2906 bind->bi_type = TAVOR_BINDHDL_NONE; 2907 } 2908 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind)) 2909 2910 /* 2911 * Unbind the DMA memory for the region 2912 * 2913 * Note: The only way ddi_dma_unbind_handle() currently 2914 * can return an error is if the handle passed in is invalid. 2915 * Since this should never happen, we choose to return void 2916 * from this function! 
If this does return an error, however, 2917 * then we print a warning message to the console. 2918 */ 2919 status = ddi_dma_unbind_handle(bind->bi_dmahdl); 2920 if (status != DDI_SUCCESS) { 2921 TAVOR_WARNING(state, "failed to unbind DMA mapping"); 2922 TNF_PROBE_0(tavor_mr_mem_unbind_dmaunbind_fail, 2923 TAVOR_TNF_ERROR, ""); 2924 TAVOR_TNF_EXIT(tavor_mr_mem_unbind); 2925 return; 2926 } 2927 2928 /* Free up the DMA handle */ 2929 if (bind->bi_free_dmahdl != 0) { 2930 ddi_dma_free_handle(&bind->bi_dmahdl); 2931 } 2932 2933 TAVOR_TNF_EXIT(tavor_mr_mem_unbind); 2934 } 2935 2936 2937 /* 2938 * tavor_mr_fast_mtt_write() 2939 * Context: Can be called from interrupt or base context. 2940 */ 2941 static int 2942 tavor_mr_fast_mtt_write(tavor_rsrc_t *mtt, tavor_bind_info_t *bind, 2943 uint32_t mtt_pgsize_bits) 2944 { 2945 ddi_dma_cookie_t dmacookie; 2946 uint_t cookie_cnt; 2947 uint64_t *mtt_table; 2948 uint64_t mtt_entry; 2949 uint64_t addr, endaddr; 2950 uint64_t pagesize; 2951 int i; 2952 2953 TAVOR_TNF_ENTER(tavor_mr_fast_mtt_write); 2954 2955 /* Calculate page size from the suggested value passed in */ 2956 pagesize = ((uint64_t)1 << mtt_pgsize_bits); 2957 2958 /* 2959 * Walk the "cookie list" and fill in the MTT table entries 2960 */ 2961 i = 0; 2962 mtt_table = (uint64_t *)mtt->tr_addr; 2963 dmacookie = bind->bi_dmacookie; 2964 cookie_cnt = bind->bi_cookiecnt; 2965 while (cookie_cnt-- > 0) { 2966 addr = dmacookie.dmac_laddress; 2967 endaddr = addr + (dmacookie.dmac_size - 1); 2968 addr = addr & ~((uint64_t)pagesize - 1); 2969 while (addr <= endaddr) { 2970 /* 2971 * Fill in the mapped addresses (calculated above) and 2972 * set TAVOR_MTT_ENTRY_PRESET flag for each MTT entry. 
2973 */ 2974 mtt_entry = addr | TAVOR_MTT_ENTRY_PRESET; 2975 ddi_put64(mtt->tr_acchdl, &mtt_table[i], mtt_entry); 2976 addr += pagesize; 2977 i++; 2978 2979 if (addr == 0) { 2980 static int do_once = 1; 2981 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", 2982 do_once)) 2983 if (do_once) { 2984 do_once = 0; 2985 cmn_err(CE_NOTE, "probable error in " 2986 "dma_cookie address from caller\n"); 2987 } 2988 break; 2989 } 2990 } 2991 2992 /* 2993 * When we've reached the end of the current DMA cookie, 2994 * jump to the next cookie (if there are more) 2995 */ 2996 if (cookie_cnt != 0) { 2997 ddi_dma_nextcookie(bind->bi_dmahdl, &dmacookie); 2998 } 2999 } 3000 3001 TAVOR_TNF_EXIT(tavor_mr_fast_mtt_write); 3002 return (DDI_SUCCESS); 3003 } 3004 3005 /* 3006 * tavor_mr_fast_mtt_write_fmr() 3007 * Context: Can be called from interrupt or base context. 3008 */ 3009 static int 3010 tavor_mr_fast_mtt_write_fmr(tavor_rsrc_t *mtt, ibt_pmr_attr_t *mem_pattr, 3011 uint32_t mtt_pgsize_bits) 3012 { 3013 uint64_t *mtt_table; 3014 ibt_phys_addr_t *buf; 3015 uint64_t mtt_entry; 3016 uint64_t addr, first_addr, endaddr; 3017 uint64_t pagesize; 3018 int i; 3019 3020 TAVOR_TNF_ENTER(tavor_mr_fast_mtt_write_fmr); 3021 3022 /* Calculate page size from the suggested value passed in */ 3023 pagesize = ((uint64_t)1 << mtt_pgsize_bits); 3024 3025 /* 3026 * Walk the "buf list" and fill in the MTT table entries 3027 */ 3028 mtt_table = (uint64_t *)mtt->tr_addr; 3029 for (i = 0; i < mem_pattr->pmr_num_buf; i++) { 3030 buf = &mem_pattr->pmr_addr_list[i]; 3031 3032 /* 3033 * For first cookie, use the offset field to determine where 3034 * the buffer starts. The end addr is then calculated with the 3035 * offset in mind. 
3036 */ 3037 if (i == 0) { 3038 first_addr = addr = buf->p_laddr + 3039 mem_pattr->pmr_offset; 3040 endaddr = addr + (mem_pattr->pmr_buf_sz - 1) - 3041 mem_pattr->pmr_offset; 3042 /* 3043 * For last cookie, determine end addr based on starting 3044 * address and size of the total buffer 3045 */ 3046 } else if (i == mem_pattr->pmr_num_buf - 1) { 3047 addr = buf->p_laddr; 3048 endaddr = addr + (first_addr + mem_pattr->pmr_len & 3049 (mem_pattr->pmr_buf_sz - 1)); 3050 /* 3051 * For the middle cookies case, start and end addr are 3052 * straightforward. Just use the laddr, and the size, as all 3053 * middle cookies are a set size. 3054 */ 3055 } else { 3056 addr = buf->p_laddr; 3057 endaddr = addr + (mem_pattr->pmr_buf_sz - 1); 3058 } 3059 3060 addr = addr & ~((uint64_t)pagesize - 1); 3061 while (addr <= endaddr) { 3062 /* 3063 * Fill in the mapped addresses (calculated above) and 3064 * set TAVOR_MTT_ENTRY_PRESET flag for each MTT entry. 3065 */ 3066 mtt_entry = addr | TAVOR_MTT_ENTRY_PRESET; 3067 ddi_put64(mtt->tr_acchdl, &mtt_table[i], mtt_entry); 3068 addr += pagesize; 3069 } 3070 } 3071 3072 TAVOR_TNF_EXIT(tavor_mr_fast_mtt_write_fmr); 3073 return (DDI_SUCCESS); 3074 } 3075 3076 3077 /* 3078 * tavor_mtt_refcnt_inc() 3079 * Context: Can be called from interrupt or base context. 3080 */ 3081 static int 3082 tavor_mtt_refcnt_inc(tavor_rsrc_t *rsrc) 3083 { 3084 tavor_sw_refcnt_t *rc; 3085 uint32_t cnt; 3086 3087 rc = (tavor_sw_refcnt_t *)rsrc->tr_addr; 3088 3089 /* Increment the MTT's reference count */ 3090 mutex_enter(&rc->swrc_lock); 3091 TNF_PROBE_1_DEBUG(tavor_mtt_refcnt_inc, TAVOR_TNF_TRACE, "", 3092 tnf_uint, refcnt, rc->swrc_refcnt); 3093 cnt = rc->swrc_refcnt++; 3094 mutex_exit(&rc->swrc_lock); 3095 3096 return (cnt); 3097 } 3098 3099 3100 /* 3101 * tavor_mtt_refcnt_dec() 3102 * Context: Can be called from interrupt or base context. 
3103 */ 3104 static int 3105 tavor_mtt_refcnt_dec(tavor_rsrc_t *rsrc) 3106 { 3107 tavor_sw_refcnt_t *rc; 3108 uint32_t cnt; 3109 3110 rc = (tavor_sw_refcnt_t *)rsrc->tr_addr; 3111 3112 /* Decrement the MTT's reference count */ 3113 mutex_enter(&rc->swrc_lock); 3114 cnt = --rc->swrc_refcnt; 3115 TNF_PROBE_1_DEBUG(tavor_mtt_refcnt_dec, TAVOR_TNF_TRACE, "", 3116 tnf_uint, refcnt, rc->swrc_refcnt); 3117 mutex_exit(&rc->swrc_lock); 3118 3119 return (cnt); 3120 } 3121