1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * tavor_mr.c 29 * Tavor Memory Region/Window Routines 30 * 31 * Implements all the routines necessary to provide the requisite memory 32 * registration verbs. These include operations like RegisterMemRegion(), 33 * DeregisterMemRegion(), ReregisterMemRegion, RegisterSharedMemRegion, 34 * etc., that affect Memory Regions. It also includes the verbs that 35 * affect Memory Windows, including AllocMemWindow(), FreeMemWindow(), 36 * and QueryMemWindow(). 
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/esunddi.h>

#include <sys/ib/adapters/tavor/tavor.h>


/*
 * Used by tavor_mr_keycalc() below to fill in the "unconstrained" portion
 * of Tavor memory keys (LKeys and RKeys).  Incremented (debug-style) to
 * make successive keys for the same MPT index distinguishable.
 */
static uint_t tavor_debug_memkey_cnt = 0x00000000;

/*
 * Forward declarations for the file-local helpers.  The "common" routines
 * implement the bulk of register/reregister; the bind/unbind pair manage
 * DDI DMA binding; the fast_mtt_write routines populate MTT entries; and
 * the refcnt pair manage sharing of MTT resources across regions.
 */
static int tavor_mr_common_reg(tavor_state_t *state, tavor_pdhdl_t pd,
    tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op);
static int tavor_mr_common_rereg(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_pdhdl_t pd, tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl_new,
    tavor_mr_options_t *op);
static int tavor_mr_rereg_xlat_helper(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_bind_info_t *bind, tavor_mr_options_t *op, uint64_t *mtt_addr,
    uint_t sleep, uint_t *dereg_level);
static uint64_t tavor_mr_nummtt_needed(tavor_state_t *state,
    tavor_bind_info_t *bind, uint_t *mtt_pgsize);
static int tavor_mr_mem_bind(tavor_state_t *state, tavor_bind_info_t *bind,
    ddi_dma_handle_t dmahdl, uint_t sleep);
static void tavor_mr_mem_unbind(tavor_state_t *state,
    tavor_bind_info_t *bind);
static int tavor_mr_fast_mtt_write(tavor_rsrc_t *mtt, tavor_bind_info_t *bind,
    uint32_t mtt_pgsize_bits);
static int tavor_mr_fast_mtt_write_fmr(tavor_rsrc_t *mtt,
    ibt_pmr_attr_t *mem_pattr, uint32_t mtt_pgsize_bits);
static int tavor_mtt_refcnt_inc(tavor_rsrc_t *rsrc);
static int tavor_mtt_refcnt_dec(tavor_rsrc_t *rsrc);

/*
 * The Tavor umem_lockmemory() callback ops.  When userland memory is
 * registered, these callback ops are specified.  The tavor_umap_umemlock_cb()
 * callback will be called whenever the memory for the corresponding
 * ddi_umem_cookie_t is being freed.
81 */ 82 static struct umem_callback_ops tavor_umem_cbops = { 83 UMEM_CALLBACK_VERSION, 84 tavor_umap_umemlock_cb, 85 }; 86 87 88 /* 89 * tavor_mr_register() 90 * Context: Can be called from interrupt or base context. 91 */ 92 int 93 tavor_mr_register(tavor_state_t *state, tavor_pdhdl_t pd, 94 ibt_mr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op) 95 { 96 tavor_bind_info_t bind; 97 int status; 98 99 TAVOR_TNF_ENTER(tavor_mr_register); 100 101 /* 102 * Fill in the "bind" struct. This struct provides the majority 103 * of the information that will be used to distinguish between an 104 * "addr" binding (as is the case here) and a "buf" binding (see 105 * below). The "bind" struct is later passed to tavor_mr_mem_bind() 106 * which does most of the "heavy lifting" for the Tavor memory 107 * registration routines. 108 */ 109 bind.bi_type = TAVOR_BINDHDL_VADDR; 110 bind.bi_addr = mr_attr->mr_vaddr; 111 bind.bi_len = mr_attr->mr_len; 112 bind.bi_as = mr_attr->mr_as; 113 bind.bi_flags = mr_attr->mr_flags; 114 status = tavor_mr_common_reg(state, pd, &bind, mrhdl, op); 115 if (status != DDI_SUCCESS) { 116 TNF_PROBE_0(tavor_mr_register_cmnreg_fail, 117 TAVOR_TNF_ERROR, ""); 118 TAVOR_TNF_EXIT(tavor_mr_register); 119 return (status); 120 } 121 122 TAVOR_TNF_EXIT(tavor_mr_register); 123 return (DDI_SUCCESS); 124 } 125 126 127 /* 128 * tavor_mr_register_buf() 129 * Context: Can be called from interrupt or base context. 130 */ 131 int 132 tavor_mr_register_buf(tavor_state_t *state, tavor_pdhdl_t pd, 133 ibt_smr_attr_t *mr_attr, struct buf *buf, tavor_mrhdl_t *mrhdl, 134 tavor_mr_options_t *op) 135 { 136 tavor_bind_info_t bind; 137 int status; 138 139 TAVOR_TNF_ENTER(tavor_mr_register_buf); 140 141 /* 142 * Fill in the "bind" struct. This struct provides the majority 143 * of the information that will be used to distinguish between an 144 * "addr" binding (see above) and a "buf" binding (as is the case 145 * here). 
	 * here).  The "bind" struct is later passed to tavor_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Tavor memory
	 * registration routines.  Note: We have chosen to provide
	 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
	 * not set).  It is not critical what value we choose here as it need
	 * only be unique for the given RKey (which will happen by default),
	 * so the choice here is somewhat arbitrary.
	 */
	bind.bi_type = TAVOR_BINDHDL_BUF;
	bind.bi_buf = buf;
	if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
		bind.bi_addr = mr_attr->mr_vaddr;
	} else {
		bind.bi_addr = (uint64_t)(uintptr_t)buf->b_un.b_addr;
	}
	bind.bi_as = NULL;
	bind.bi_len = (uint64_t)buf->b_bcount;
	bind.bi_flags = mr_attr->mr_flags;
	status = tavor_mr_common_reg(state, pd, &bind, mrhdl, op);
	if (status != DDI_SUCCESS) {
		TNF_PROBE_0(tavor_mr_register_buf_cmnreg_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_mr_register_buf);
		return (status);
	}

	TAVOR_TNF_EXIT(tavor_mr_register_buf);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_register_shared()
 *    Context: Can be called from interrupt or base context.
 *
 * Implements RegisterSharedMR: creates a new MR handle (with its own MPT
 * entry and keys) that references the existing MTT entries of "mrhdl".
 * On failure, all allocations are unwound in reverse order via the
 * mrshared_fail* labels at the bottom of the routine.
 */
int
tavor_mr_register_shared(tavor_state_t *state, tavor_mrhdl_t mrhdl,
    tavor_pdhdl_t pd, ibt_smr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl_new)
{
	tavor_rsrc_pool_info_t	*rsrc_pool;
	tavor_rsrc_t		*mpt, *mtt, *rsrc;
	tavor_umap_db_entry_t	*umapdb;
	tavor_hw_mpt_t		mpt_entry;
	tavor_mrhdl_t		mr;
	tavor_bind_info_t	*bind;
	ddi_umem_cookie_t	umem_cookie;
	size_t			umem_len;
	caddr_t			umem_addr;
	uint64_t		mtt_addr, mtt_ddrbaseaddr, pgsize_msk;
	uint_t			sleep, mr_is_umem;
	int			status, umem_flags;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_register_shared);

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (mr_attr->mr_flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP :
	    TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
		goto mrshared_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	tavor_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry.  This will be filled in with all the
	 * necessary parameters to define the shared memory region.
	 * Specifically, it will be made to reference the currently existing
	 * MTT entries and ownership of the MPT will be passed to the hardware
	 * in the last step below.  If we fail here, we must undo the
	 * protection domain reference count.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
		goto mrshared_fail1;
	}

	/*
	 * Allocate the software structure for tracking the shared memory
	 * region (i.e. the Tavor Memory Region handle).  If we fail here, we
	 * must undo the protection domain reference count and the previous
	 * resource allocation.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
		goto mrshared_fail2;
	}
	mr = (tavor_mrhdl_t)rsrc->tr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

	/*
	 * Setup and validate the memory region access flags.  This means
	 * translating the IBTF's enable flags into the access flags that
	 * will be used in later operations.
	 */
	mr->mr_accflag = 0;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_WINDOW_BIND)
		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_READ)
		mr->mr_accflag |= IBT_MR_REMOTE_READ;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	}

	/* Grab the MR lock for the current memory region */
	mutex_enter(&mrhdl->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a tavor_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
		mutex_exit(&mrhdl->mr_lock);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl");
		goto mrshared_fail3;
	}

	/*
	 * Determine if the original memory was from userland and, if so, pin
	 * the pages (again) with umem_lockmemory().  This will guarantee a
	 * separate callback for each of this shared region's MR handles.
	 * If this is userland memory, then allocate an entry in the
	 * "userland resources database".  This will later be added to
	 * the database (after all further memory registration operations are
	 * successful).  If we fail here, we must undo all the above setup.
	 */
	mr_is_umem = mrhdl->mr_is_umem;
	if (mr_is_umem) {
		/* Round the lock range out to whole pages */
		umem_len = ptob(btopr(mrhdl->mr_bindinfo.bi_len +
		    ((uintptr_t)mrhdl->mr_bindinfo.bi_addr & PAGEOFFSET)));
		umem_addr = (caddr_t)((uintptr_t)mrhdl->mr_bindinfo.bi_addr &
		    ~PAGEOFFSET);
		umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
		    DDI_UMEMLOCK_LONGTERM);
		status = umem_lockmemory(umem_addr, umem_len, umem_flags,
		    &umem_cookie, &tavor_umem_cbops, NULL);
		if (status != 0) {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umem pin");
			goto mrshared_fail3;
		}

		umapdb = tavor_umap_db_alloc(state->ts_instance,
		    (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
		    (uint64_t)(uintptr_t)rsrc);
		if (umapdb == NULL) {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
			goto mrshared_fail4;
		}
	}

	/*
	 * Copy the MTT resource pointer (and additional parameters) from
	 * the original Tavor Memory Region handle.  Note: this is normally
	 * where the tavor_mr_mem_bind() routine would be called, but because
	 * we already have bound and filled-in MTT entries it is simply a
	 * matter here of managing the MTT reference count and grabbing the
	 * address of the MTT table entries (for filling in the shared region's
	 * MPT entry).
	 */
	mr->mr_mttrsrcp = mrhdl->mr_mttrsrcp;
	mr->mr_logmttpgsz = mrhdl->mr_logmttpgsz;
	mr->mr_bindinfo = mrhdl->mr_bindinfo;
	mr->mr_mttrefcntp = mrhdl->mr_mttrefcntp;
	mutex_exit(&mrhdl->mr_lock);
	bind = &mr->mr_bindinfo;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
	mtt = mr->mr_mttrsrcp;

	/*
	 * Increment the MTT reference count (to reflect the fact that
	 * the MTT is now shared)
	 */
	(void) tavor_mtt_refcnt_inc(mr->mr_mttrefcntp);

	/*
	 * Update the new "bind" virtual address.  Do some extra work here
	 * to ensure proper alignment.  That is, make sure that the page
	 * offset for the beginning of the old range is the same as the
	 * offset for this new mapping
	 */
	pgsize_msk = (((uint64_t)1 << mr->mr_logmttpgsz) - 1);
	bind->bi_addr = ((mr_attr->mr_vaddr & ~pgsize_msk) |
	    (mr->mr_bindinfo.bi_addr & pgsize_msk));

	/*
	 * Get the base address for the MTT table.  This will be necessary
	 * in the next step when we are setting up the MPT entry.
	 */
	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
	mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Tavor hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
	mpt_entry.m_io = TAVOR_MEM_CYCLE_GENERATE;
	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0;
	mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0;
	mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0;
	mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0;
	mpt_entry.lr = 1;
	mpt_entry.reg_win = TAVOR_MPT_IS_REGION;
	/* page_sz is encoded relative to 4KB (2^12), hence the "- 0xC" */
	mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC;
	mpt_entry.mem_key = mr->mr_lkey;
	mpt_entry.pd = pd->pd_pdnum;
	mpt_entry.start_addr = bind->bi_addr;
	mpt_entry.reg_win_len = bind->bi_len;
	mpt_entry.win_cnt_limit = TAVOR_UNLIMITED_WIN_BIND;
	mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT);
	mpt_entry.mttseg_addr_h = mtt_addr >> 32;
	mpt_entry.mttseg_addr_l = mtt_addr >> 6;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
		    status);
		TNF_PROBE_1(tavor_mr_register_shared_sw2hw_mpt_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
		    "tavor SW2HW_MPT command");
		goto mrshared_fail5;
	}

	/*
	 * Fill in the rest of the Tavor Memory Region handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MR.
	 */
	mr->mr_mptrsrcp = mpt;
	mr->mr_mttrsrcp = mtt;
	mr->mr_pdhdl = pd;
	mr->mr_rsrcp = rsrc;
	mr->mr_is_umem = mr_is_umem;
	mr->mr_is_fmr = 0;
	mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
	mr->mr_umem_cbfunc = NULL;
	mr->mr_umem_cbarg1 = NULL;
	mr->mr_umem_cbarg2 = NULL;

	/*
	 * If this is userland memory, then we need to insert the previously
	 * allocated entry into the "userland resources database".
	 * This will allow for later coordination between the
	 * tavor_umap_umemlock_cb() callback and tavor_mr_deregister().
	 */
	if (mr_is_umem) {
		tavor_umap_db_add(umapdb);
	}

	*mrhdl_new = mr;

	TAVOR_TNF_EXIT(tavor_mr_register_shared);
	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine.
 * Each label undoes the allocations made before the corresponding goto,
 * in reverse order of acquisition.
 */
mrshared_fail5:
	(void) tavor_mtt_refcnt_dec(mr->mr_mttrefcntp);
	if (mr_is_umem) {
		tavor_umap_db_free(umapdb);
	}
mrshared_fail4:
	if (mr_is_umem) {
		ddi_umem_unlock(umem_cookie);
	}
mrshared_fail3:
	tavor_rsrc_free(state, &rsrc);
mrshared_fail2:
	tavor_rsrc_free(state, &mpt);
mrshared_fail1:
	tavor_pd_refcnt_dec(pd);
mrshared_fail:
	TNF_PROBE_1(tavor_mr_register_shared_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mr_register_shared);
	return (status);
}

/*
 * tavor_mr_alloc_fmr()
 *    Context: Can be called from interrupt or base context.
 *
 * Pre-allocates the MPT/MTT resources for a Fast Memory Region (FMR).
 * The actual mapping (start address / length / MTT contents) is filled in
 * later by tavor_mr_register_physical_fmr().
 */
int
tavor_mr_alloc_fmr(tavor_state_t *state, tavor_pdhdl_t pd,
    tavor_fmrhdl_t fmr_pool, tavor_mrhdl_t *mrhdl)
{
	tavor_rsrc_pool_info_t	*rsrc_pool;
	tavor_rsrc_t		*mpt, *mtt, *rsrc;
	tavor_hw_mpt_t		mpt_entry;
	tavor_mrhdl_t		mr;
	tavor_bind_info_t	bind;
	uint64_t		mtt_addr, mtt_ddrbaseaddr;
	uint64_t		nummtt;
	uint_t			sleep, mtt_pgsize_bits;
	int			status;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_alloc_fmr);

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (fmr_pool->fmr_flags & IBT_MR_SLEEP) ?
	    TAVOR_SLEEP : TAVOR_NOSLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		TNF_PROBE_0(tavor_mr_alloc_fmr, TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_mr_alloc_fmr);
		return (IBT_INVALID_PARAM);
	}

	/* Increment the reference count on the protection domain (PD) */
	tavor_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry.  This will be filled in with all the
	 * necessary parameters to define the FMR.  Specifically, it will be
	 * made to reference the currently existing MTT entries and ownership
	 * of the MPT will be passed to the hardware in the last step below.
	 * If we fail here, we must undo the protection domain reference count.
	 */

	status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
		goto fmralloc_fail1;
	}

	/*
	 * Allocate the software structure for tracking the fmr memory
	 * region (i.e. the Tavor Memory Region handle).  If we fail here, we
	 * must undo the protection domain reference count and the previous
	 * resource allocation.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
		goto fmralloc_fail2;
	}
	mr = (tavor_mrhdl_t)rsrc->tr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

	/*
	 * Setup and validate the memory region access flags.  This means
	 * translating the IBTF's enable flags into the access flags that
	 * will be used in later operations.  (Unlike a regular MR, window
	 * binding is never enabled for an FMR -- see en_bind below.)
	 */
	mr->mr_accflag = 0;
	if (fmr_pool->fmr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
	if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_READ)
		mr->mr_accflag |= IBT_MR_REMOTE_READ;
	if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
	if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	}

	/*
	 * Determine number of pages spanned.  This routine uses the
	 * information in the "bind" struct to determine the required
	 * number of MTT entries needed (and returns the suggested page size -
	 * as a "power-of-2" - for each MTT entry).
	 *
	 * NOTE(review): only bi_addr and bi_len are initialized here; the
	 * other "bind" fields remain indeterminate when this struct is
	 * copied into mr_bindinfo below.  Presumably
	 * tavor_mr_nummtt_needed() reads only these two fields -- verify.
	 * Also, fmr_max_pages << PAGESHIFT is evaluated in the type of
	 * fmr_max_pages before widening to bi_len -- confirm it cannot
	 * overflow for the supported pool sizes.
	 */
	/* Assume address will be page aligned later */
	bind.bi_addr = 0;
	/* Calculate size based on given max pages */
	bind.bi_len = fmr_pool->fmr_max_pages << PAGESHIFT;
	nummtt = tavor_mr_nummtt_needed(state, &bind, &mtt_pgsize_bits);

	/*
	 * Allocate the MTT entries.  Use the calculations performed above to
	 * allocate the required number of MTT entries.  Note: MTT entries are
	 * allocated in "MTT segments" which consist of complete cachelines
	 * (i.e. 8 entries, 16 entries, etc.)  So the TAVOR_NUMMTT_TO_MTTSEG()
	 * macro is used to do the proper conversion.  If we fail here, we
	 * must not only undo all the previous resource allocation (and PD
	 * reference count), but we must also unbind the memory.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MTT,
	    TAVOR_NUMMTT_TO_MTTSEG(nummtt), sleep, &mtt);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MTT");
		goto fmralloc_fail3;
	}
	mr->mr_logmttpgsz = mtt_pgsize_bits;

	/*
	 * Get the base address for the MTT table.  This will be necessary
	 * in the next step when we are setting up the MPT entry.
	 */
	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
	mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Tavor hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
	mpt_entry.m_io = TAVOR_MEM_CYCLE_GENERATE;
	mpt_entry.en_bind = 0;
	mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0;
	mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0;
	mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0;
	mpt_entry.lr = 1;
	mpt_entry.reg_win = TAVOR_MPT_IS_REGION;
	mpt_entry.pd = pd->pd_pdnum;
	/* page_sz is encoded relative to 4KB (2^12), hence the "- 0xC" */
	mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC;
	mpt_entry.win_cnt_limit = TAVOR_UNLIMITED_WIN_BIND;
	mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT);
	mpt_entry.mttseg_addr_h = mtt_addr >> 32;
	mpt_entry.mttseg_addr_l = mtt_addr >> 6;
	mpt_entry.mem_key = mr->mr_lkey;

	/*
	 * FMR sets these to 0 for now.  Later during actual fmr registration
	 * these values are filled in.
	 */
	mpt_entry.start_addr = 0;
	mpt_entry.reg_win_len = 0;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
		    status);
		TNF_PROBE_1(tavor_mr_register_shared_sw2hw_mpt_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
		    "tavor SW2HW_MPT command");
		goto fmralloc_fail4;
	}

	/*
	 * Fill in the rest of the Tavor Memory Region handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MR.  Also, set
	 * that this is an FMR region.
	 */
	mr->mr_mptrsrcp = mpt;
	mr->mr_mttrsrcp = mtt;
	mr->mr_pdhdl = pd;
	mr->mr_rsrcp = rsrc;
	mr->mr_is_fmr = 1;
	(void) memcpy(&mr->mr_bindinfo, &bind, sizeof (tavor_bind_info_t));

	*mrhdl = mr;

	TAVOR_TNF_EXIT(tavor_mr_alloc_fmr);
	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
fmralloc_fail4:
	tavor_rsrc_free(state, &mtt);
fmralloc_fail3:
	tavor_rsrc_free(state, &rsrc);
fmralloc_fail2:
	tavor_rsrc_free(state, &mpt);
fmralloc_fail1:
	tavor_pd_refcnt_dec(pd);
fmralloc_fail:
	TNF_PROBE_1(tavor_mr_alloc_fmr, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mr_alloc_fmr);
	return (status);
}

/*
 * tavor_mr_register_physical_fmr()
 *    Context: Can be called from interrupt or base context.
 *
 * Maps an FMR (pre-allocated by tavor_mr_alloc_fmr()) onto a caller-
 * supplied physical buffer list.  Works by toggling the MPT ownership
 * byte to software, rewriting the MTT/MPT fields directly, and handing
 * ownership back to hardware -- no firmware command is posted.
 */
int
tavor_mr_register_physical_fmr(tavor_state_t *state,
    ibt_pmr_attr_t *mem_pattr_p, tavor_mrhdl_t mr, ibt_pmr_desc_t *mem_desc_p)
{
	tavor_rsrc_t	*mpt;
	uint64_t	*mpt_table;
	int		status;
	char		*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_register_physical_fmr);

	mutex_enter(&mr->mr_lock);
	mpt = mr->mr_mptrsrcp;
	mpt_table = (uint64_t *)mpt->tr_addr;

	/* Write MPT status to SW bit (0xF == software-owned) */
	ddi_put8(mpt->tr_acchdl, (uint8_t *)&mpt_table[0], 0xF);

	/*
	 * Write the mapped addresses into the MTT entries.  FMR needs to do
	 * this a little differently, so we call the fmr specific fast mtt
	 * write here.
	 */
	status = tavor_mr_fast_mtt_write_fmr(mr->mr_mttrsrcp, mem_pattr_p,
	    mr->mr_logmttpgsz);
	if (status != DDI_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(ibc_get_ci_failure(0), "failed write mtt");
		goto fmr_reg_fail1;
	}

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	}

	/* write mem key value */
	ddi_put32(mpt->tr_acchdl, (uint32_t *)&mpt_table[1], mr->mr_lkey);

	/* write length value */
	ddi_put64(mpt->tr_acchdl, &mpt_table[3], mem_pattr_p->pmr_len);

	/* write start addr value */
	ddi_put64(mpt->tr_acchdl, &mpt_table[2], mem_pattr_p->pmr_iova);

	/* write lkey value */
	ddi_put32(mpt->tr_acchdl, (uint32_t *)&mpt_table[4], mr->mr_lkey);

	/* Write MPT status to HW bit (0x0 == hardware-owned) */
	ddi_put8(mpt->tr_acchdl, (uint8_t *)&mpt_table[0], 0x0);

	/* Fill in return parameters */
	mem_desc_p->pmd_lkey = mr->mr_lkey;
	mem_desc_p->pmd_rkey = mr->mr_rkey;
	mem_desc_p->pmd_iova = mem_pattr_p->pmr_iova;
	mem_desc_p->pmd_phys_buf_list_sz = mem_pattr_p->pmr_len;

	/* Fill in MR bindinfo struct for later sync or query operations */
	mr->mr_bindinfo.bi_addr = mem_pattr_p->pmr_iova;
	mr->mr_bindinfo.bi_flags = mem_pattr_p->pmr_flags & IBT_MR_NONCOHERENT;

	mutex_exit(&mr->mr_lock);

	TAVOR_TNF_EXIT(tavor_mr_register_physical_fmr);
	return (DDI_SUCCESS);

fmr_reg_fail1:
	/*
	 * Note, we fail here, and purposely leave the memory ownership in
	 * software.  The memory tables may be corrupt, so we leave the region
	 * unregistered.
	 */
	TNF_PROBE_1(tavor_mr_register_physical_fmr_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mr_register_physical_fmr);
	return (DDI_FAILURE);
}


/*
 * tavor_mr_deregister()
 *    Context: Can be called from interrupt or base context.
 *
 * Tears down a (non-FMR) memory region.  "level" controls how much is
 * undone: TAVOR_MR_DEREG_ALL additionally reclaims MPT ownership from
 * hardware; TAVOR_MR_DEREG_NO_HW2SW_MPT and above unbind the memory.
 * Handles the partial-deregistration case driven by the umem-lock
 * callback (mr_umemcookie == NULL).
 */
/* ARGSUSED */
int
tavor_mr_deregister(tavor_state_t *state, tavor_mrhdl_t *mrhdl, uint_t level,
    uint_t sleep)
{
	tavor_rsrc_t		*mpt, *mtt, *rsrc, *mtt_refcnt;
	tavor_umap_db_entry_t	*umapdb;
	tavor_pdhdl_t		pd;
	tavor_mrhdl_t		mr;
	tavor_bind_info_t	*bind;
	uint64_t		value;
	int			status, shared_mtt;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_deregister);

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid sleep flags");
		TNF_PROBE_1(tavor_mr_deregister_fail, TAVOR_TNF_ERROR, "",
		    tnf_string, msg, errormsg);
		TAVOR_TNF_EXIT(tavor_mr_deregister);
		return (status);
	}

	/*
	 * Pull all the necessary information from the Tavor Memory Region
	 * handle.  This is necessary here because the resource for the
	 * MR handle is going to be freed up as part of the this
	 * deregistration
	 */
	mr = *mrhdl;
	mutex_enter(&mr->mr_lock);
	mpt = mr->mr_mptrsrcp;
	mtt = mr->mr_mttrsrcp;
	mtt_refcnt = mr->mr_mttrefcntp;
	rsrc = mr->mr_rsrcp;
	pd = mr->mr_pdhdl;
	bind = &mr->mr_bindinfo;

	/*
	 * Check here if the memory region is really an FMR.  If so, this is a
	 * bad thing and we shouldn't be here.  Return failure.
	 */
	if (mr->mr_is_fmr) {
		mutex_exit(&mr->mr_lock);
		TNF_PROBE_0(tavor_mr_deregister_is_fmr, TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_mr_deregister);
		return (IBT_INVALID_PARAM);
	}

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of the tavor_umap_umemlock_cb() callback.
	 * If so, then jump to the end and free the remaining resources.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		goto mrdereg_finish_cleanup;
	}

	/*
	 * We must drop the "mr_lock" here to ensure that both SLEEP and
	 * NOSLEEP calls into the firmware work as expected.  Also, if two
	 * threads are attemping to access this MR (via de-register,
	 * re-register, or otherwise), then we allow the firmware to enforce
	 * the checking, that only one deregister is valid.
	 */
	mutex_exit(&mr->mr_lock);

	/*
	 * Reclaim MPT entry from hardware (if necessary).  Since the
	 * tavor_mr_deregister() routine is used in the memory region
	 * reregistration process as well, it is possible that we will
	 * not always wish to reclaim ownership of the MPT.  Check the
	 * "level" arg and, if necessary, attempt to reclaim it.  If
	 * the ownership transfer fails for any reason, we check to see
	 * what command status was returned from the hardware.  The only
	 * "expected" error status is the one that indicates an attempt to
	 * deregister a memory region that has memory windows bound to it
	 */
	if (level >= TAVOR_MR_DEREG_ALL) {
		status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT,
		    NULL, 0, mpt->tr_indx, sleep);
		if (status != TAVOR_CMD_SUCCESS) {
			if (status == TAVOR_CMD_REG_BOUND) {
				TAVOR_TNF_EXIT(tavor_mr_deregister);
				return (IBT_MR_IN_USE);
			} else {
				cmn_err(CE_CONT, "Tavor: HW2SW_MPT command "
				    "failed: %08x\n", status);
				TNF_PROBE_1(tavor_hw2sw_mpt_cmd_fail,
				    TAVOR_TNF_ERROR, "", tnf_uint, status,
				    status);
				TAVOR_TNF_EXIT(tavor_mr_deregister);
				return (IBT_INVALID_PARAM);
			}
		}
	}

	/*
	 * Re-grab the mr_lock here.  Since further access to the protected
	 * 'mr' structure is needed, and we would have returned previously for
	 * the multiple deregistration case, we can safely grab the lock here.
	 */
	mutex_enter(&mr->mr_lock);

	/*
	 * If the memory had come from userland, then we do a lookup in the
	 * "userland resources database".  On success, we free the entry, call
	 * ddi_umem_unlock(), and continue the cleanup.  On failure (which is
	 * an indication that the umem_lockmemory() callback has called
	 * tavor_mr_deregister()), we call ddi_umem_unlock() and invalidate
	 * the "mr_umemcookie" field in the MR handle (this will be used
	 * later to detect that only partial cleaup still remains to be done
	 * on the MR handle).
	 */
	if (mr->mr_is_umem) {
		status = tavor_umap_db_find(state->ts_instance,
		    (uint64_t)(uintptr_t)mr->mr_umemcookie,
		    MLNX_UMAP_MRMEM_RSRC, &value, TAVOR_UMAP_DB_REMOVE,
		    &umapdb);
		if (status == DDI_SUCCESS) {
			tavor_umap_db_free(umapdb);
			ddi_umem_unlock(mr->mr_umemcookie);
		} else {
			ddi_umem_unlock(mr->mr_umemcookie);
			mr->mr_umemcookie = NULL;
		}
	}

	/*
	 * Decrement the MTT reference count.  Since the MTT resource
	 * may be shared between multiple memory regions (as a result
	 * of a "RegisterSharedMR" verb) it is important that we not
	 * free up or unbind resources prematurely.  If it's not shared (as
	 * indicated by the return status), then free the resource.
	 */
	shared_mtt = tavor_mtt_refcnt_dec(mtt_refcnt);
	if (!shared_mtt) {
		tavor_rsrc_free(state, &mtt_refcnt);
	}

	/*
	 * Free up the MTT entries and unbind the memory.  Here, as above, we
	 * attempt to free these resources only if it is appropriate to do so.
	 */
	if (!shared_mtt) {
		if (level >= TAVOR_MR_DEREG_NO_HW2SW_MPT) {
			tavor_mr_mem_unbind(state, bind);
		}
		tavor_rsrc_free(state, &mtt);
	}

	/*
	 * If the MR handle has been invalidated, then drop the
	 * lock and return success.  Note: This only happens because
	 * the umem_lockmemory() callback has been triggered.
The 956 * cleanup here is partial, and further cleanup (in a 957 * subsequent tavor_mr_deregister() call) will be necessary. 958 */ 959 if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) { 960 mutex_exit(&mr->mr_lock); 961 TAVOR_TNF_EXIT(tavor_mr_deregister); 962 return (DDI_SUCCESS); 963 } 964 965 mrdereg_finish_cleanup: 966 mutex_exit(&mr->mr_lock); 967 968 /* Free the Tavor Memory Region handle */ 969 tavor_rsrc_free(state, &rsrc); 970 971 /* Free up the MPT entry resource */ 972 tavor_rsrc_free(state, &mpt); 973 974 /* Decrement the reference count on the protection domain (PD) */ 975 tavor_pd_refcnt_dec(pd); 976 977 /* Set the mrhdl pointer to NULL and return success */ 978 *mrhdl = NULL; 979 980 TAVOR_TNF_EXIT(tavor_mr_deregister); 981 return (DDI_SUCCESS); 982 } 983 984 /* 985 * tavor_mr_dealloc_fmr() 986 * Context: Can be called from interrupt or base context. 987 */ 988 /* ARGSUSED */ 989 int 990 tavor_mr_dealloc_fmr(tavor_state_t *state, tavor_mrhdl_t *mrhdl) 991 { 992 tavor_rsrc_t *mpt, *mtt, *rsrc; 993 tavor_pdhdl_t pd; 994 tavor_mrhdl_t mr; 995 996 TAVOR_TNF_ENTER(tavor_mr_dealloc_fmr); 997 998 /* 999 * Pull all the necessary information from the Tavor Memory Region 1000 * handle. 
 This is necessary here because the resource for the
	 * MR handle is going to be freed up as part of the this
	 * deregistration
	 */
	mr = *mrhdl;
	mutex_enter(&mr->mr_lock);
	mpt = mr->mr_mptrsrcp;
	mtt = mr->mr_mttrsrcp;
	rsrc = mr->mr_rsrcp;
	pd = mr->mr_pdhdl;
	mutex_exit(&mr->mr_lock);

	/* Free the MTT entries */
	tavor_rsrc_free(state, &mtt);

	/* Free the Tavor Memory Region handle */
	tavor_rsrc_free(state, &rsrc);

	/* Free up the MPT entry resource */
	tavor_rsrc_free(state, &mpt);

	/* Decrement the reference count on the protection domain (PD) */
	tavor_pd_refcnt_dec(pd);

	/* Set the mrhdl pointer to NULL and return success */
	*mrhdl = NULL;

	TAVOR_TNF_EXIT(tavor_mr_dealloc_fmr);
	return (DDI_SUCCESS);
}

/*
 * tavor_mr_invalidate_fmr()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
tavor_mr_invalidate_fmr(tavor_state_t *state, tavor_mrhdl_t mr)
{
	tavor_rsrc_t		*mpt;
	uint64_t		*mpt_table;

	TAVOR_TNF_ENTER(tavor_mr_invalidate_fmr);

	/*
	 * Update the raw MPT entry in place while holding "mr_lock":
	 * ownership is first flipped to software (status 0xF), the keys
	 * are cleared, and ownership is then returned to hardware (0x0).
	 */
	mutex_enter(&mr->mr_lock);
	mpt = mr->mr_mptrsrcp;
	mpt_table = (uint64_t *)mpt->tr_addr;

	/* Write MPT status to SW bit */
	ddi_put8(mpt->tr_acchdl, (uint8_t *)&mpt_table[0], 0xF);

	/* invalidate mem key value */
	ddi_put32(mpt->tr_acchdl, (uint32_t *)&mpt_table[1], 0);

	/* invalidate lkey value */
	ddi_put32(mpt->tr_acchdl, (uint32_t *)&mpt_table[4], 0);

	/* Write MPT status to HW bit */
	ddi_put8(mpt->tr_acchdl, (uint8_t *)&mpt_table[0], 0x0);

	mutex_exit(&mr->mr_lock);

	TAVOR_TNF_EXIT(tavor_mr_invalidate_fmr);
	return (DDI_SUCCESS);
}

/*
 * tavor_mr_deregister_fmr()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
tavor_mr_deregister_fmr(tavor_state_t *state, tavor_mrhdl_t mr)
{
	tavor_rsrc_t		*mpt;
	uint64_t		*mpt_table;

	TAVOR_TNF_ENTER(tavor_mr_deregister_fmr);

	mutex_enter(&mr->mr_lock);
	mpt = mr->mr_mptrsrcp;
	mpt_table = (uint64_t *)mpt->tr_addr;

	/*
	 * Write MPT status to SW bit.  NOTE(review): unlike
	 * tavor_mr_deregister(), no HW2SW_MPT command is posted here;
	 * only the status byte of the raw MPT entry is updated.
	 */
	ddi_put8(mpt->tr_acchdl, (uint8_t *)&mpt_table[0], 0xF);
	mutex_exit(&mr->mr_lock);

	TAVOR_TNF_EXIT(tavor_mr_deregister_fmr);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_query()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
tavor_mr_query(tavor_state_t *state, tavor_mrhdl_t mr,
    ibt_mr_query_attr_t *attr)
{
	TAVOR_TNF_ENTER(tavor_mr_query);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr))

	mutex_enter(&mr->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a tavor_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		TNF_PROBE_0(tavor_mr_query_inv_mrhdl_fail, TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_mr_query);
		return (IBT_MR_HDL_INVALID);
	}

	/* Fill in the queried attributes */
	attr->mr_attr_flags = mr->mr_accflag;
	attr->mr_pd = (ibt_pd_hdl_t)mr->mr_pdhdl;

	/* Fill in the "local" attributes */
	attr->mr_lkey = (ibt_lkey_t)mr->mr_lkey;
	attr->mr_lbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
	attr->mr_lbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len;

	/*
	 * Fill in the "remote" attributes (if necessary).  Note: the
	 * remote attributes are only valid if the memory region has one
	 * or more of the remote access flags set.
 */
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		/* Remote bounds mirror the local bounds for this region */
		attr->mr_rkey = (ibt_rkey_t)mr->mr_rkey;
		attr->mr_rbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
		attr->mr_rbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len;
	}

	/*
	 * If region is mapped for streaming (i.e. noncoherent), then set sync
	 * is required
	 */
	attr->mr_sync_required = (mr->mr_bindinfo.bi_flags &
	    IBT_MR_NONCOHERENT) ? B_TRUE : B_FALSE;

	mutex_exit(&mr->mr_lock);
	TAVOR_TNF_EXIT(tavor_mr_query);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_reregister()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mr_reregister(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_pdhdl_t pd, ibt_mr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl_new,
    tavor_mr_options_t *op)
{
	tavor_bind_info_t	bind;
	int			status;

	TAVOR_TNF_ENTER(tavor_mr_reregister);

	/*
	 * Fill in the "bind" struct.  This struct provides the majority
	 * of the information that will be used to distinguish between an
	 * "addr" binding (as is the case here) and a "buf" binding (see
	 * below).  The "bind" struct is later passed to tavor_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Tavor memory
	 * registration (and reregistration) routines.
 */
	bind.bi_type = TAVOR_BINDHDL_VADDR;
	bind.bi_addr = mr_attr->mr_vaddr;
	bind.bi_len = mr_attr->mr_len;
	bind.bi_as = mr_attr->mr_as;
	bind.bi_flags = mr_attr->mr_flags;
	/* Failure status from the common reregister path is passed through */
	status = tavor_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
	if (status != DDI_SUCCESS) {
		TNF_PROBE_0(tavor_mr_reregister_cmnreg_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_mr_reregister);
		return (status);
	}

	TAVOR_TNF_EXIT(tavor_mr_reregister);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_reregister_buf()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mr_reregister_buf(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf,
    tavor_mrhdl_t *mrhdl_new, tavor_mr_options_t *op)
{
	tavor_bind_info_t	bind;
	int			status;

	TAVOR_TNF_ENTER(tavor_mr_reregister_buf);

	/*
	 * Fill in the "bind" struct.  This struct provides the majority
	 * of the information that will be used to distinguish between an
	 * "addr" binding (see above) and a "buf" binding (as is the case
	 * here).  The "bind" struct is later passed to tavor_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Tavor memory
	 * registration routines.  Note: We have chosen to provide
	 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
	 * not set).  It is not critical what value we choose here as it need
	 * only be unique for the given RKey (which will happen by default),
	 * so the choice here is somewhat arbitrary.
 */
	bind.bi_type = TAVOR_BINDHDL_BUF;
	bind.bi_buf = buf;
	if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
		bind.bi_addr = mr_attr->mr_vaddr;
	} else {
		bind.bi_addr = (uint64_t)(uintptr_t)buf->b_un.b_addr;
	}
	bind.bi_len = (uint64_t)buf->b_bcount;
	bind.bi_flags = mr_attr->mr_flags;
	bind.bi_as = NULL;
	status = tavor_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
	if (status != DDI_SUCCESS) {
		TNF_PROBE_0(tavor_mr_reregister_buf_cmnreg_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_mr_reregister_buf);
		return (status);
	}

	TAVOR_TNF_EXIT(tavor_mr_reregister_buf);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_sync()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
tavor_mr_sync(tavor_state_t *state, ibt_mr_sync_t *mr_segs, size_t num_segs)
{
	tavor_mrhdl_t		mrhdl;
	uint64_t		seg_vaddr, seg_len, seg_end;
	uint64_t		mr_start, mr_end;
	uint_t			type;
	/*
	 * NOTE(review): "i" is a signed int compared against the size_t
	 * "num_segs"; presumably callers pass small counts — confirm.
	 */
	int			status, i;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_sync);

	/* Process each of the ibt_mr_sync_t's */
	for (i = 0; i < num_segs; i++) {
		mrhdl = (tavor_mrhdl_t)mr_segs[i].ms_handle;

		/* Check for valid memory region handle */
		if (mrhdl == NULL) {
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl");
			goto mrsync_fail;
		}

		mutex_enter(&mrhdl->mr_lock);

		/*
		 * Check here to see if the memory region has already been
		 * partially deregistered as a result of a
		 * tavor_umap_umemlock_cb() callback.  If so, this is an
		 * error, return failure.
 */
		if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl2");
			goto mrsync_fail;
		}

		/* Check for valid bounds on sync request */
		seg_vaddr = mr_segs[i].ms_vaddr;
		seg_len = mr_segs[i].ms_len;
		seg_end = seg_vaddr + seg_len - 1;
		mr_start = mrhdl->mr_bindinfo.bi_addr;
		mr_end = mr_start + mrhdl->mr_bindinfo.bi_len - 1;
		if ((seg_vaddr < mr_start) || (seg_vaddr > mr_end)) {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_MR_VA_INVALID, "invalid vaddr");
			goto mrsync_fail;
		}
		if ((seg_end < mr_start) || (seg_end > mr_end)) {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_MR_LEN_INVALID, "invalid length");
			goto mrsync_fail;
		}

		/* Determine what type (i.e. direction) for sync */
		if (mr_segs[i].ms_flags & IBT_SYNC_READ) {
			type = DDI_DMA_SYNC_FORDEV;
		} else if (mr_segs[i].ms_flags & IBT_SYNC_WRITE) {
			type = DDI_DMA_SYNC_FORCPU;
		} else {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid sync type");
			goto mrsync_fail;
		}

		/* Sync return value is intentionally ignored (void cast) */
		(void) ddi_dma_sync(mrhdl->mr_bindinfo.bi_dmahdl,
		    (off_t)(seg_vaddr - mr_start), (size_t)seg_len, type);
		mutex_exit(&mrhdl->mr_lock);
	}

	TAVOR_TNF_EXIT(tavor_mr_sync);
	return (DDI_SUCCESS);

mrsync_fail:
	TNF_PROBE_1(tavor_mr_sync_fail, TAVOR_TNF_ERROR, "", tnf_string, msg,
	    errormsg);
	TAVOR_TNF_EXIT(tavor_mr_sync);
	return (status);
}


/*
 * tavor_mw_alloc()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mw_alloc(tavor_state_t *state, tavor_pdhdl_t pd, ibt_mw_flags_t flags,
    tavor_mwhdl_t *mwhdl)
{
	tavor_rsrc_t		*mpt, *rsrc;
	tavor_hw_mpt_t		mpt_entry;
	tavor_mwhdl_t		mw;
	uint_t			sleep;
	int			status;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mw_alloc);

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MW_NOSLEEP) ? TAVOR_NOSLEEP : TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
		goto mwalloc_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	tavor_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry (for use as a memory window).  Since the
	 * Tavor hardware uses the MPT entry for memory regions and for
	 * memory windows, we will fill in this MPT with all the necessary
	 * parameters for the memory window.  And then (just as we do for
	 * memory regions) ownership will be passed to the hardware in the
	 * final step below.  If we fail here, we must undo the protection
	 * domain reference count.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
		goto mwalloc_fail1;
	}

	/*
	 * Allocate the software structure for tracking the memory window (i.e.
	 * the Tavor Memory Window handle).  Note: This is actually the same
	 * software structure used for tracking memory regions, but since many
	 * of the same properties are needed, only a single structure is
	 * necessary.  If we fail here, we must undo the protection domain
	 * reference count and the previous resource allocation.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
		goto mwalloc_fail2;
	}
	mw = (tavor_mwhdl_t)rsrc->tr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))

	/*
	 * Calculate an "unbound" RKey from MPT index.  In much the same way
	 * as we do for memory regions (above), this key is constructed from
	 * a "constrained" (which depends on the MPT index) and an
	 * "unconstrained" portion (which may be arbitrarily chosen).
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mw->mr_rkey);

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Tavor hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.  Note: fewer entries in the MPT
	 * entry are necessary to allocate a memory window.
	 */
	bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
	mpt_entry.reg_win	= TAVOR_MPT_IS_WINDOW;
	mpt_entry.mem_key	= mw->mr_rkey;
	mpt_entry.pd		= pd->pd_pdnum;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
		    status);
		TNF_PROBE_1(tavor_mw_alloc_sw2hw_mpt_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
		    "tavor SW2HW_MPT command");
		goto mwalloc_fail3;
	}

	/*
	 * Fill in the rest of the Tavor Memory Window handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MW.
	 */
	mw->mr_mptrsrcp	= mpt;
	mw->mr_pdhdl	= pd;
	mw->mr_rsrcp	= rsrc;
	*mwhdl = mw;

	TAVOR_TNF_EXIT(tavor_mw_alloc);
	return (DDI_SUCCESS);

/*
 * Cleanup ladder: each label undoes the allocations made before the
 * corresponding failure point, in reverse order of acquisition.
 */
mwalloc_fail3:
	tavor_rsrc_free(state, &rsrc);
mwalloc_fail2:
	tavor_rsrc_free(state, &mpt);
mwalloc_fail1:
	tavor_pd_refcnt_dec(pd);
mwalloc_fail:
	TNF_PROBE_1(tavor_mw_alloc_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mw_alloc);
	return (status);
}


/*
 * tavor_mw_free()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mw_free(tavor_state_t *state, tavor_mwhdl_t *mwhdl, uint_t sleep)
{
	tavor_rsrc_t		*mpt, *rsrc;
	tavor_mwhdl_t		mw;
	int			status;
	char			*errormsg;
	tavor_pdhdl_t		pd;

	TAVOR_TNF_ENTER(tavor_mw_free);

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
 */
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid sleep flags");
		TNF_PROBE_1(tavor_mw_free_fail, TAVOR_TNF_ERROR, "",
		    tnf_string, msg, errormsg);
		TAVOR_TNF_EXIT(tavor_mw_free);
		return (status);
	}

	/*
	 * Pull all the necessary information from the Tavor Memory Window
	 * handle.  This is necessary here because the resource for the
	 * MW handle is going to be freed up as part of the this operation.
	 */
	mw = *mwhdl;
	mutex_enter(&mw->mr_lock);
	mpt = mw->mr_mptrsrcp;
	rsrc = mw->mr_rsrcp;
	pd = mw->mr_pdhdl;
	mutex_exit(&mw->mr_lock);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))

	/*
	 * Reclaim the MPT entry from hardware.  Note: in general, it is
	 * unexpected for this operation to return an error.  If it does,
	 * the MPT, handle, and PD reference are not released here.
	 */
	status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL,
	    0, mpt->tr_indx, sleep);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: HW2SW_MPT command failed: %08x\n",
		    status);
		TNF_PROBE_1(tavor_hw2sw_mpt_cmd_fail, TAVOR_TNF_ERROR, "",
		    tnf_uint, status, status);
		TAVOR_TNF_EXIT(tavor_mw_free);
		return (IBT_INVALID_PARAM);
	}

	/* Free the Tavor Memory Window handle */
	tavor_rsrc_free(state, &rsrc);

	/* Free up the MPT entry resource */
	tavor_rsrc_free(state, &mpt);

	/* Decrement the reference count on the protection domain (PD) */
	tavor_pd_refcnt_dec(pd);

	/* Set the mwhdl pointer to NULL and return success */
	*mwhdl = NULL;

	TAVOR_TNF_EXIT(tavor_mw_free);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_keycalc()
 *    Context: Can be called from interrupt or base context.
 */
void
tavor_mr_keycalc(tavor_state_t *state, uint32_t indx, uint32_t *key)
{
	uint32_t	tmp, log_num_mpt;

	/*
	 * Generate a simple key from counter.  Note:  We increment this
	 * static variable _intentionally_ without any kind of mutex around
	 * it.  First, single-threading all operations through a single lock
	 * would be a bad idea (from a performance point-of-view).  Second,
	 * the upper "unconstrained" bits don't really have to be unique
	 * because the lower bits are guaranteed to be (although we do make a
	 * best effort to ensure that they are).  Third, the window for the
	 * race (where both threads read and update the counter at the same
	 * time) is incredibly small.
	 * And, lastly, we'd like to make this into a "random" key XXX
	 */
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(tavor_debug_memkey_cnt))
	log_num_mpt = state->ts_cfg_profile->cp_log_num_mpt;
	/* low log_num_mpt bits = MPT index; upper bits = counter value */
	tmp = (tavor_debug_memkey_cnt++) << log_num_mpt;
	*key = tmp | indx;
}


/*
 * tavor_mr_common_reg()
 *    Context: Can be called from interrupt or base context.
 */
static int
tavor_mr_common_reg(tavor_state_t *state, tavor_pdhdl_t pd,
    tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op)
{
	tavor_rsrc_pool_info_t	*rsrc_pool;
	tavor_rsrc_t		*mpt, *mtt, *rsrc, *mtt_refcnt;
	tavor_umap_db_entry_t	*umapdb;
	tavor_sw_refcnt_t	*swrc_tmp;
	tavor_hw_mpt_t		mpt_entry;
	tavor_mrhdl_t		mr;
	ibt_mr_flags_t		flags;
	tavor_bind_info_t	*bh;
	ddi_dma_handle_t	bind_dmahdl;
	ddi_umem_cookie_t	umem_cookie;
	size_t			umem_len;
	caddr_t			umem_addr;
	uint64_t		mtt_addr, mtt_ddrbaseaddr, max_sz;
	uint_t			sleep, mtt_pgsize_bits, bind_type, mr_is_umem;
	int			status, umem_flags, bind_override_addr;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_common_reg);

	/*
	 * Check the "options" flag.
 Currently this flag tells the driver
	 * whether or not the region should be bound normally (i.e. with
	 * entries written into the PCI IOMMU), whether it should be
	 * registered to bypass the IOMMU, and whether or not the resulting
	 * address should be "zero-based" (to aid the alignment restrictions
	 * for QPs).
	 */
	if (op == NULL) {
		bind_type   = TAVOR_BINDMEM_NORMAL;
		bind_dmahdl = NULL;
		bind_override_addr = 0;
	} else {
		bind_type	   = op->mro_bind_type;
		bind_dmahdl	   = op->mro_bind_dmahdl;
		bind_override_addr = op->mro_bind_override_addr;
	}

	/* Extract the flags field from the tavor_bind_info_t */
	flags = bind->bi_flags;

	/*
	 * Check for invalid length.  Check is the length is zero or if the
	 * length is larger than the maximum configured value.  Return error
	 * if it is.
	 */
	max_sz = ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_mrw_sz);
	if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_MR_LEN_INVALID, "invalid length");
		goto mrcommon_fail;
	}

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP: TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
		goto mrcommon_fail;
	}

	/*
	 * Get the base address for the MTT table.  This will be necessary
	 * below when we are setting up the MPT entry.
	 */
	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
	mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;

	/* Increment the reference count on the protection domain (PD) */
	tavor_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry.  This will be filled in with all the
	 * necessary parameters to define the memory region.  And then
	 * ownership will be passed to the hardware in the final step
	 * below.  If we fail here, we must undo the protection domain
	 * reference count.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
		goto mrcommon_fail1;
	}

	/*
	 * Allocate the software structure for tracking the memory region (i.e.
	 * the Tavor Memory Region handle).  If we fail here, we must undo
	 * the protection domain reference count and the previous resource
	 * allocation.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
		goto mrcommon_fail2;
	}
	mr = (tavor_mrhdl_t)rsrc->tr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

	/*
	 * Setup and validate the memory region access flags.  This means
	 * translating the IBTF's enable flags into the access flags that
	 * will be used in later operations.
	 */
	mr->mr_accflag = 0;
	if (flags & IBT_MR_ENABLE_WINDOW_BIND)
		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
	if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
	if (flags & IBT_MR_ENABLE_REMOTE_READ)
		mr->mr_accflag |= IBT_MR_REMOTE_READ;
	if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
	if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	}

	/*
	 * Determine if the memory is from userland and pin the pages
	 * with umem_lockmemory() if necessary.
	 * Then, if this is userland memory, allocate an entry in the
	 * "userland resources database".  This will later be added to
	 * the database (after all further memory registration operations are
	 * successful).  If we fail here, we must undo the reference counts
	 * and the previous resource allocations.
	 */
	mr_is_umem = (((bind->bi_as != NULL) && (bind->bi_as != &kas)) ?
	    1 : 0);
	if (mr_is_umem) {
		/* Page-align the address and round the length up to pages */
		umem_len   = ptob(btopr(bind->bi_len +
		    ((uintptr_t)bind->bi_addr & PAGEOFFSET)));
		umem_addr  = (caddr_t)((uintptr_t)bind->bi_addr & ~PAGEOFFSET);
		umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
		    DDI_UMEMLOCK_LONGTERM);
		status = umem_lockmemory(umem_addr, umem_len, umem_flags,
		    &umem_cookie, &tavor_umem_cbops, NULL);
		if (status != 0) {
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umem pin");
			goto mrcommon_fail3;
		}

		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind->bi_buf))

		bind->bi_buf = ddi_umem_iosetup(umem_cookie, 0, umem_len,
		    B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
		if (bind->bi_buf == NULL) {
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed iosetup");
			goto mrcommon_fail3;
		}
		bind->bi_type = TAVOR_BINDHDL_UBUF;
		bind->bi_buf->b_flags |= B_READ;

		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))

		umapdb = tavor_umap_db_alloc(state->ts_instance,
		    (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
		    (uint64_t)(uintptr_t)rsrc);
		if (umapdb == NULL) {
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
			goto mrcommon_fail4;
		}
	}

	/*
	 * Setup the bindinfo for the mtt bind call
	 */
	bh = &mr->mr_bindinfo;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bh))
	bcopy(bind, bh, sizeof (tavor_bind_info_t));
	bh->bi_bypass = bind_type;
	status = tavor_mr_mtt_bind(state, bh, bind_dmahdl, &mtt,
	    &mtt_pgsize_bits);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(status, "failed mtt bind");
		goto mrcommon_fail5;
	}
	mr->mr_logmttpgsz = mtt_pgsize_bits;

	/*
	 * Allocate MTT reference count (to track shared memory regions).
	 * This reference count resource may never be used on the given
	 * memory region, but if it is ever later registered as "shared"
	 * memory region then this resource will be necessary.  If we fail
	 * here, we do pretty much the same as above to clean up.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_REFCNT, 1, sleep,
	    &mtt_refcnt);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed refence count");
		goto mrcommon_fail6;
	}
	mr->mr_mttrefcntp = mtt_refcnt;
	swrc_tmp = (tavor_sw_refcnt_t *)mtt_refcnt->tr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp))
	TAVOR_MTT_REFCNT_INIT(swrc_tmp);

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Tavor hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
	mpt_entry.m_io	  = TAVOR_MEM_CYCLE_GENERATE;
	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
	mpt_entry.lr	  = 1;
	mpt_entry.reg_win = TAVOR_MPT_IS_REGION;
	/* page_sz holds log2 of the MTT page size relative to 4KB (2^12) */
	mpt_entry.page_sz	= mr->mr_logmttpgsz - 0xC;
	mpt_entry.mem_key	= mr->mr_lkey;
	mpt_entry.pd		= pd->pd_pdnum;
	if (bind_override_addr == 0) {
		mpt_entry.start_addr = bh->bi_addr;
	} else {
		bh->bi_addr = bh->bi_addr & ((1 << mr->mr_logmttpgsz) - 1);
		mpt_entry.start_addr = bh->bi_addr;
	}
	mpt_entry.reg_win_len	= bh->bi_len;
	mpt_entry.win_cnt_limit	= TAVOR_UNLIMITED_WIN_BIND;
	/* MTT entries are TAVOR_MTT_SIZE_SHIFT bytes; low addr bits >> 6 */
	mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT);
	mpt_entry.mttseg_addr_h = mtt_addr >> 32;
	mpt_entry.mttseg_addr_l = mtt_addr >> 6;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
		    status);
		TNF_PROBE_1(tavor_mr_common_reg_sw2hw_mpt_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
		    "tavor SW2HW_MPT command");
		goto mrcommon_fail7;
	}

	/*
	 * Fill in the rest of the Tavor Memory Region handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MR.
	 */
	mr->mr_mptrsrcp	  = mpt;
	mr->mr_mttrsrcp	  = mtt;
	mr->mr_pdhdl	  = pd;
	mr->mr_rsrcp	  = rsrc;
	mr->mr_is_umem	  = mr_is_umem;
	mr->mr_is_fmr	  = 0;
	mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
	mr->mr_umem_cbfunc = NULL;
	mr->mr_umem_cbarg1 = NULL;
	mr->mr_umem_cbarg2 = NULL;

	/*
	 * If this is userland memory, then we need to insert the previously
	 * allocated entry into the "userland resources database".  This will
	 * allow for later coordination between the tavor_umap_umemlock_cb()
	 * callback and tavor_mr_deregister().
	 */
	if (mr_is_umem) {
		tavor_umap_db_add(umapdb);
	}

	*mrhdl = mr;

	TAVOR_TNF_EXIT(tavor_mr_common_reg);
	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 * (unwound in reverse order of acquisition)
 */
mrcommon_fail7:
	tavor_rsrc_free(state, &mtt_refcnt);
mrcommon_fail6:
	tavor_rsrc_free(state, &mtt);
	tavor_mr_mem_unbind(state, bh);
	bind->bi_type = bh->bi_type;
mrcommon_fail5:
	if (mr_is_umem) {
		tavor_umap_db_free(umapdb);
	}
mrcommon_fail4:
	if (mr_is_umem) {
		/*
		 * Free up the memory ddi_umem_iosetup() allocates
		 * internally.
		 */
		if (bind->bi_type == TAVOR_BINDHDL_UBUF) {
			freerbuf(bind->bi_buf);
			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
			bind->bi_type = TAVOR_BINDHDL_NONE;
			_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
		}
		ddi_umem_unlock(umem_cookie);
	}
mrcommon_fail3:
	tavor_rsrc_free(state, &rsrc);
mrcommon_fail2:
	tavor_rsrc_free(state, &mpt);
mrcommon_fail1:
	tavor_pd_refcnt_dec(pd);
mrcommon_fail:
	TNF_PROBE_1(tavor_mr_common_reg_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mr_common_reg);
	return (status);
}

/*
 * tavor_mr_mtt_bind()
 *    Context: Can be called from interrupt or base context.
1921 */ 1922 int 1923 tavor_mr_mtt_bind(tavor_state_t *state, tavor_bind_info_t *bind, 1924 ddi_dma_handle_t bind_dmahdl, tavor_rsrc_t **mtt, uint_t *mtt_pgsize_bits) 1925 { 1926 uint64_t nummtt; 1927 uint_t sleep; 1928 int status; 1929 char *errormsg; 1930 1931 TAVOR_TNF_ENTER(tavor_mr_common_reg); 1932 1933 /* 1934 * Check the sleep flag. Ensure that it is consistent with the 1935 * current thread context (i.e. if we are currently in the interrupt 1936 * context, then we shouldn't be attempting to sleep). 1937 */ 1938 sleep = (bind->bi_flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP: TAVOR_SLEEP; 1939 if ((sleep == TAVOR_SLEEP) && 1940 (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) { 1941 /* Set "status" and "errormsg" and goto failure */ 1942 TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags"); 1943 goto mrmttbind_fail; 1944 } 1945 1946 /* 1947 * Bind the memory and determine the mapped addresses. This is 1948 * the first of two routines that do all the "heavy lifting" for 1949 * the Tavor memory registration routines. The tavor_mr_mem_bind() 1950 * routine takes the "bind" struct with all its fields filled 1951 * in and returns a list of DMA cookies (for the PCI mapped addresses 1952 * corresponding to the specified address region) which are used by 1953 * the tavor_mr_fast_mtt_write() routine below. If we fail here, we 1954 * must undo all the previous resource allocation (and PD reference 1955 * count). 1956 */ 1957 status = tavor_mr_mem_bind(state, bind, bind_dmahdl, sleep); 1958 if (status != DDI_SUCCESS) { 1959 /* Set "status" and "errormsg" and goto failure */ 1960 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed mem bind"); 1961 goto mrmttbind_fail; 1962 } 1963 1964 /* 1965 * Determine number of pages spanned. This routine uses the 1966 * information in the "bind" struct to determine the required 1967 * number of MTT entries needed (and returns the suggested page size - 1968 * as a "power-of-2" - for each MTT entry). 
1969 */ 1970 nummtt = tavor_mr_nummtt_needed(state, bind, mtt_pgsize_bits); 1971 1972 /* 1973 * Allocate the MTT entries. Use the calculations performed above to 1974 * allocate the required number of MTT entries. Note: MTT entries are 1975 * allocated in "MTT segments" which consist of complete cachelines 1976 * (i.e. 8 entries, 16 entries, etc.) So the TAVOR_NUMMTT_TO_MTTSEG() 1977 * macro is used to do the proper conversion. If we fail here, we 1978 * must not only undo all the previous resource allocation (and PD 1979 * reference count), but we must also unbind the memory. 1980 */ 1981 status = tavor_rsrc_alloc(state, TAVOR_MTT, 1982 TAVOR_NUMMTT_TO_MTTSEG(nummtt), sleep, mtt); 1983 if (status != DDI_SUCCESS) { 1984 /* Set "status" and "errormsg" and goto failure */ 1985 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MTT"); 1986 goto mrmttbind_fail2; 1987 } 1988 1989 /* 1990 * Write the mapped addresses into the MTT entries. This is part two 1991 * of the "heavy lifting" routines that we talked about above. Note: 1992 * we pass the suggested page size from the earlier operation here. 1993 * And if we fail here, we again do pretty much the same huge clean up. 
1994 */ 1995 status = tavor_mr_fast_mtt_write(*mtt, bind, *mtt_pgsize_bits); 1996 if (status != DDI_SUCCESS) { 1997 /* Set "status" and "errormsg" and goto failure */ 1998 TAVOR_TNF_FAIL(ibc_get_ci_failure(0), "failed write mtt"); 1999 goto mrmttbind_fail3; 2000 } 2001 TAVOR_TNF_EXIT(tavor_mr_mtt_bind); 2002 return (DDI_SUCCESS); 2003 2004 /* 2005 * The following is cleanup for all possible failure cases in this routine 2006 */ 2007 mrmttbind_fail3: 2008 tavor_rsrc_free(state, mtt); 2009 mrmttbind_fail2: 2010 tavor_mr_mem_unbind(state, bind); 2011 mrmttbind_fail: 2012 TNF_PROBE_1(tavor_mr_mtt_bind_fail, TAVOR_TNF_ERROR, "", 2013 tnf_string, msg, errormsg); 2014 TAVOR_TNF_EXIT(tavor_mr_mtt_bind); 2015 return (status); 2016 } 2017 2018 2019 /* 2020 * tavor_mr_mtt_unbind() 2021 * Context: Can be called from interrupt or base context. 2022 */ 2023 int 2024 tavor_mr_mtt_unbind(tavor_state_t *state, tavor_bind_info_t *bind, 2025 tavor_rsrc_t *mtt) 2026 { 2027 TAVOR_TNF_ENTER(tavor_mr_mtt_unbind); 2028 2029 /* 2030 * Free up the MTT entries and unbind the memory. Here, as above, we 2031 * attempt to free these resources only if it is appropriate to do so. 2032 */ 2033 tavor_mr_mem_unbind(state, bind); 2034 tavor_rsrc_free(state, &mtt); 2035 2036 TAVOR_TNF_EXIT(tavor_mr_mtt_unbind); 2037 return (DDI_SUCCESS); 2038 } 2039 2040 2041 /* 2042 * tavor_mr_common_rereg() 2043 * Context: Can be called from interrupt or base context. 
 *
 *    Common code for memory-region reregistration: regains MPT ownership
 *    from the hardware (HW2SW_MPT), optionally changes the protection
 *    domain, access flags, and/or translation, then passes the updated
 *    MPT entry back to the hardware (SW2HW_MPT).  On unexpected failure
 *    the region is deregistered before returning error.
 */
static int
tavor_mr_common_rereg(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_pdhdl_t pd, tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl_new,
    tavor_mr_options_t *op)
{
	tavor_rsrc_t		*mpt;
	ibt_mr_attr_flags_t	acc_flags_to_use;
	ibt_mr_flags_t		flags;
	tavor_pdhdl_t		pd_to_use;
	tavor_hw_mpt_t		mpt_entry;
	uint64_t		mtt_addr_to_use, vaddr_to_use, len_to_use;
	uint_t			sleep, dereg_level;
	int			status;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_common_rereg);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))

	/*
	 * Check here to see if the memory region corresponds to a userland
	 * mapping.  Reregistration of userland memory regions is not
	 * currently supported.  Return failure. XXX
	 */
	if (mr->mr_is_umem) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl");
		goto mrrereg_fail;
	}

	mutex_enter(&mr->mr_lock);

	/* Pull MPT resource pointer from the Tavor Memory Region handle */
	mpt = mr->mr_mptrsrcp;

	/* Extract the flags field from the tavor_bind_info_t */
	flags = bind->bi_flags;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP: TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		mutex_exit(&mr->mr_lock);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
		goto mrrereg_fail;
	}

	/*
	 * First step is to temporarily invalidate the MPT entry.  This
	 * regains ownership from the hardware, and gives us the opportunity
	 * to modify the entry.  Note: The HW2SW_MPT command returns the
	 * current MPT entry contents.  These are saved away here because
	 * they will be reused in a later step below.  If the region has
	 * bound memory windows then we fail, returning an "in use" error
	 * code.  Otherwise, this is an unexpected error and we deregister
	 * the memory region and return error.
	 *
	 * We use TAVOR_CMD_NOSLEEP_SPIN here always because we must protect
	 * against holding the lock around this rereg call in all contexts.
	 */
	status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, TAVOR_CMD_NOSLEEP_SPIN);
	if (status != TAVOR_CMD_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		if (status == TAVOR_CMD_REG_BOUND) {
			TAVOR_TNF_EXIT(tavor_mr_common_rereg);
			return (IBT_MR_IN_USE);
		} else {
			cmn_err(CE_CONT, "Tavor: HW2SW_MPT command failed: "
			    "%08x\n", status);

			/*
			 * Call deregister and ensure that all current
			 * resources get freed up
			 */
			if (tavor_mr_deregister(state, &mr,
			    TAVOR_MR_DEREG_ALL, sleep) != DDI_SUCCESS) {
				TAVOR_WARNING(state, "failed to deregister "
				    "memory region");
			}
			TNF_PROBE_1(tavor_mr_common_rereg_hw2sw_mpt_cmd_fail,
			    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
			TAVOR_TNF_EXIT(tavor_mr_common_rereg);
			return (ibc_get_ci_failure(0));
		}
	}

	/*
	 * If we're changing the protection domain, then validate the new one
	 */
	if (flags & IBT_MR_CHANGE_PD) {

		/* Check for valid PD handle pointer */
		if (pd == NULL) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all current
			 * resources get properly freed up.  Unnecessary
			 * here to attempt to regain software ownership
			 * of the MPT entry as that has already been
			 * done above.
			 */
			if (tavor_mr_deregister(state, &mr,
			    TAVOR_MR_DEREG_NO_HW2SW_MPT, sleep) !=
			    DDI_SUCCESS) {
				TAVOR_WARNING(state, "failed to deregister "
				    "memory region");
			}
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_PD_HDL_INVALID, "invalid PD handle");
			goto mrrereg_fail;
		}

		/* Use the new PD handle in all operations below */
		pd_to_use = pd;

	} else {
		/* Use the current PD handle in all operations below */
		pd_to_use = mr->mr_pdhdl;
	}

	/*
	 * If we're changing access permissions, then validate the new ones
	 */
	if (flags & IBT_MR_CHANGE_ACCESS) {
		/*
		 * Validate the access flags.  Both remote write and remote
		 * atomic require the local write flag to be set
		 */
		if (((flags & IBT_MR_ENABLE_REMOTE_WRITE) ||
		    (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)) &&
		    !(flags & IBT_MR_ENABLE_LOCAL_WRITE)) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all current
			 * resources get properly freed up.  Unnecessary
			 * here to attempt to regain software ownership
			 * of the MPT entry as that has already been
			 * done above.
			 */
			if (tavor_mr_deregister(state, &mr,
			    TAVOR_MR_DEREG_NO_HW2SW_MPT, sleep) !=
			    DDI_SUCCESS) {
				TAVOR_WARNING(state, "failed to deregister "
				    "memory region");
			}
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_MR_ACCESS_REQ_INVALID,
			    "invalid access flags");
			goto mrrereg_fail;
		}

		/*
		 * Setup and validate the memory region access flags.  This
		 * means translating the IBTF's enable flags into the access
		 * flags that will be used in later operations.
		 */
		acc_flags_to_use = 0;
		if (flags & IBT_MR_ENABLE_WINDOW_BIND)
			acc_flags_to_use |= IBT_MR_WINDOW_BIND;
		if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
			acc_flags_to_use |= IBT_MR_LOCAL_WRITE;
		if (flags & IBT_MR_ENABLE_REMOTE_READ)
			acc_flags_to_use |= IBT_MR_REMOTE_READ;
		if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
			acc_flags_to_use |= IBT_MR_REMOTE_WRITE;
		if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
			acc_flags_to_use |= IBT_MR_REMOTE_ATOMIC;

	} else {
		/* Not changing access: keep the region's current flags */
		acc_flags_to_use = mr->mr_accflag;
	}

	/*
	 * If we're modifying the translation, then figure out whether
	 * we can reuse the current MTT resources.  This means calling
	 * tavor_mr_rereg_xlat_helper() which does most of the heavy lifting
	 * for the reregistration.  If the current memory region contains
	 * sufficient MTT entries for the new regions, then it will be
	 * reused and filled in.  Otherwise, new entries will be allocated,
	 * the old ones will be freed, and the new entries will be filled
	 * in.  Note: If we're not modifying the translation, then we
	 * should already have all the information we need to update the MPT.
	 * Also note: If tavor_mr_rereg_xlat_helper() fails, it will return
	 * a "dereg_level" which is the level of cleanup that needs to be
	 * passed to tavor_mr_deregister() to finish the cleanup.
	 */
	if (flags & IBT_MR_CHANGE_TRANSLATION) {
		status = tavor_mr_rereg_xlat_helper(state, mr, bind, op,
		    &mtt_addr_to_use, sleep, &dereg_level);
		if (status != DDI_SUCCESS) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all resources get
			 * properly freed up.
			 */
			if (tavor_mr_deregister(state, &mr, dereg_level,
			    sleep) != DDI_SUCCESS) {
				TAVOR_WARNING(state, "failed to deregister "
				    "memory region");
			}

			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(status, "failed rereg helper");
			goto mrrereg_fail;
		}
		vaddr_to_use = mr->mr_bindinfo.bi_addr;
		len_to_use = mr->mr_bindinfo.bi_len;
	} else {
		/* Reconstruct the MTT address from the saved MPT contents */
		mtt_addr_to_use = (((uint64_t)mpt_entry.mttseg_addr_h << 32) |
		    ((uint64_t)mpt_entry.mttseg_addr_l << 6));
		vaddr_to_use = mr->mr_bindinfo.bi_addr;
		len_to_use = mr->mr_bindinfo.bi_len;
	}

	/*
	 * Calculate new keys (Lkey, Rkey) from MPT index.  Just like they were
	 * when the region was first registered, each key is formed from
	 * "constrained" bits and "unconstrained" bits.  Note: If no remote
	 * access is required, then the RKey value is not filled in.  Otherwise
	 * both Rkey and LKey are given the same value.
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
	if ((acc_flags_to_use & IBT_MR_REMOTE_READ) ||
	    (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ||
	    (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	}

	/*
	 * Update the MPT entry with the new information.  Some of this
	 * information is retained from the previous operation, some of
	 * it is new based on request.
	 */
	mpt_entry.en_bind = (acc_flags_to_use & IBT_MR_WINDOW_BIND) ? 1 : 0;
	mpt_entry.atomic = (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw = (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ? 1 : 0;
	mpt_entry.rr = (acc_flags_to_use & IBT_MR_REMOTE_READ) ? 1 : 0;
	mpt_entry.lw = (acc_flags_to_use & IBT_MR_LOCAL_WRITE) ? 1 : 0;
	mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC;
	mpt_entry.mem_key = mr->mr_lkey;
	mpt_entry.pd = pd_to_use->pd_pdnum;
	mpt_entry.start_addr = vaddr_to_use;
	mpt_entry.reg_win_len = len_to_use;
	mpt_entry.mttseg_addr_h = mtt_addr_to_use >> 32;
	mpt_entry.mttseg_addr_l = mtt_addr_to_use >> 6;

	/*
	 * Write the updated MPT entry to hardware
	 *
	 * We use TAVOR_CMD_NOSLEEP_SPIN here always because we must protect
	 * against holding the lock around this rereg call in all contexts.
	 */
	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, TAVOR_CMD_NOSLEEP_SPIN);
	if (status != TAVOR_CMD_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
		    status);
		/*
		 * Call deregister and ensure that all current resources get
		 * properly freed up.  Unnecessary here to attempt to regain
		 * software ownership of the MPT entry as that has already
		 * been done above.
		 */
		if (tavor_mr_deregister(state, &mr,
		    TAVOR_MR_DEREG_NO_HW2SW_MPT, sleep) != DDI_SUCCESS) {
			TAVOR_WARNING(state, "failed to deregister memory "
			    "region");
		}
		TNF_PROBE_1(tavor_mr_common_rereg_sw2hw_mpt_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
		TAVOR_TNF_EXIT(tavor_mr_common_rereg);
		return (ibc_get_ci_failure(0));
	}

	/*
	 * If we're changing PD, then update their reference counts now.
	 * This means decrementing the reference count on the old PD and
	 * incrementing the reference count on the new PD.
	 */
	if (flags & IBT_MR_CHANGE_PD) {
		tavor_pd_refcnt_dec(mr->mr_pdhdl);
		tavor_pd_refcnt_inc(pd);
	}

	/*
	 * Update the contents of the Tavor Memory Region handle to reflect
	 * what has been changed.
	 */
	mr->mr_pdhdl = pd_to_use;
	mr->mr_accflag = acc_flags_to_use;
	mr->mr_is_umem = 0;
	mr->mr_is_fmr = 0;
	mr->mr_umemcookie = NULL;

	/* New MR handle is same as the old */
	*mrhdl_new = mr;
	mutex_exit(&mr->mr_lock);

	TAVOR_TNF_EXIT(tavor_mr_common_rereg);
	return (DDI_SUCCESS);

mrrereg_fail:
	TNF_PROBE_1(tavor_mr_common_rereg_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mr_common_rereg);
	return (status);
}


/*
 * tavor_mr_rereg_xlat_helper
 *    Context: Can be called from interrupt or base context.
 *    Note: This routine expects the "mr_lock" to be held when it
 *    is called.  Upon returning failure, this routine passes information
 *    about what "dereg_level" should be passed to tavor_mr_deregister().
 *
 *    Rebinds the memory for a CHANGE_TRANSLATION reregister, reusing the
 *    current MTT entries when possible (region not shared and large
 *    enough), otherwise allocating new MTT resources.  On success the
 *    updated MTT address (in DDR space) is returned through "mtt_addr".
 */
static int
tavor_mr_rereg_xlat_helper(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_bind_info_t *bind, tavor_mr_options_t *op, uint64_t *mtt_addr,
    uint_t sleep, uint_t *dereg_level)
{
	tavor_rsrc_pool_info_t	*rsrc_pool;
	tavor_rsrc_t		*mtt, *mtt_refcnt;
	tavor_sw_refcnt_t	*swrc_old, *swrc_new;
	ddi_dma_handle_t	dmahdl;
	uint64_t		nummtt_needed, nummtt_in_currrsrc, max_sz;
	uint64_t		mtt_ddrbaseaddr;
	uint_t			mtt_pgsize_bits, bind_type, reuse_dmahdl;
	int			status;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_rereg_xlat_helper);

	ASSERT(MUTEX_HELD(&mr->mr_lock));

	/*
	 * Check the "options" flag.  Currently this flag tells the driver
	 * whether or not the region should be bound normally (i.e. with
	 * entries written into the PCI IOMMU) or whether it should be
	 * registered to bypass the IOMMU.
	 */
	if (op == NULL) {
		bind_type = TAVOR_BINDMEM_NORMAL;
	} else {
		bind_type = op->mro_bind_type;
	}

	/*
	 * Check for invalid length.  Check if the length is zero or if the
	 * length is larger than the maximum configured value.  Return error
	 * if it is.
	 */
	max_sz = ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_mrw_sz);
	if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
		/*
		 * Deregister will be called upon returning failure from this
		 * routine.  This will ensure that all current resources get
		 * properly freed up.  Unnecessary to attempt to regain
		 * software ownership of the MPT entry as that has already
		 * been done above (in tavor_mr_reregister())
		 */
		*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT;

		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_MR_LEN_INVALID, "invalid length");
		goto mrrereghelp_fail;
	}

	/*
	 * Determine the number of pages necessary for new region and the
	 * number of pages supported by the current MTT resources
	 */
	nummtt_needed = tavor_mr_nummtt_needed(state, bind, &mtt_pgsize_bits);
	nummtt_in_currrsrc = mr->mr_mttrsrcp->tr_len >> TAVOR_MTT_SIZE_SHIFT;

	/*
	 * Depending on whether we have enough pages or not, the next step is
	 * to fill in a set of MTT entries that reflect the new mapping.  In
	 * the first case below, we already have enough entries.  This means
	 * we need to unbind the memory from the previous mapping, bind the
	 * memory for the new mapping, write the new MTT entries, and update
	 * the mr to reflect the changes.
	 * In the second case below, we do not have enough entries in the
	 * current mapping.  So, in this case, we need not only to unbind the
	 * current mapping, but we need to free up the MTT resources associated
	 * with that mapping.  After we've successfully done that, we continue
	 * by binding the new memory, allocating new MTT entries, writing the
	 * new MTT entries, and updating the mr to reflect the changes.
	 */

	/*
	 * If this region is being shared (i.e. MTT refcount != 1), then we
	 * can't reuse the current MTT resources regardless of their size.
	 * Instead we'll need to alloc new ones (below) just as if there
	 * hadn't been enough room in the current entries.
	 */
	swrc_old = (tavor_sw_refcnt_t *)mr->mr_mttrefcntp->tr_addr;
	if (TAVOR_MTT_IS_NOT_SHARED(swrc_old) &&
	    (nummtt_needed <= nummtt_in_currrsrc)) {

		/*
		 * Unbind the old mapping for this memory region, but retain
		 * the ddi_dma_handle_t (if possible) for reuse in the bind
		 * operation below.  Note: If original memory region was
		 * bound for IOMMU bypass and the new region can not use
		 * bypass, then a new DMA handle will be necessary.
		 */
		if (TAVOR_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
			mr->mr_bindinfo.bi_free_dmahdl = 0;
			tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
			dmahdl = mr->mr_bindinfo.bi_dmahdl;
			reuse_dmahdl = 1;
		} else {
			tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
			dmahdl = NULL;
			reuse_dmahdl = 0;
		}

		/*
		 * Bind the new memory and determine the mapped addresses.
		 * As described, this routine and tavor_mr_fast_mtt_write()
		 * do the majority of the work for the memory registration
		 * operations.  Note: When we successfully finish the binding,
		 * we will set the "bi_free_dmahdl" flag to indicate that
		 * even though we may have reused the ddi_dma_handle_t we do
		 * wish it to be freed up at some later time.  Note also that
		 * if we fail, we may need to cleanup the ddi_dma_handle_t.
		 */
		bind->bi_bypass = bind_type;
		status = tavor_mr_mem_bind(state, bind, dmahdl, sleep);
		if (status != DDI_SUCCESS) {
			if (reuse_dmahdl) {
				ddi_dma_free_handle(&dmahdl);
			}

			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in tavor_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 */
			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed mem bind");
			goto mrrereghelp_fail;
		}
		if (reuse_dmahdl) {
			bind->bi_free_dmahdl = 1;
		}

		/*
		 * Using the new mapping, but reusing the current MTT
		 * resources, write the updated entries to MTT
		 */
		mtt = mr->mr_mttrsrcp;
		status = tavor_mr_fast_mtt_write(mtt, bind, mtt_pgsize_bits);
		if (status != DDI_SUCCESS) {
			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in tavor_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 *
			 * But we do need to unbind the newly bound memory
			 * before returning.
			 */
			tavor_mr_mem_unbind(state, bind);
			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
			    "failed write mtt");
			goto mrrereghelp_fail;
		}

		/* Put the updated information into the Mem Region handle */
		mr->mr_bindinfo = *bind;
		mr->mr_logmttpgsz = mtt_pgsize_bits;

	} else {
		/*
		 * Check if the memory region MTT is shared by any other MRs.
		 * Since the resource may be shared between multiple memory
		 * regions (as a result of a "RegisterSharedMR()" verb) it is
		 * important that we not unbind any resources prematurely.
		 */
		if (!TAVOR_MTT_IS_SHARED(swrc_old)) {
			/*
			 * Unbind the old mapping for this memory region, but
			 * retain the ddi_dma_handle_t for reuse in the bind
			 * operation below.  Note: This can only be done here
			 * because the region being reregistered is not
			 * currently shared.  Also if original memory region
			 * was bound for IOMMU bypass and the new region can
			 * not use bypass, then a new DMA handle will be
			 * necessary.
			 */
			if (TAVOR_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
				mr->mr_bindinfo.bi_free_dmahdl = 0;
				tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
				dmahdl = mr->mr_bindinfo.bi_dmahdl;
				reuse_dmahdl = 1;
			} else {
				tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
				dmahdl = NULL;
				reuse_dmahdl = 0;
			}
		} else {
			/* Shared: leave the old binding untouched */
			dmahdl = NULL;
			reuse_dmahdl = 0;
		}

		/*
		 * Bind the new memory and determine the mapped addresses.
		 * As described, this routine and tavor_mr_fast_mtt_write()
		 * do the majority of the work for the memory registration
		 * operations.  Note: When we successfully finish the binding,
		 * we will set the "bi_free_dmahdl" flag to indicate that
		 * even though we may have reused the ddi_dma_handle_t we do
		 * wish it to be freed up at some later time.  Note also that
		 * if we fail, we may need to cleanup the ddi_dma_handle_t.
		 */
		bind->bi_bypass = bind_type;
		status = tavor_mr_mem_bind(state, bind, dmahdl, sleep);
		if (status != DDI_SUCCESS) {
			if (reuse_dmahdl) {
				ddi_dma_free_handle(&dmahdl);
			}

			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in tavor_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 */
			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed mem bind");
			goto mrrereghelp_fail;
		}
		if (reuse_dmahdl) {
			bind->bi_free_dmahdl = 1;
		}

		/*
		 * Allocate the new MTT entries resource
		 */
		status = tavor_rsrc_alloc(state, TAVOR_MTT,
		    TAVOR_NUMMTT_TO_MTTSEG(nummtt_needed), sleep, &mtt);
		if (status != DDI_SUCCESS) {
			/*
			 * Deregister will be called upon returning failure
			 * from this routine; see the notes above.  But we do
			 * need to unbind the newly bound memory before
			 * returning.
			 */
			tavor_mr_mem_unbind(state, bind);
			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MTT");
			goto mrrereghelp_fail;
		}

		/*
		 * Allocate MTT reference count (to track shared memory
		 * regions).  As mentioned elsewhere above, this reference
		 * count resource may never be used on the given memory region,
		 * but if it is ever later registered as a "shared" memory
		 * region then this resource will be necessary.  Note: This
		 * is only necessary here if the existing memory region is
		 * already being shared (because otherwise we already have
		 * a useable reference count resource).
		 */
		if (TAVOR_MTT_IS_SHARED(swrc_old)) {
			status = tavor_rsrc_alloc(state, TAVOR_REFCNT, 1,
			    sleep, &mtt_refcnt);
			if (status != DDI_SUCCESS) {
				/*
				 * Deregister will be called upon returning
				 * failure from this routine; see the notes
				 * above.  But we need to unbind the newly
				 * bound memory and free up the newly
				 * allocated MTT entries before returning.
				 */
				tavor_mr_mem_unbind(state, bind);
				tavor_rsrc_free(state, &mtt);
				*dereg_level =
				    TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

				/* Set "status"/"errormsg", goto failure */
				TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE,
				    "failed reference count");
				goto mrrereghelp_fail;
			}
			swrc_new = (tavor_sw_refcnt_t *)mtt_refcnt->tr_addr;
			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_new))
			TAVOR_MTT_REFCNT_INIT(swrc_new);
		} else {
			mtt_refcnt = mr->mr_mttrefcntp;
		}

		/*
		 * Using the new mapping and the new MTT resources, write the
		 * updated entries to MTT
		 */
		status = tavor_mr_fast_mtt_write(mtt, bind, mtt_pgsize_bits);
		if (status != DDI_SUCCESS) {
			/*
			 * Deregister will be called upon returning failure
			 * from this routine; see the notes above.  But we
			 * need to unbind the newly bound memory, free up the
			 * newly allocated MTT entries, and (possibly) free
			 * the new MTT reference count resource before
			 * returning.
			 *
			 * NOTE(review): the reuse path above reports
			 * ibc_get_ci_failure(0) for a failed MTT write while
			 * this path reports IBT_INSUFF_RESOURCE -- confirm
			 * which error code is intended.
			 */
			if (TAVOR_MTT_IS_SHARED(swrc_old)) {
				tavor_rsrc_free(state, &mtt_refcnt);
			}
			tavor_mr_mem_unbind(state, bind);
			tavor_rsrc_free(state, &mtt);
			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed write mtt");
			goto mrrereghelp_fail;
		}

		/*
		 * Check if the memory region MTT is shared by any other MRs.
		 * Since the resource may be shared between multiple memory
		 * regions (as a result of a "RegisterSharedMR()" verb) it is
		 * important that we not free up any resources prematurely.
		 */
		if (TAVOR_MTT_IS_SHARED(swrc_old)) {
			/* Decrement MTT reference count for "old" region */
			(void) tavor_mtt_refcnt_dec(mr->mr_mttrefcntp);
		} else {
			/* Free up the old MTT entries resource */
			tavor_rsrc_free(state, &mr->mr_mttrsrcp);
		}

		/* Put the updated information into the mrhdl */
		mr->mr_bindinfo = *bind;
		mr->mr_logmttpgsz = mtt_pgsize_bits;
		mr->mr_mttrsrcp = mtt;
		mr->mr_mttrefcntp = mtt_refcnt;
	}

	/*
	 * Calculate and return the updated MTT address (in the DDR address
	 * space).  This will be used by the caller (tavor_mr_reregister) in
	 * the updated MPT entry
	 */
	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
	mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;
	*mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx <<
	    TAVOR_MTT_SIZE_SHIFT);

	TAVOR_TNF_EXIT(tavor_mr_rereg_xlat_helper);
	return (DDI_SUCCESS);

mrrereghelp_fail:
	TNF_PROBE_1(tavor_mr_rereg_xlat_helper_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mr_rereg_xlat_helper);
	return (status);
}


/*
 * tavor_mr_nummtt_needed()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
static uint64_t
tavor_mr_nummtt_needed(tavor_state_t *state, tavor_bind_info_t *bind,
    uint_t *mtt_pgsize_bits)
{
	uint64_t	pg_offset_mask;
	uint64_t	pg_offset, tmp_length;

	/*
	 * For now we specify the page size as 8Kb (the default page size for
	 * the sun4u architecture), or 4Kb for x86.  Figure out optimal page
	 * size by examining the dmacookies XXX
	 */
	*mtt_pgsize_bits = PAGESHIFT;

	/*
	 * The MTT count is the number of native pages spanned by the region
	 * [bi_addr, bi_addr + bi_len): fold the region's offset within its
	 * first page into the length, then round up to whole pages.
	 */
	pg_offset_mask = ((uint64_t)1 << *mtt_pgsize_bits) - 1;
	pg_offset = bind->bi_addr & pg_offset_mask;
	tmp_length = pg_offset + (bind->bi_len - 1);
	return ((tmp_length >> *mtt_pgsize_bits) + 1);
}


/*
 * tavor_mr_mem_bind()
 *    Context: Can be called from interrupt or base context.
 *
 * Allocate (if "dmahdl" is NULL) a DMA handle and bind the memory described
 * by "bind" to it, filling in bind->bi_dmacookie and bind->bi_cookiecnt with
 * the first cookie and the cookie count.  Returns DDI_SUCCESS or the
 * underlying DDI error code; on failure any handle allocated here is freed.
 */
static int
tavor_mr_mem_bind(tavor_state_t *state, tavor_bind_info_t *bind,
    ddi_dma_handle_t dmahdl, uint_t sleep)
{
	ddi_dma_attr_t	dma_attr;
	int		(*callback)(caddr_t);
	uint_t		dma_xfer_mode;
	int		status;

	/* bi_type must be set to a meaningful value to get a bind handle */
	ASSERT(bind->bi_type == TAVOR_BINDHDL_VADDR ||
	    bind->bi_type == TAVOR_BINDHDL_BUF ||
	    bind->bi_type == TAVOR_BINDHDL_UBUF);

	TAVOR_TNF_ENTER(tavor_mr_mem_bind);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))

	/* Set the callback flag appropriately */
	callback = (sleep == TAVOR_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;

	/* Determine whether to map STREAMING or CONSISTENT */
	dma_xfer_mode = (bind->bi_flags & IBT_MR_NONCOHERENT) ?
	    DDI_DMA_STREAMING : DDI_DMA_CONSISTENT;

	/*
	 * Initialize many of the default DMA attributes.  Then, if we're
	 * bypassing the IOMMU, set the DDI_DMA_FORCE_PHYSICAL flag.
	 */
	if (dmahdl == NULL) {
		tavor_dma_attr_init(&dma_attr);
#ifdef	__sparc
		/*
		 * First, disable streaming and switch to consistent if
		 * configured to do so and IOMMU BYPASS is enabled.
		 */
		if (state->ts_cfg_profile->cp_disable_streaming_on_bypass &&
		    dma_xfer_mode == DDI_DMA_STREAMING &&
		    bind->bi_bypass == TAVOR_BINDMEM_BYPASS) {
			dma_xfer_mode = DDI_DMA_CONSISTENT;
		}

		/*
		 * Then request IOMMU bypass (DDI_DMA_FORCE_PHYSICAL) only
		 * for CONSISTENT mappings; if STREAMING is still in effect
		 * at this point, "bypass" is not applied.
		 */
		if ((dma_xfer_mode == DDI_DMA_CONSISTENT) &&
		    (bind->bi_bypass == TAVOR_BINDMEM_BYPASS)) {
			dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
		}
#endif
		/* Allocate a DMA handle for the binding */
		status = ddi_dma_alloc_handle(state->ts_dip, &dma_attr,
		    callback, NULL, &bind->bi_dmahdl);
		if (status != DDI_SUCCESS) {
			TNF_PROBE_0(tavor_mr_mem_bind_dmahdl_fail,
			    TAVOR_TNF_ERROR, "");
			TAVOR_TNF_EXIT(tavor_mr_mem_bind);
			return (status);
		}
		/* We own this handle; remember to free it at unbind time */
		bind->bi_free_dmahdl = 1;

	} else {
		/* Caller supplied the handle; do not free it on unbind */
		bind->bi_dmahdl = dmahdl;
		bind->bi_free_dmahdl = 0;
	}

	/*
	 * Bind the memory to get the PCI mapped addresses.  The decision
	 * to call ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle()
	 * is determined by the "bi_type" flag.  Note: if the bind operation
	 * fails then we have to free up the DMA handle and return error.
	 */
	if (bind->bi_type == TAVOR_BINDHDL_VADDR) {
		status = ddi_dma_addr_bind_handle(bind->bi_dmahdl, NULL,
		    (caddr_t)(uintptr_t)bind->bi_addr, bind->bi_len,
		    (DDI_DMA_RDWR | dma_xfer_mode), callback, NULL,
		    &bind->bi_dmacookie, &bind->bi_cookiecnt);
	} else {  /* TAVOR_BINDHDL_BUF || TAVOR_BINDHDL_UBUF */
		status = ddi_dma_buf_bind_handle(bind->bi_dmahdl,
		    bind->bi_buf, (DDI_DMA_RDWR | dma_xfer_mode), callback,
		    NULL, &bind->bi_dmacookie, &bind->bi_cookiecnt);
	}

	if (status != DDI_DMA_MAPPED) {
		/* Undo the handle allocation (only if we did it above) */
		if (bind->bi_free_dmahdl != 0) {
			ddi_dma_free_handle(&bind->bi_dmahdl);
		}
		TNF_PROBE_0(tavor_mr_mem_bind_dmabind_fail, TAVOR_TNF_ERROR,
		    "");
		TAVOR_TNF_EXIT(tavor_mr_mem_bind);
		return (status);
	}

	TAVOR_TNF_EXIT(tavor_mr_mem_bind);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_mem_unbind()
 *    Context: Can be called from interrupt or base context.
 *
 * Undo tavor_mr_mem_bind(): release the UBUF buf (if any), unbind the DMA
 * mapping, and free the DMA handle if it was allocated by the bind routine.
 */
static void
tavor_mr_mem_unbind(tavor_state_t *state, tavor_bind_info_t *bind)
{
	int	status;

	TAVOR_TNF_ENTER(tavor_mr_mem_unbind);

	/*
	 * In case of TAVOR_BINDHDL_UBUF, the memory bi_buf points to
	 * is actually allocated by ddi_umem_iosetup() internally, then
	 * it's required to free it here.  Reset bi_type to
	 * TAVOR_BINDHDL_NONE so that it is not freed again later.
	 */
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
	if (bind->bi_type == TAVOR_BINDHDL_UBUF) {
		freerbuf(bind->bi_buf);
		bind->bi_type = TAVOR_BINDHDL_NONE;
	}
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))

	/*
	 * Unbind the DMA memory for the region
	 *
	 * Note: The only way ddi_dma_unbind_handle() currently
	 * can return an error is if the handle passed in is invalid.
	 * Since this should never happen, we choose to return void
	 * from this function!  If this does return an error, however,
	 * then we print a warning message to the console.
	 */
	status = ddi_dma_unbind_handle(bind->bi_dmahdl);
	if (status != DDI_SUCCESS) {
		TAVOR_WARNING(state, "failed to unbind DMA mapping");
		TNF_PROBE_0(tavor_mr_mem_unbind_dmaunbind_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_mr_mem_unbind);
		return;
	}

	/* Free up the DMA handle (only if allocated in tavor_mr_mem_bind) */
	if (bind->bi_free_dmahdl != 0) {
		ddi_dma_free_handle(&bind->bi_dmahdl);
	}

	TAVOR_TNF_EXIT(tavor_mr_mem_unbind);
}


/*
 * tavor_mr_fast_mtt_write()
 *    Context: Can be called from interrupt or base context.
 *
 * Walk the bound region's DMA cookie list and write one MTT entry (the
 * page-aligned DMA address OR'd with TAVOR_MTT_ENTRY_PRESET) for every
 * page covered by every cookie.  Always returns DDI_SUCCESS.
 */
static int
tavor_mr_fast_mtt_write(tavor_rsrc_t *mtt, tavor_bind_info_t *bind,
    uint32_t mtt_pgsize_bits)
{
	ddi_dma_cookie_t	dmacookie;
	uint_t			cookie_cnt;
	uint64_t		*mtt_table;
	uint64_t		mtt_entry;
	uint64_t		addr, endaddr;
	uint64_t		pagesize;
	int			i;

	TAVOR_TNF_ENTER(tavor_mr_fast_mtt_write);

	/* Calculate page size from the suggested value passed in */
	pagesize = ((uint64_t)1 << mtt_pgsize_bits);

	/*
	 * Walk the "cookie list" and fill in the MTT table entries
	 */
	i = 0;
	mtt_table = (uint64_t *)mtt->tr_addr;
	dmacookie = bind->bi_dmacookie;
	cookie_cnt = bind->bi_cookiecnt;
	while (cookie_cnt-- > 0) {
		addr = dmacookie.dmac_laddress;
		endaddr = addr + (dmacookie.dmac_size - 1);
		/* Round the cookie start down to a page boundary */
		addr = addr & ~((uint64_t)pagesize - 1);
		while (addr <= endaddr) {
			/*
			 * Fill in the mapped addresses (calculated above) and
			 * set TAVOR_MTT_ENTRY_PRESET flag for each MTT entry.
			 */
			mtt_entry = addr | TAVOR_MTT_ENTRY_PRESET;
			ddi_put64(mtt->tr_acchdl, &mtt_table[i], mtt_entry);
			addr += pagesize;
			i++;

			/*
			 * "addr" wrapping to zero means the cookie address
			 * from the caller was bogus; warn once (system-wide)
			 * and abandon this cookie to avoid looping forever.
			 */
			if (addr == 0) {
				static int do_once = 1;
				_NOTE(SCHEME_PROTECTS_DATA("safe sharing",
				    do_once))
				if (do_once) {
					do_once = 0;
					cmn_err(CE_NOTE, "probable error in "
					    "dma_cookie address from caller\n");
				}
				break;
			}
		}

		/*
		 * When we've reached the end of the current DMA cookie,
		 * jump to the next cookie (if there are more)
		 */
		if (cookie_cnt != 0) {
			ddi_dma_nextcookie(bind->bi_dmahdl, &dmacookie);
		}
	}

	TAVOR_TNF_EXIT(tavor_mr_fast_mtt_write);
	return (DDI_SUCCESS);
}

/*
 * tavor_mr_fast_mtt_write_fmr()
 *    Context: Can be called from interrupt or base context.
 *
 * Fill in MTT entries from an FMR physical buffer list (one entry per
 * buffer in mem_pattr->pmr_addr_list).  Always returns DDI_SUCCESS.
 */
static int
tavor_mr_fast_mtt_write_fmr(tavor_rsrc_t *mtt, ibt_pmr_attr_t *mem_pattr,
    uint32_t mtt_pgsize_bits)
{
	uint64_t	*mtt_table;
	ibt_phys_addr_t	*buf;
	uint64_t	mtt_entry;
	uint64_t	addr, first_addr, endaddr;
	uint64_t	pagesize;
	int		i;

	TAVOR_TNF_ENTER(tavor_mr_fast_mtt_write_fmr);

	/* Calculate page size from the suggested value passed in */
	pagesize = ((uint64_t)1 << mtt_pgsize_bits);

	/*
	 * Walk the "buf list" and fill in the MTT table entries
	 *
	 * NOTE(review): the inner loop below always stores to mtt_table[i]
	 * (the buffer index), so a buffer spanning more than one page would
	 * overwrite the same MTT slot; this appears to assume each buffer
	 * covers exactly one page (pmr_buf_sz == pagesize) -- confirm
	 * against the FMR registration path.
	 */
	mtt_table = (uint64_t *)mtt->tr_addr;
	for (i = 0; i < mem_pattr->pmr_num_buf; i++) {
		buf = &mem_pattr->pmr_addr_list[i];

		/*
		 * For first cookie, use the offset field to determine where
		 * the buffer starts.  The end addr is then calculated with
		 * the offset in mind.
		 */
		if (i == 0) {
			first_addr = addr = buf->p_laddr +
			    mem_pattr->pmr_offset;
			endaddr = addr + (mem_pattr->pmr_buf_sz - 1) -
			    mem_pattr->pmr_offset;
		/*
		 * For last cookie, determine end addr based on starting
		 * address and size of the total buffer.
		 *
		 * NOTE(review): by C precedence this computes
		 * (first_addr + pmr_len) & (pmr_buf_sz - 1), i.e. the
		 * residual length within the final buffer -- presumably
		 * intentional (pmr_buf_sz a power of two); verify.
		 */
		} else if (i == mem_pattr->pmr_num_buf - 1) {
			addr = buf->p_laddr;
			endaddr = addr + (first_addr + mem_pattr->pmr_len &
			    (mem_pattr->pmr_buf_sz - 1));
		/*
		 * For the middle cookies case, start and end addr are
		 * straightforward.  Just use the laddr, and the size, as all
		 * middle cookies are a set size.
		 */
		} else {
			addr = buf->p_laddr;
			endaddr = addr + (mem_pattr->pmr_buf_sz - 1);
		}

		/* Round the start address down to a page boundary */
		addr = addr & ~((uint64_t)pagesize - 1);
		while (addr <= endaddr) {
			/*
			 * Fill in the mapped addresses (calculated above) and
			 * set TAVOR_MTT_ENTRY_PRESET flag for each MTT entry.
			 */
			mtt_entry = addr | TAVOR_MTT_ENTRY_PRESET;
			ddi_put64(mtt->tr_acchdl, &mtt_table[i], mtt_entry);
			addr += pagesize;
		}
	}

	TAVOR_TNF_EXIT(tavor_mr_fast_mtt_write_fmr);
	return (DDI_SUCCESS);
}


/*
 * tavor_mtt_refcnt_inc()
 *    Context: Can be called from interrupt or base context.
 *
 * Atomically (under swrc_lock) increment the MTT's software reference
 * count.  Returns the count value *before* the increment.
 */
static int
tavor_mtt_refcnt_inc(tavor_rsrc_t *rsrc)
{
	tavor_sw_refcnt_t	*rc;
	uint32_t		cnt;

	rc = (tavor_sw_refcnt_t *)rsrc->tr_addr;

	/* Increment the MTT's reference count */
	mutex_enter(&rc->swrc_lock);
	TNF_PROBE_1_DEBUG(tavor_mtt_refcnt_inc, TAVOR_TNF_TRACE, "",
	    tnf_uint, refcnt, rc->swrc_refcnt);
	cnt = rc->swrc_refcnt++;
	mutex_exit(&rc->swrc_lock);

	return (cnt);
}


/*
 * tavor_mtt_refcnt_dec()
 *    Context: Can be called from interrupt or base context.
 *
 * Atomically (under swrc_lock) decrement the MTT's software reference
 * count.  Returns the count value *after* the decrement (so zero means
 * no references remain).
 */
static int
tavor_mtt_refcnt_dec(tavor_rsrc_t *rsrc)
{
	tavor_sw_refcnt_t	*rc;
	uint32_t		cnt;

	rc = (tavor_sw_refcnt_t *)rsrc->tr_addr;

	/* Decrement the MTT's reference count */
	mutex_enter(&rc->swrc_lock);
	cnt = --rc->swrc_refcnt;
	TNF_PROBE_1_DEBUG(tavor_mtt_refcnt_dec, TAVOR_TNF_TRACE, "",
	    tnf_uint, refcnt, rc->swrc_refcnt);
	mutex_exit(&rc->swrc_lock);

	return (cnt);
}