1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * hermon_misc.c 29 * Hermon Miscellaneous routines - Address Handle, Multicast, Protection 30 * Domain, and port-related operations 31 * 32 * Implements all the routines necessary for allocating, freeing, querying 33 * and modifying Address Handles and Protection Domains. Also implements 34 * all the routines necessary for adding and removing Queue Pairs to/from 35 * Multicast Groups. Lastly, it implements the routines necessary for 36 * port-related query and modify operations. 
37 */ 38 39 #include <sys/types.h> 40 #include <sys/conf.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/modctl.h> 44 #include <sys/bitmap.h> 45 #include <sys/sysmacros.h> 46 47 #include <sys/ib/adapters/hermon/hermon.h> 48 49 extern uint32_t hermon_kernel_data_ro; 50 51 /* used for helping uniquify fmr pool taskq name */ 52 static uint_t hermon_debug_fmrpool_cnt = 0x00000000; 53 54 static int hermon_mcg_qplist_add(hermon_state_t *state, hermon_mcghdl_t mcg, 55 hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp, uint_t *qp_found); 56 static int hermon_mcg_qplist_remove(hermon_mcghdl_t mcg, 57 hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp); 58 static void hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp); 59 static void hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp); 60 static uint_t hermon_mcg_walk_mgid_hash(hermon_state_t *state, 61 uint64_t start_indx, ib_gid_t mgid, uint_t *prev_indx); 62 static void hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg, 63 hermon_hw_mcg_t *mcg_hdr, ib_gid_t mgid, hermon_rsrc_t *mcg_rsrc); 64 static int hermon_mcg_hash_list_remove(hermon_state_t *state, uint_t curr_indx, 65 uint_t prev_indx, hermon_hw_mcg_t *mcg_entry); 66 static int hermon_mcg_entry_invalidate(hermon_state_t *state, 67 hermon_hw_mcg_t *mcg_entry, uint_t indx); 68 static int hermon_mgid_is_valid(ib_gid_t gid); 69 static int hermon_mlid_is_valid(ib_lid_t lid); 70 static void hermon_fmr_processing(void *fmr_args); 71 static int hermon_fmr_cleanup(hermon_state_t *state, hermon_fmrhdl_t pool); 72 static void hermon_fmr_cache_init(hermon_fmrhdl_t fmr); 73 static void hermon_fmr_cache_fini(hermon_fmrhdl_t fmr); 74 static int hermon_fmr_avl_compare(const void *q, const void *e); 75 76 77 #define HERMON_MAX_DBR_PAGES_PER_USER 64 78 #define HERMON_DBR_KEY(index, page) \ 79 (((uint64_t)index) * HERMON_MAX_DBR_PAGES_PER_USER + (page)) 80 81 static hermon_udbr_page_t * 82 hermon_dbr_new_user_page(hermon_state_t *state, uint_t index, 83 uint_t page) 84 { 85 
hermon_udbr_page_t *pagep; 86 ddi_dma_attr_t dma_attr; 87 uint_t cookiecnt; 88 int status; 89 hermon_umap_db_entry_t *umapdb; 90 91 pagep = kmem_alloc(sizeof (*pagep), KM_SLEEP); 92 pagep->upg_index = page; 93 pagep->upg_nfree = PAGESIZE / sizeof (hermon_dbr_t); 94 95 /* Allocate 1 bit per dbr for free/alloc management (0 => "free") */ 96 pagep->upg_free = kmem_zalloc(PAGESIZE / sizeof (hermon_dbr_t) / 8, 97 KM_SLEEP); 98 pagep->upg_kvaddr = ddi_umem_alloc(PAGESIZE, DDI_UMEM_SLEEP, 99 &pagep->upg_umemcookie); /* not HERMON_PAGESIZE here */ 100 101 pagep->upg_buf = ddi_umem_iosetup(pagep->upg_umemcookie, 0, 102 PAGESIZE, B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP); 103 104 hermon_dma_attr_init(state, &dma_attr); 105 status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, 106 DDI_DMA_SLEEP, NULL, &pagep->upg_dmahdl); 107 if (status != DDI_SUCCESS) { 108 IBTF_DPRINTF_L2("hermon", "hermon_new_user_page: " 109 "ddi_dma_buf_bind_handle failed: %d", status); 110 return (NULL); 111 } 112 status = ddi_dma_buf_bind_handle(pagep->upg_dmahdl, 113 pagep->upg_buf, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 114 DDI_DMA_SLEEP, NULL, &pagep->upg_dmacookie, &cookiecnt); 115 if (status != DDI_SUCCESS) { 116 IBTF_DPRINTF_L2("hermon", "hermon_dbr_new_user_page: " 117 "ddi_dma_buf_bind_handle failed: %d", status); 118 ddi_dma_free_handle(&pagep->upg_dmahdl); 119 return (NULL); 120 } 121 ASSERT(cookiecnt == 1); 122 123 /* create db entry for mmap */ 124 umapdb = hermon_umap_db_alloc(state->hs_instance, 125 HERMON_DBR_KEY(index, page), MLNX_UMAP_DBRMEM_RSRC, 126 (uint64_t)(uintptr_t)pagep); 127 hermon_umap_db_add(umapdb); 128 return (pagep); 129 } 130 131 132 /*ARGSUSED*/ 133 static int 134 hermon_user_dbr_alloc(hermon_state_t *state, uint_t index, 135 ddi_acc_handle_t *acchdl, hermon_dbr_t **vdbr, uint64_t *pdbr, 136 uint64_t *mapoffset) 137 { 138 hermon_user_dbr_t *udbr; 139 hermon_udbr_page_t *pagep; 140 uint_t next_page; 141 int dbr_index; 142 int i1, i2, i3, last; 143 uint64_t u64, mask; 144 145 
	mutex_enter(&state->hs_dbr_lock);

	/* Find (or create) the doorbell record list for this UAR index */
	for (udbr = state->hs_user_dbr; udbr != NULL; udbr = udbr->udbr_link)
		if (udbr->udbr_index == index)
			break;
	if (udbr == NULL) {
		/* First allocation for this index: link a new list head */
		udbr = kmem_alloc(sizeof (*udbr), KM_SLEEP);
		udbr->udbr_link = state->hs_user_dbr;
		state->hs_user_dbr = udbr;
		udbr->udbr_index = index;
		udbr->udbr_pagep = NULL;
	}
	pagep = udbr->udbr_pagep;
	/* next_page is only used if we must allocate a brand-new page */
	next_page = (pagep == NULL) ? 0 : (pagep->upg_index + 1);
	/* Walk the page list looking for one with a free dbr slot */
	while (pagep != NULL)
		if (pagep->upg_nfree > 0)
			break;
		else
			pagep = pagep->upg_link;
	if (pagep == NULL) {
		/* All pages full (or none yet): grow the list by one page */
		pagep = hermon_dbr_new_user_page(state, index, next_page);
		if (pagep == NULL) {
			mutex_exit(&state->hs_dbr_lock);
			return (DDI_FAILURE);
		}
		pagep->upg_link = udbr->udbr_pagep;
		udbr->udbr_pagep = pagep;
	}

	/* Since nfree > 0, we're assured the loops below will succeed */

	/* First, find a 64-bit (not ~0) that has a free dbr */
	last = PAGESIZE / sizeof (uint64_t) / 64;
	mask = ~0ull;
	for (i1 = 0; i1 < last; i1++)
		if ((pagep->upg_free[i1] & mask) != mask)
			break;
	u64 = pagep->upg_free[i1];

	/* Second, find a byte (not 0xff) that has a free dbr */
	last = sizeof (uint64_t) / sizeof (uint8_t);
	for (i2 = 0, mask = 0xff; i2 < last; i2++, mask <<= 8)
		if ((u64 & mask) != mask)
			break;

	/* Third, find a bit that is free (0) */
	for (i3 = 0; i3 < sizeof (uint64_t) / sizeof (uint8_t); i3++)
		if ((u64 & (1ul << (i3 + 8 * i2))) == 0)
			break;

	/* Mark it as allocated */
	pagep->upg_free[i1] |= (1ul << (i3 + 8 * i2));

	/* dbr_index = (word * 64) + (byte * 8) + bit within the bitmap */
	dbr_index = ((i1 * sizeof (uint64_t)) + i2) * sizeof (uint64_t) + i3;
	pagep->upg_nfree--;
	((uint64_t *)(void *)pagep->upg_kvaddr)[dbr_index] = 0; /* clear dbr */
	/* Encode (index, page) and resource type into the mmap offset */
	*mapoffset = ((HERMON_DBR_KEY(index, pagep->upg_index) <<
	    MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_DBRMEM_RSRC) << PAGESHIFT;
	*vdbr = (hermon_dbr_t *)((uint64_t *)(void *)pagep->upg_kvaddr +
	    dbr_index);
	*pdbr = pagep->upg_dmacookie.dmac_laddress + dbr_index *
	    sizeof (uint64_t);

	mutex_exit(&state->hs_dbr_lock);
	return (DDI_SUCCESS);
}

/*
 * hermon_user_dbr_free()
 *    Return one user doorbell record to the free pool: locate the
 *    owning page by kernel virtual address and clear its bit in the
 *    page's free-slot bitmap.  Pages themselves are never freed here.
 */
static void
hermon_user_dbr_free(hermon_state_t *state, uint_t index, hermon_dbr_t *record)
{
	hermon_user_dbr_t *udbr;
	hermon_udbr_page_t *pagep;
	caddr_t kvaddr;
	uint_t dbr_index;
	uint_t max_free = PAGESIZE / sizeof (hermon_dbr_t);
	int i1, i2;

	dbr_index = (uintptr_t)record & PAGEOFFSET; /* offset (not yet index) */
	kvaddr = (caddr_t)record - dbr_index;
	dbr_index /= sizeof (hermon_dbr_t); /* now it's the index */

	mutex_enter(&state->hs_dbr_lock);
	/* Find the record list for this UAR index */
	for (udbr = state->hs_user_dbr; udbr != NULL; udbr = udbr->udbr_link)
		if (udbr->udbr_index == index)
			break;
	if (udbr == NULL) {
		IBTF_DPRINTF_L2("hermon", "free user dbr: udbr struct not "
		    "found for index %x", index);
		mutex_exit(&state->hs_dbr_lock);
		return;
	}
	/* Find the page that contains this record */
	for (pagep = udbr->udbr_pagep; pagep != NULL; pagep = pagep->upg_link)
		if (pagep->upg_kvaddr == kvaddr)
			break;
	if (pagep == NULL) {
		IBTF_DPRINTF_L2("hermon", "free user dbr: pagep struct not"
		    " found for index %x, kvaddr %p, DBR index %x",
		    index, kvaddr, dbr_index);
		mutex_exit(&state->hs_dbr_lock);
		return;
	}
	/* Guard against freeing into an already-all-free page */
	if (pagep->upg_nfree >= max_free) {
		IBTF_DPRINTF_L2("hermon", "free user dbr: overflow: "
		    "UCE index %x, DBR index %x", index, dbr_index);
		mutex_exit(&state->hs_dbr_lock);
		return;
	}
	ASSERT(dbr_index < max_free);
	i1 = dbr_index / 64;
	i2 = dbr_index % 64;
	/* The slot must currently be marked allocated before we clear it */
	ASSERT((pagep->upg_free[i1] & (1ul << i2)) == (1ul << i2));
	pagep->upg_free[i1] &= ~(1ul << i2);
	pagep->upg_nfree++;
	mutex_exit(&state->hs_dbr_lock);
}

/*
 * hermon_dbr_page_alloc()
 *    first page allocation - called from attach or open
 *    in this case, we want exactly one page per call, and aligned on a
 *    page - and may need to be
mapped to the user for access
 */
int
hermon_dbr_page_alloc(hermon_state_t *state, hermon_dbr_info_t **dinfo)
{
	int status;
	ddi_dma_handle_t dma_hdl;
	ddi_acc_handle_t acc_hdl;
	ddi_dma_attr_t dma_attr;
	ddi_dma_cookie_t cookie;
	uint_t cookie_cnt;
	int i;
	hermon_dbr_info_t *info;
	caddr_t dmaaddr;
	uint64_t dmalen;

	info = kmem_zalloc(sizeof (hermon_dbr_info_t), KM_SLEEP);

	/*
	 * Initialize many of the default DMA attributes.  Then set additional
	 * alignment restrictions if necessary for the dbr memory, meaning
	 * page aligned.  Also use the configured value for IOMMU bypass
	 */
	hermon_dma_attr_init(state, &dma_attr);
	dma_attr.dma_attr_align = PAGESIZE;
	dma_attr.dma_attr_sgllen = 1;		/* make sure only one cookie */

	status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
	    DDI_DMA_SLEEP, NULL, &dma_hdl);
	if (status != DDI_SUCCESS) {
		kmem_free((void *)info, sizeof (hermon_dbr_info_t));
		cmn_err(CE_NOTE, "dbr DMA handle alloc failed\n");
		return (DDI_FAILURE);
	}

	status = ddi_dma_mem_alloc(dma_hdl, PAGESIZE,
	    &state->hs_reg_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP,
	    NULL, &dmaaddr, (size_t *)&dmalen, &acc_hdl);
	if (status != DDI_SUCCESS)	{
		ddi_dma_free_handle(&dma_hdl);
		cmn_err(CE_CONT, "dbr DMA mem alloc failed(status %d)", status);
		kmem_free((void *)info, sizeof (hermon_dbr_info_t));
		return (DDI_FAILURE);
	}

	/* this memory won't be IB registered, so do the bind here */
	status = ddi_dma_addr_bind_handle(dma_hdl, NULL,
	    dmaaddr, (size_t)dmalen, DDI_DMA_RDWR |
	    DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &cookie, &cookie_cnt);
	if (status != DDI_SUCCESS) {
		ddi_dma_mem_free(&acc_hdl);
		ddi_dma_free_handle(&dma_hdl);
		kmem_free((void *)info, sizeof (hermon_dbr_info_t));
		cmn_err(CE_CONT, "dbr DMA bind handle failed (status %d)",
		    status);
		return (DDI_FAILURE);
	}
	*dinfo = info;		/* Pass back the pointer */

	/* init the info structure with returned info */
	info->dbr_dmahdl = dma_hdl;
	info->dbr_acchdl = acc_hdl;
	info->dbr_page = (hermon_dbr_t *)(void *)dmaaddr;
	info->dbr_link = NULL;
	/* extract the phys addr from the cookie */
	info->dbr_paddr = cookie.dmac_laddress;
	info->dbr_firstfree = 0;
	info->dbr_nfree = HERMON_NUM_DBR_PER_PAGE;
	/*
	 * link all DBrs onto the free list: each free record holds the
	 * index of the next free record (a simple in-place freelist)
	 */
	for (i = 0; i < HERMON_NUM_DBR_PER_PAGE; i++) {
		info->dbr_page[i] = i + 1;
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_dbr_alloc()
 *    DBr record allocation - called from alloc cq/qp/srq
 *    will check for available dbrs in current
 *    page - if needed it will allocate another and link them
 */

int
hermon_dbr_alloc(hermon_state_t *state, uint_t index, ddi_acc_handle_t *acchdl,
    hermon_dbr_t **vdbr, uint64_t *pdbr, uint64_t *mapoffset)
{
	hermon_dbr_t *record = NULL;
	hermon_dbr_info_t *info = NULL;
	uint32_t idx;
	int status;

	/* Non-kernel UAR indices are serviced from user-mappable pages */
	if (index != state->hs_kernel_uar_index)
		return (hermon_user_dbr_alloc(state, index, acchdl, vdbr, pdbr,
		    mapoffset));

	mutex_enter(&state->hs_dbr_lock);
	for (info = state->hs_kern_dbr; info != NULL; info = info->dbr_link)
		if (info->dbr_nfree != 0)
			break;		/* found a page w/ one available */

	if (info == NULL) {	/* did NOT find a page with one available */
		status = hermon_dbr_page_alloc(state, &info);
		if (status != DDI_SUCCESS) {
			/* do error handling */
			mutex_exit(&state->hs_dbr_lock);
			return (DDI_FAILURE);
		}
		/* got a new page, so link it in. */
		info->dbr_link = state->hs_kern_dbr;
		state->hs_kern_dbr = info;
	}
	/* Pop the head of the page's in-place freelist */
	idx = info->dbr_firstfree;
	record = info->dbr_page + idx;
	info->dbr_firstfree = *record;	/* free record holds next-free index */
	info->dbr_nfree--;
	*record = 0;

	*acchdl = info->dbr_acchdl;
	*vdbr = record;
	*pdbr = info->dbr_paddr + idx * sizeof (hermon_dbr_t);
	mutex_exit(&state->hs_dbr_lock);
	return (DDI_SUCCESS);
}

/*
 * hermon_dbr_free()
 *    DBr record deallocation - called from free cq/qp
 *    will update the counter in the header, and invalidate
 *    the dbr, but will NEVER free pages of dbrs - small
 *    price to pay, but userland access never will anyway
 */
void
hermon_dbr_free(hermon_state_t *state, uint_t indx, hermon_dbr_t *record)
{
	hermon_dbr_t *page;
	hermon_dbr_info_t *info;

	/* User doorbell records are returned to their own pool */
	if (indx != state->hs_kernel_uar_index) {
		hermon_user_dbr_free(state, indx, record);
		return;
	}
	/* Round the record address down to its containing page */
	page = (hermon_dbr_t *)(uintptr_t)((uintptr_t)record & PAGEMASK);
	mutex_enter(&state->hs_dbr_lock);
	for (info = state->hs_kern_dbr; info != NULL; info = info->dbr_link)
		if (info->dbr_page == page)
			break;
	ASSERT(info != NULL);
	/* Push the record back onto the page's in-place freelist */
	*record = info->dbr_firstfree;
	info->dbr_firstfree = record - info->dbr_page;
	info->dbr_nfree++;
	mutex_exit(&state->hs_dbr_lock);
}

/*
 * hermon_dbr_kern_free()
 *    Context: Can be called only from detach context.
 *
 *    Free all kernel dbr pages.  This includes the freeing of all the dma
 *    resources acquired during the allocation of the pages.
 *
 *    Also, free all the user dbr pages.
 */
void
hermon_dbr_kern_free(hermon_state_t *state)
{
	hermon_dbr_info_t	*info, *link;
	hermon_user_dbr_t	*udbr, *next;
	hermon_udbr_page_t	*pagep, *nextp;
	hermon_umap_db_entry_t	*umapdb;
	int			instance, status;
	uint64_t		value;
	extern			hermon_umap_db_t hermon_userland_rsrc_db;

	mutex_enter(&state->hs_dbr_lock);
	/* Tear down every kernel doorbell page and its DMA resources */
	for (info = state->hs_kern_dbr; info != NULL; info = link) {
		(void) ddi_dma_unbind_handle(info->dbr_dmahdl);
		ddi_dma_mem_free(&info->dbr_acchdl);	/* free page */
		ddi_dma_free_handle(&info->dbr_dmahdl);
		/* capture the link before freeing the node itself */
		link = info->dbr_link;
		kmem_free(info, sizeof (hermon_dbr_info_t));
	}

	udbr = state->hs_user_dbr;
	instance = state->hs_instance;
	/* umapdb lock is taken inside hs_dbr_lock; detach-only context */
	mutex_enter(&hermon_userland_rsrc_db.hdl_umapdb_lock);
	while (udbr != NULL) {
		pagep = udbr->udbr_pagep;
		/* Free each user page, its DMA binding and umap db entry */
		while (pagep != NULL) {
			/* probably need to remove "db" */
			(void) ddi_dma_unbind_handle(pagep->upg_dmahdl);
			ddi_dma_free_handle(&pagep->upg_dmahdl);
			freerbuf(pagep->upg_buf);
			ddi_umem_free(pagep->upg_umemcookie);
			status = hermon_umap_db_find_nolock(instance,
			    HERMON_DBR_KEY(udbr->udbr_index,
			    pagep->upg_index), MLNX_UMAP_DBRMEM_RSRC,
			    &value, HERMON_UMAP_DB_REMOVE, &umapdb);
			if (status == DDI_SUCCESS)
				hermon_umap_db_free(umapdb);
			kmem_free(pagep->upg_free,
			    PAGESIZE / sizeof (hermon_dbr_t) / 8);
			nextp = pagep->upg_link;
			kmem_free(pagep, sizeof (*pagep));
			pagep = nextp;
		}
		next = udbr->udbr_link;
		kmem_free(udbr, sizeof (*udbr));
		udbr = next;
	}
	mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
	mutex_exit(&state->hs_dbr_lock);
}

/*
 * hermon_ah_alloc()
 *    Context: Can be called only from user or kernel context.
482 */ 483 int 484 hermon_ah_alloc(hermon_state_t *state, hermon_pdhdl_t pd, 485 ibt_adds_vect_t *attr_p, hermon_ahhdl_t *ahhdl, uint_t sleepflag) 486 { 487 hermon_rsrc_t *rsrc; 488 hermon_hw_udav_t *udav; 489 hermon_ahhdl_t ah; 490 int status; 491 492 /* 493 * Someday maybe the "ibt_adds_vect_t *attr_p" will be NULL to 494 * indicate that we wish to allocate an "invalid" (i.e. empty) 495 * address handle XXX 496 */ 497 498 /* Validate that specified port number is legal */ 499 if (!hermon_portnum_is_valid(state, attr_p->av_port_num)) { 500 return (IBT_HCA_PORT_INVALID); 501 } 502 503 /* 504 * Allocate the software structure for tracking the address handle 505 * (i.e. the Hermon Address Handle struct). 506 */ 507 status = hermon_rsrc_alloc(state, HERMON_AHHDL, 1, sleepflag, &rsrc); 508 if (status != DDI_SUCCESS) { 509 return (IBT_INSUFF_RESOURCE); 510 } 511 ah = (hermon_ahhdl_t)rsrc->hr_addr; 512 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah)) 513 514 /* Increment the reference count on the protection domain (PD) */ 515 hermon_pd_refcnt_inc(pd); 516 517 udav = (hermon_hw_udav_t *)kmem_zalloc(sizeof (hermon_hw_udav_t), 518 KM_SLEEP); 519 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*udav)) 520 521 /* 522 * Fill in the UDAV data. We first zero out the UDAV, then populate 523 * it by then calling hermon_set_addr_path() to fill in the common 524 * portions that can be pulled from the "ibt_adds_vect_t" passed in 525 */ 526 status = hermon_set_addr_path(state, attr_p, 527 (hermon_hw_addr_path_t *)udav, HERMON_ADDRPATH_UDAV); 528 if (status != DDI_SUCCESS) { 529 hermon_pd_refcnt_dec(pd); 530 hermon_rsrc_free(state, &rsrc); 531 return (status); 532 } 533 udav->pd = pd->pd_pdnum; 534 udav->sl = attr_p->av_srvl; 535 536 /* 537 * Fill in the rest of the Hermon Address Handle struct. 538 * 539 * NOTE: We are saving away a copy of the "av_dgid.gid_guid" field 540 * here because we may need to return it later to the IBTF (as a 541 * result of a subsequent query operation). 
Unlike the other UDAV 542 * parameters, the value of "av_dgid.gid_guid" is not always preserved. 543 * The reason for this is described in hermon_set_addr_path(). 544 */ 545 ah->ah_rsrcp = rsrc; 546 ah->ah_pdhdl = pd; 547 ah->ah_udav = udav; 548 ah->ah_save_guid = attr_p->av_dgid.gid_guid; 549 *ahhdl = ah; 550 551 return (DDI_SUCCESS); 552 } 553 554 555 /* 556 * hermon_ah_free() 557 * Context: Can be called only from user or kernel context. 558 */ 559 /* ARGSUSED */ 560 int 561 hermon_ah_free(hermon_state_t *state, hermon_ahhdl_t *ahhdl, uint_t sleepflag) 562 { 563 hermon_rsrc_t *rsrc; 564 hermon_pdhdl_t pd; 565 hermon_ahhdl_t ah; 566 567 /* 568 * Pull all the necessary information from the Hermon Address Handle 569 * struct. This is necessary here because the resource for the 570 * AH is going to be freed up as part of this operation. 571 */ 572 ah = *ahhdl; 573 mutex_enter(&ah->ah_lock); 574 rsrc = ah->ah_rsrcp; 575 pd = ah->ah_pdhdl; 576 mutex_exit(&ah->ah_lock); 577 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah)) 578 579 /* Free the UDAV memory */ 580 kmem_free(ah->ah_udav, sizeof (hermon_hw_udav_t)); 581 582 /* Decrement the reference count on the protection domain (PD) */ 583 hermon_pd_refcnt_dec(pd); 584 585 /* Free the Hermon Address Handle structure */ 586 hermon_rsrc_free(state, &rsrc); 587 588 /* Set the ahhdl pointer to NULL and return success */ 589 *ahhdl = NULL; 590 591 return (DDI_SUCCESS); 592 } 593 594 595 /* 596 * hermon_ah_query() 597 * Context: Can be called from interrupt or base context. 598 */ 599 /* ARGSUSED */ 600 int 601 hermon_ah_query(hermon_state_t *state, hermon_ahhdl_t ah, hermon_pdhdl_t *pd, 602 ibt_adds_vect_t *attr_p) 603 { 604 mutex_enter(&ah->ah_lock); 605 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p)) 606 607 /* 608 * Pull the PD and UDAV from the Hermon Address Handle structure 609 */ 610 *pd = ah->ah_pdhdl; 611 612 /* 613 * Fill in "ibt_adds_vect_t". 
We call hermon_get_addr_path() to fill
	 * the common portions that can be pulled from the UDAV we pass in.
	 *
	 * NOTE: We will also fill the "av_dgid.gid_guid" field from the
	 * "ah_save_guid" field we have previously saved away.  The reason
	 * for this is described in hermon_ah_alloc() and hermon_ah_modify().
	 */
	hermon_get_addr_path(state, (hermon_hw_addr_path_t *)ah->ah_udav,
	    attr_p, HERMON_ADDRPATH_UDAV);

	attr_p->av_dgid.gid_guid = ah->ah_save_guid;

	mutex_exit(&ah->ah_lock);
	return (DDI_SUCCESS);
}


/*
 * hermon_ah_modify()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_ah_modify(hermon_state_t *state, hermon_ahhdl_t ah,
    ibt_adds_vect_t *attr_p)
{
	hermon_hw_udav_t	old_udav;
	uint64_t		data_old;
	int			status, size, i;

	/* Validate that specified port number is legal */
	if (!hermon_portnum_is_valid(state, attr_p->av_port_num)) {
		return (IBT_HCA_PORT_INVALID);
	}

	mutex_enter(&ah->ah_lock);

	/* Save a copy of the current UDAV data in old_udav. */
	bcopy(ah->ah_udav, &old_udav, sizeof (hermon_hw_udav_t));

	/*
	 * Fill in the new UDAV with the caller's data, passed in via the
	 * "ibt_adds_vect_t" structure.
	 *
	 * NOTE: We also need to save away a copy of the "av_dgid.gid_guid"
	 * field here (just as we did during hermon_ah_alloc()) because we
	 * may need to return it later to the IBTF (as a result of a
	 * subsequent query operation).  As explained in hermon_ah_alloc(),
	 * unlike the other UDAV parameters, the value of "av_dgid.gid_guid"
	 * is not always preserved.  The reason for this is described in
	 * hermon_set_addr_path().
	 */
	status = hermon_set_addr_path(state, attr_p,
	    (hermon_hw_addr_path_t *)ah->ah_udav, HERMON_ADDRPATH_UDAV);
	if (status != DDI_SUCCESS) {
		mutex_exit(&ah->ah_lock);
		return (status);
	}
	ah->ah_save_guid = attr_p->av_dgid.gid_guid;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(ah->ah_udav)))
	ah->ah_udav->sl  = attr_p->av_srvl;

	/*
	 * Copy changes into the new UDAV.
	 *    Note:  We copy in 64-bit chunks.  For the first two of these
	 *    chunks it is necessary to read the current contents of the
	 *    UDAV, mask off the modifiable portions (maintaining any
	 *    of the "reserved" portions), and then mask on the new data.
	 */
	size = sizeof (hermon_hw_udav_t) >> 3;
	for (i = 0; i < size; i++) {
		data_old = ((uint64_t *)&old_udav)[i];

		/*
		 * Apply mask to change only the relevant values.
		 */
		if (i == 0) {
			data_old = data_old & HERMON_UDAV_MODIFY_MASK0;
		} else if (i == 1) {
			data_old = data_old & HERMON_UDAV_MODIFY_MASK1;
		} else {
			data_old = 0;
		}

		/* Store the updated values to the UDAV */
		((uint64_t *)ah->ah_udav)[i] |= data_old;
	}

	/*
	 * Put the valid PD number back into the UDAV entry, as it
	 * might have been clobbered above.
	 */
	ah->ah_udav->pd = old_udav.pd;


	mutex_exit(&ah->ah_lock);
	return (DDI_SUCCESS);
}

/*
 * hermon_mcg_attach()
 *    Context: Can be called only from user or kernel context.
 */
int
hermon_mcg_attach(hermon_state_t *state, hermon_qphdl_t qp, ib_gid_t gid,
    ib_lid_t lid)
{
	hermon_rsrc_t		*rsrc;
	hermon_hw_mcg_t		*mcg_entry;
	hermon_hw_mcg_qp_list_t	*mcg_entry_qplist;
	hermon_mcghdl_t		mcg, newmcg;
	uint64_t		mgid_hash;
	uint32_t		end_indx;
	int			status;
	uint_t			qp_found;

	/*
	 * It is only allowed to attach MCG to UD queue pairs.
Verify 731 * that the intended QP is of the appropriate transport type 732 */ 733 if (qp->qp_serv_type != HERMON_QP_UD) { 734 return (IBT_QP_SRV_TYPE_INVALID); 735 } 736 737 /* 738 * Check for invalid Multicast DLID. Specifically, all Multicast 739 * LIDs should be within a well defined range. If the specified LID 740 * is outside of that range, then return an error. 741 */ 742 if (hermon_mlid_is_valid(lid) == 0) { 743 return (IBT_MC_MLID_INVALID); 744 } 745 /* 746 * Check for invalid Multicast GID. All Multicast GIDs should have 747 * a well-defined pattern of bits and flags that are allowable. If 748 * the specified GID does not meet the criteria, then return an error. 749 */ 750 if (hermon_mgid_is_valid(gid) == 0) { 751 return (IBT_MC_MGID_INVALID); 752 } 753 754 /* 755 * Compute the MGID hash value. Since the MCG table is arranged as 756 * a number of separate hash chains, this operation converts the 757 * specified MGID into the starting index of an entry in the hash 758 * table (i.e. the index for the start of the appropriate hash chain). 759 * Subsequent operations below will walk the chain searching for the 760 * right place to add this new QP. 761 */ 762 status = hermon_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid, 763 &mgid_hash, HERMON_SLEEPFLAG_FOR_CONTEXT()); 764 if (status != HERMON_CMD_SUCCESS) { 765 cmn_err(CE_CONT, "Hermon: MGID_HASH command failed: %08x\n", 766 status); 767 if (status == HERMON_CMD_INVALID_STATUS) { 768 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 769 } 770 return (ibc_get_ci_failure(0)); 771 } 772 773 /* 774 * Grab the multicast group mutex. Then grab the pre-allocated 775 * temporary buffer used for holding and/or modifying MCG entries. 776 * Zero out the temporary MCG entry before we begin. 
777 */ 778 mutex_enter(&state->hs_mcglock); 779 mcg_entry = state->hs_mcgtmp; 780 mcg_entry_qplist = HERMON_MCG_GET_QPLIST_PTR(mcg_entry); 781 bzero(mcg_entry, HERMON_MCGMEM_SZ(state)); 782 783 /* 784 * Walk through the array of MCG entries starting at "mgid_hash". 785 * Try to find the appropriate place for this new QP to be added. 786 * This could happen when the first entry of the chain has MGID == 0 787 * (which means that the hash chain is empty), or because we find 788 * an entry with the same MGID (in which case we'll add the QP to 789 * that MCG), or because we come to the end of the chain (in which 790 * case this is the first QP being added to the multicast group that 791 * corresponds to the MGID. The hermon_mcg_walk_mgid_hash() routine 792 * walks the list and returns an index into the MCG table. The entry 793 * at this index is then checked to determine which case we have 794 * fallen into (see below). Note: We are using the "shadow" MCG 795 * list (of hermon_mcg_t structs) for this lookup because the real 796 * MCG entries are in hardware (and the lookup process would be much 797 * more time consuming). 798 */ 799 end_indx = hermon_mcg_walk_mgid_hash(state, mgid_hash, gid, NULL); 800 mcg = &state->hs_mcghdl[end_indx]; 801 802 /* 803 * If MGID == 0, then the hash chain is empty. Just fill in the 804 * current entry. Note: No need to allocate an MCG table entry 805 * as all the hash chain "heads" are already preallocated. 806 */ 807 if ((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) { 808 809 /* Fill in the current entry in the "shadow" MCG list */ 810 hermon_mcg_setup_new_hdr(mcg, mcg_entry, gid, NULL); 811 812 /* 813 * Try to add the new QP number to the list. This (and the 814 * above) routine fills in a temporary MCG. The "mcg_entry" 815 * and "mcg_entry_qplist" pointers simply point to different 816 * offsets within the same temporary copy of the MCG (for 817 * convenience). 
Note: If this fails, we need to invalidate 818 * the entries we've already put into the "shadow" list entry 819 * above. 820 */ 821 status = hermon_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp, 822 &qp_found); 823 if (status != DDI_SUCCESS) { 824 bzero(mcg, sizeof (struct hermon_sw_mcg_list_s)); 825 mutex_exit(&state->hs_mcglock); 826 return (status); 827 } 828 if (!qp_found) 829 mcg_entry->member_cnt = (mcg->mcg_num_qps + 1); 830 /* set the member count */ 831 832 /* 833 * Once the temporary MCG has been filled in, write the entry 834 * into the appropriate location in the Hermon MCG entry table. 835 * If it's successful, then drop the lock and return success. 836 * Note: In general, this operation shouldn't fail. If it 837 * does, then it is an indication that something (probably in 838 * HW, but maybe in SW) has gone seriously wrong. We still 839 * want to zero out the entries that we've filled in above 840 * (in the hermon_mcg_setup_new_hdr() routine). 841 */ 842 status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx, 843 HERMON_CMD_NOSLEEP_SPIN); 844 if (status != HERMON_CMD_SUCCESS) { 845 bzero(mcg, sizeof (struct hermon_sw_mcg_list_s)); 846 mutex_exit(&state->hs_mcglock); 847 HERMON_WARNING(state, "failed to write MCG entry"); 848 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: " 849 "%08x\n", status); 850 if (status == HERMON_CMD_INVALID_STATUS) { 851 hermon_fm_ereport(state, HCA_SYS_ERR, 852 HCA_ERR_SRV_LOST); 853 } 854 return (ibc_get_ci_failure(0)); 855 } 856 857 /* 858 * Now that we know all the Hermon firmware accesses have been 859 * successful, we update the "shadow" MCG entry by incrementing 860 * the "number of attached QPs" count. 861 * 862 * We increment only if the QP is not already part of the 863 * MCG by checking the 'qp_found' flag returned from the 864 * qplist_add above. 865 */ 866 if (!qp_found) { 867 mcg->mcg_num_qps++; 868 869 /* 870 * Increment the refcnt for this QP. 
Because the QP 871 * was added to this MCG, the refcnt must be 872 * incremented. 873 */ 874 hermon_qp_mcg_refcnt_inc(qp); 875 } 876 877 /* 878 * We drop the lock and return success. 879 */ 880 mutex_exit(&state->hs_mcglock); 881 return (DDI_SUCCESS); 882 } 883 884 /* 885 * If the specified MGID matches the MGID in the current entry, then 886 * we need to try to add the QP to the current MCG entry. In this 887 * case, it means that we need to read the existing MCG entry (into 888 * the temporary MCG), add the new QP number to the temporary entry 889 * (using the same method we used above), and write the entry back 890 * to the hardware (same as above). 891 */ 892 if ((mcg->mcg_mgid_h == gid.gid_prefix) && 893 (mcg->mcg_mgid_l == gid.gid_guid)) { 894 895 /* 896 * Read the current MCG entry into the temporary MCG. Note: 897 * In general, this operation shouldn't fail. If it does, 898 * then it is an indication that something (probably in HW, 899 * but maybe in SW) has gone seriously wrong. 900 */ 901 status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx, 902 HERMON_CMD_NOSLEEP_SPIN); 903 if (status != HERMON_CMD_SUCCESS) { 904 mutex_exit(&state->hs_mcglock); 905 HERMON_WARNING(state, "failed to read MCG entry"); 906 cmn_err(CE_CONT, "Hermon: READ_MGM command failed: " 907 "%08x\n", status); 908 if (status == HERMON_CMD_INVALID_STATUS) { 909 hermon_fm_ereport(state, HCA_SYS_ERR, 910 HCA_ERR_SRV_LOST); 911 } 912 return (ibc_get_ci_failure(0)); 913 } 914 915 /* 916 * Try to add the new QP number to the list. This routine 917 * fills in the necessary pieces of the temporary MCG. The 918 * "mcg_entry_qplist" pointer is used to point to the portion 919 * of the temporary MCG that holds the QP numbers. 920 * 921 * Note: hermon_mcg_qplist_add() returns SUCCESS if it 922 * already found the QP in the list. In this case, the QP is 923 * not added on to the list again. Check the flag 'qp_found' 924 * if this value is needed to be known. 
925 * 926 */ 927 status = hermon_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp, 928 &qp_found); 929 if (status != DDI_SUCCESS) { 930 mutex_exit(&state->hs_mcglock); 931 return (status); 932 } 933 if (!qp_found) 934 mcg_entry->member_cnt = (mcg->mcg_num_qps + 1); 935 /* set the member count */ 936 937 /* 938 * Once the temporary MCG has been updated, write the entry 939 * into the appropriate location in the Hermon MCG entry table. 940 * If it's successful, then drop the lock and return success. 941 * Note: In general, this operation shouldn't fail. If it 942 * does, then it is an indication that something (probably in 943 * HW, but maybe in SW) has gone seriously wrong. 944 */ 945 status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx, 946 HERMON_CMD_NOSLEEP_SPIN); 947 if (status != HERMON_CMD_SUCCESS) { 948 mutex_exit(&state->hs_mcglock); 949 HERMON_WARNING(state, "failed to write MCG entry"); 950 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: " 951 "%08x\n", status); 952 if (status == HERMON_CMD_INVALID_STATUS) { 953 hermon_fm_ereport(state, HCA_SYS_ERR, 954 HCA_ERR_SRV_LOST); 955 } 956 return (ibc_get_ci_failure(0)); 957 } 958 959 /* 960 * Now that we know all the Hermon firmware accesses have been 961 * successful, we update the current "shadow" MCG entry by 962 * incrementing the "number of attached QPs" count. 963 * 964 * We increment only if the QP is not already part of the 965 * MCG by checking the 'qp_found' flag returned 966 * hermon_mcg_walk_mgid_hashfrom the qplist_add above. 967 */ 968 if (!qp_found) { 969 mcg->mcg_num_qps++; 970 971 /* 972 * Increment the refcnt for this QP. Because the QP 973 * was added to this MCG, the refcnt must be 974 * incremented. 975 */ 976 hermon_qp_mcg_refcnt_inc(qp); 977 } 978 979 /* 980 * We drop the lock and return success. 981 */ 982 mutex_exit(&state->hs_mcglock); 983 return (DDI_SUCCESS); 984 } 985 986 /* 987 * If we've reached here, then we're at the end of the hash chain. 
	 * We need to allocate a new MCG entry, fill it in, write it to Hermon,
	 * and update the previous entry to link the new one to the end of the
	 * chain.
	 */

	/*
	 * Allocate an MCG table entry.  This will be filled in with all
	 * the necessary parameters to define the multicast group.  Then it
	 * will be written to the hardware in the next-to-last step below.
	 */
	status = hermon_rsrc_alloc(state, HERMON_MCG, 1, HERMON_NOSLEEP, &rsrc);
	if (status != DDI_SUCCESS) {
		mutex_exit(&state->hs_mcglock);
		return (IBT_INSUFF_RESOURCE);
	}

	/*
	 * Fill in the new entry in the "shadow" MCG list.  Note:  Just as
	 * it does above, hermon_mcg_setup_new_hdr() also fills in a portion
	 * of the temporary MCG entry (the rest of which will be filled in by
	 * hermon_mcg_qplist_add() below)
	 */
	newmcg = &state->hs_mcghdl[rsrc->hr_indx];
	hermon_mcg_setup_new_hdr(newmcg, mcg_entry, gid, rsrc);

	/*
	 * Try to add the new QP number to the list.  This routine fills in
	 * the final necessary pieces of the temporary MCG.  The
	 * "mcg_entry_qplist" pointer is used to point to the portion of the
	 * temporary MCG that holds the QP numbers.  If we fail here, we
	 * must undo the previous resource allocation.
	 *
	 * Note:  hermon_mcg_qplist_add() can return SUCCESS if it already
	 * found the QP in the list.  In this case, the QP is not added on to
	 * the list again.  Check the flag 'qp_found' if this value needs
	 * to be known.
	 */
	status = hermon_mcg_qplist_add(state, newmcg, mcg_entry_qplist, qp,
	    &qp_found);
	if (status != DDI_SUCCESS) {
		bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
		hermon_rsrc_free(state, &rsrc);
		mutex_exit(&state->hs_mcglock);
		return (status);
	}
	mcg_entry->member_cnt = (newmcg->mcg_num_qps + 1);
	    /* set the member count */

	/*
	 * Once the temporary MCG has been updated, write the entry into the
	 * appropriate location in the Hermon MCG entry table.  If this is
	 * successful, then we need to chain the previous entry to this one.
	 * Note: In general, this operation shouldn't fail.  If it does, then
	 * it is an indication that something (probably in HW, but maybe in
	 * SW) has gone seriously wrong.
	 */
	status = hermon_write_mgm_cmd_post(state, mcg_entry, rsrc->hr_indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
		hermon_rsrc_free(state, &rsrc);
		mutex_exit(&state->hs_mcglock);
		HERMON_WARNING(state, "failed to write MCG entry");
		cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Now read the current MCG entry (the one previously at the end of
	 * hash chain) into the temporary MCG.  We are going to update its
	 * "next_gid_indx" now and write the entry back to the MCG table.
	 * Note:  In general, this operation shouldn't fail.  If it does, then
	 * it is an indication that something (probably in HW, but maybe in SW)
	 * has gone seriously wrong.  We will free up the MCG entry resource,
	 * but we will not undo the previously written MCG entry in the HW.
	 * This is OK, though, because the MCG entry is not currently attached
	 * to any hash chain.
	 */
	status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
		hermon_rsrc_free(state, &rsrc);
		mutex_exit(&state->hs_mcglock);
		HERMON_WARNING(state, "failed to read MCG entry");
		cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Finally, we update the "next_gid_indx" field in the temporary MCG
	 * and attempt to write the entry back into the Hermon MCG table.  If
	 * this succeeds, then we update the "shadow" list to reflect the
	 * change, drop the lock, and return success.  Note:  In general, this
	 * operation shouldn't fail.  If it does, then it is an indication
	 * that something (probably in HW, but maybe in SW) has gone seriously
	 * wrong.  Just as we do above, we will free up the MCG entry resource,
	 * but we will not try to undo the previously written MCG entry.  This
	 * is OK, though, because (since we failed here to update the end of
	 * the chain) that other entry is not currently attached to any chain.
	 */
	mcg_entry->next_gid_indx = rsrc->hr_indx;
	status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
		hermon_rsrc_free(state, &rsrc);
		mutex_exit(&state->hs_mcglock);
		HERMON_WARNING(state, "failed to write MCG entry");
		cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}
	mcg = &state->hs_mcghdl[end_indx];
	mcg->mcg_next_indx = rsrc->hr_indx;

	/*
	 * Now that we know all the Hermon firmware accesses have been
	 * successful, we update the new "shadow" MCG entry by incrementing
	 * the "number of attached QPs" count.  Then we drop the lock and
	 * return success.
	 */
	newmcg->mcg_num_qps++;

	/*
	 * Increment the refcnt for this QP.  Because the QP
	 * was added to this MCG, the refcnt must be
	 * incremented.
	 */
	hermon_qp_mcg_refcnt_inc(qp);

	mutex_exit(&state->hs_mcglock);
	return (DDI_SUCCESS);
}


/*
 * hermon_mcg_detach()
 *    Context: Can be called only from user or kernel context.
 */
int
hermon_mcg_detach(hermon_state_t *state, hermon_qphdl_t qp, ib_gid_t gid,
    ib_lid_t lid)
{
	hermon_hw_mcg_t		*mcg_entry;
	hermon_hw_mcg_qp_list_t	*mcg_entry_qplist;
	hermon_mcghdl_t		mcg;
	uint64_t		mgid_hash;
	uint32_t		end_indx, prev_indx;
	int			status;

	/*
	 * Check for invalid Multicast DLID.  Specifically, all Multicast
	 * LIDs should be within a well defined range.  If the specified LID
	 * is outside of that range, then return an error.
	 */
	if (hermon_mlid_is_valid(lid) == 0) {
		return (IBT_MC_MLID_INVALID);
	}

	/*
	 * Compute the MGID hash value.  As described above, the MCG table is
	 * arranged as a number of separate hash chains.  This operation
	 * converts the specified MGID into the starting index of an entry in
	 * the hash table (i.e. the index for the start of the appropriate
	 * hash chain).  Subsequent operations below will walk the chain
	 * searching for a matching entry from which to attempt to remove
	 * the specified QP.
	 */
	status = hermon_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
	    &mgid_hash, HERMON_SLEEPFLAG_FOR_CONTEXT());
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: MGID_HASH command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Grab the multicast group mutex.  Then grab the pre-allocated
	 * temporary buffer used for holding and/or modifying MCG entries.
	 */
	mutex_enter(&state->hs_mcglock);
	mcg_entry = state->hs_mcgtmp;
	mcg_entry_qplist = HERMON_MCG_GET_QPLIST_PTR(mcg_entry);

	/*
	 * Walk through the array of MCG entries starting at "mgid_hash".
	 * Try to find an MCG entry with a matching MGID.  The
	 * hermon_mcg_walk_mgid_hash() routine walks the list and returns an
	 * index into the MCG table.  The entry at this index is checked to
	 * determine whether it is a match or not.  If it is a match, then
	 * we continue on to attempt to remove the QP from the MCG.  If it
	 * is not a match (or not a valid MCG entry), then we return an error.
	 */
	end_indx = hermon_mcg_walk_mgid_hash(state, mgid_hash, gid, &prev_indx);
	mcg = &state->hs_mcghdl[end_indx];

	/*
	 * If MGID == 0 (the hash chain is empty) or if the specified MGID
	 * does not match the MGID in the current entry, then return
	 * IBT_MC_MGID_INVALID (to indicate that the specified MGID is not
	 * valid).
	 */
	if (((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) ||
	    ((mcg->mcg_mgid_h != gid.gid_prefix) ||
	    (mcg->mcg_mgid_l != gid.gid_guid))) {
		mutex_exit(&state->hs_mcglock);
		return (IBT_MC_MGID_INVALID);
	}

	/*
	 * Read the current MCG entry into the temporary MCG.  Note: In
	 * general, this operation shouldn't fail.  If it does, then it is
	 * an indication that something (probably in HW, but maybe in SW)
	 * has gone seriously wrong.
	 */
	status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		mutex_exit(&state->hs_mcglock);
		HERMON_WARNING(state, "failed to read MCG entry");
		cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Search the QP number list for a match.  If a match is found, then
	 * remove the entry from the QP list.  Otherwise, if no match is found,
	 * return an error.
	 */
	status = hermon_mcg_qplist_remove(mcg, mcg_entry_qplist, qp);
	if (status != DDI_SUCCESS) {
		mutex_exit(&state->hs_mcglock);
		return (status);
	}

	/*
	 * Decrement the MCG count for this QP.  When the 'qp_mcg_refcnt'
	 * field becomes 0, then this QP is no longer a member of any
	 * MCG.
	 */
	hermon_qp_mcg_refcnt_dec(qp);

	/*
	 * If the current MCG's QP number list is about to be made empty
	 * ("mcg_num_qps" == 1), then remove the entry itself from the hash
	 * chain.  Otherwise, just write the updated MCG entry back to the
	 * hardware.  In either case, once we successfully update the hardware
	 * chain, then we decrement the "shadow" list entry's "mcg_num_qps"
	 * count (or zero out the entire "shadow" list entry) before returning
	 * success.  Note:  Zeroing out the "shadow" list entry is done
	 * inside of hermon_mcg_hash_list_remove().
	 */
	if (mcg->mcg_num_qps == 1) {

		/* Remove an MCG entry from the hash chain */
		status = hermon_mcg_hash_list_remove(state, end_indx, prev_indx,
		    mcg_entry);
		if (status != DDI_SUCCESS) {
			mutex_exit(&state->hs_mcglock);
			return (status);
		}

	} else {
		/*
		 * Write the updated MCG entry back to the Hermon MCG table.
		 * If this succeeds, then we update the "shadow" list to
		 * reflect the change (i.e. decrement the "mcg_num_qps"),
		 * drop the lock, and return success.  Note:  In general,
		 * this operation shouldn't fail.  If it does, then it is an
		 * indication that something (probably in HW, but maybe in SW)
		 * has gone seriously wrong.
		 */
		mcg_entry->member_cnt = (mcg->mcg_num_qps - 1);
		status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
		    HERMON_CMD_NOSLEEP_SPIN);
		if (status != HERMON_CMD_SUCCESS) {
			mutex_exit(&state->hs_mcglock);
			HERMON_WARNING(state, "failed to write MCG entry");
			cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}
		mcg->mcg_num_qps--;
	}

	mutex_exit(&state->hs_mcglock);
	return (DDI_SUCCESS);
}

/*
 * hermon_qp_mcg_refcnt_inc()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp)
{
	/* Increment the QP's MCG reference count */
	mutex_enter(&qp->qp_lock);
	qp->qp_mcg_refcnt++;
	mutex_exit(&qp->qp_lock);
}


/*
 * hermon_qp_mcg_refcnt_dec()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp)
{
	/* Decrement the QP's MCG reference count */
	mutex_enter(&qp->qp_lock);
	qp->qp_mcg_refcnt--;
	mutex_exit(&qp->qp_lock);
}


/*
 * hermon_mcg_qplist_add()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_mcg_qplist_add(hermon_state_t *state, hermon_mcghdl_t mcg,
    hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp,
    uint_t *qp_found)
{
	uint_t		qplist_indx;

	ASSERT(MUTEX_HELD(&state->hs_mcglock));

	qplist_indx = mcg->mcg_num_qps;

	/*
	 * Determine if we have exceeded the maximum number of QP per
	 * multicast group.
If we have, then return an error.
	 */
	if (qplist_indx >= state->hs_cfg_profile->cp_num_qp_per_mcg) {
		return (IBT_HCA_MCG_QP_EXCEEDED);
	}

	/*
	 * Determine if the QP is already attached to this MCG table.  If it
	 * is, then we break out and treat this operation as a NO-OP
	 */
	for (qplist_indx = 0; qplist_indx < mcg->mcg_num_qps;
	    qplist_indx++) {
		if (mcg_qplist[qplist_indx].qpn == qp->qp_qpnum) {
			break;
		}
	}

	/*
	 * If the QP was already on the list, set 'qp_found' to TRUE.  We still
	 * return SUCCESS in this case, but the qplist will not have been
	 * updated because the QP was already on the list.
	 */
	if (qplist_indx < mcg->mcg_num_qps) {
		*qp_found = 1;
	} else {
		/*
		 * Otherwise, append the new QP number to the end of the
		 * current QP list.  Note: We will increment the "mcg_num_qps"
		 * field on the "shadow" MCG list entry later (after we know
		 * that all necessary Hermon firmware accesses have been
		 * successful).
		 *
		 * Set 'qp_found' to 0 so we know the QP was added on to the
		 * list for sure.
		 */
		mcg_qplist[qplist_indx].qpn =
		    (qp->qp_qpnum | HERMON_MCG_QPN_BLOCK_LB);
		*qp_found = 0;
	}

	return (DDI_SUCCESS);
}



/*
 * hermon_mcg_qplist_remove()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_mcg_qplist_remove(hermon_mcghdl_t mcg,
    hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp)
{
	uint_t		i, qplist_indx;

	/*
	 * Search the MCG QP list for a matching QPN.  When it's found, we
	 * swap the last entry into the matching slot, zero out the (old)
	 * last entry, and return.  The caller is responsible for updating
	 * the "shadow" entry's QP count.  If no match is found, then it's
	 * an error.
	 */
	qplist_indx = mcg->mcg_num_qps;
	for (i = 0; i < qplist_indx; i++) {
		if (mcg_qplist[i].qpn == qp->qp_qpnum) {
			mcg_qplist[i] = mcg_qplist[qplist_indx - 1];
			mcg_qplist[qplist_indx - 1].qpn = 0;

			return (DDI_SUCCESS);
		}
	}

	return (IBT_QP_HDL_INVALID);
}


/*
 * hermon_mcg_walk_mgid_hash()
 *    Context: Can be called from interrupt or base context.
 */
static uint_t
hermon_mcg_walk_mgid_hash(hermon_state_t *state, uint64_t start_indx,
    ib_gid_t mgid, uint_t *p_indx)
{
	hermon_mcghdl_t	curr_mcghdl;
	uint_t		curr_indx, prev_indx;

	ASSERT(MUTEX_HELD(&state->hs_mcglock));

	/* Start at the head of the hash chain */
	curr_indx = (uint_t)start_indx;
	prev_indx = curr_indx;
	curr_mcghdl = &state->hs_mcghdl[curr_indx];

	/* If the first entry in the chain has MGID == 0, then stop */
	if ((curr_mcghdl->mcg_mgid_h == 0) &&
	    (curr_mcghdl->mcg_mgid_l == 0)) {
		goto end_mgid_hash_walk;
	}

	/* If the first entry in the chain matches the MGID, then stop */
	if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
	    (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
		goto end_mgid_hash_walk;
	}

	/* Otherwise, walk the hash chain looking for a match */
	while (curr_mcghdl->mcg_next_indx != 0) {
		prev_indx = curr_indx;
		curr_indx = curr_mcghdl->mcg_next_indx;
		curr_mcghdl = &state->hs_mcghdl[curr_indx];

		if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
		    (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
			break;
		}
	}

end_mgid_hash_walk:
	/*
	 * If necessary, return the index of the previous entry too.  This
	 * is primarily used for detaching a QP from a multicast group.  It
	 * may be necessary, in that case, to delete an MCG entry from the
	 * hash chain and having the index of the previous entry is helpful.
	 */
	if (p_indx != NULL) {
		*p_indx = prev_indx;
	}
	return (curr_indx);
}


/*
 * hermon_mcg_setup_new_hdr()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg, hermon_hw_mcg_t *mcg_hdr,
    ib_gid_t mgid, hermon_rsrc_t *mcg_rsrc)
{
	/*
	 * Fill in the fields of the "shadow" entry used by software
	 * to track MCG hardware entry
	 */
	mcg->mcg_mgid_h = mgid.gid_prefix;
	mcg->mcg_mgid_l = mgid.gid_guid;
	mcg->mcg_rsrcp = mcg_rsrc;
	mcg->mcg_next_indx = 0;
	mcg->mcg_num_qps = 0;

	/*
	 * Fill the header fields of the MCG entry (in the temporary copy)
	 */
	mcg_hdr->mgid_h = mgid.gid_prefix;
	mcg_hdr->mgid_l = mgid.gid_guid;
	mcg_hdr->next_gid_indx = 0;
}


/*
 * hermon_mcg_hash_list_remove()
 *    Context: Can be called only from user or kernel context.
 */
static int
hermon_mcg_hash_list_remove(hermon_state_t *state, uint_t curr_indx,
    uint_t prev_indx, hermon_hw_mcg_t *mcg_entry)
{
	hermon_mcghdl_t		curr_mcg, prev_mcg, next_mcg;
	uint_t			next_indx;
	int			status;

	/* Get the pointer to "shadow" list for current entry */
	curr_mcg = &state->hs_mcghdl[curr_indx];

	/*
	 * If this is the first entry on a hash chain, then attempt to replace
	 * the entry with the next entry on the chain.  If there are no
	 * subsequent entries on the chain, then this is the only entry and
	 * should be invalidated.
	 */
	if (curr_indx == prev_indx) {

		/*
		 * If this is the only entry on the chain, then invalidate it.
		 * Note:  Invalidating an MCG entry means writing all zeros
		 * to the entry.  This is only necessary for those MCG
		 * entries that are the "head" entries of the individual hash
		 * chains.  Regardless of whether this operation returns
		 * success or failure, return that result to the caller.
		 */
		next_indx = curr_mcg->mcg_next_indx;
		if (next_indx == 0) {
			status = hermon_mcg_entry_invalidate(state, mcg_entry,
			    curr_indx);
			bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
			return (status);
		}

		/*
		 * Otherwise, this is just the first entry on the chain, so
		 * grab the next one
		 */
		next_mcg = &state->hs_mcghdl[next_indx];

		/*
		 * Read the next MCG entry into the temporary MCG.  Note:
		 * In general, this operation shouldn't fail.  If it does,
		 * then it is an indication that something (probably in HW,
		 * but maybe in SW) has gone seriously wrong.
		 */
		status = hermon_read_mgm_cmd_post(state, mcg_entry, next_indx,
		    HERMON_CMD_NOSLEEP_SPIN);
		if (status != HERMON_CMD_SUCCESS) {
			HERMON_WARNING(state, "failed to read MCG entry");
			cmn_err(CE_CONT, "Hermon: READ_MGM command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}

		/*
		 * Copy/Write the temporary MCG back to the hardware MCG list
		 * using the current index.  This essentially removes the
		 * current MCG entry from the list by writing over it with
		 * the next one.  If this is successful, then we can do the
		 * same operation for the "shadow" list.  And we can also
		 * free up the Hermon MCG entry resource that was associated
		 * with the (old) next entry.  Note:  In general, this
		 * operation shouldn't fail.  If it does, then it is an
		 * indication that something (probably in HW, but maybe in SW)
		 * has gone seriously wrong.
		 */
		status = hermon_write_mgm_cmd_post(state, mcg_entry, curr_indx,
		    HERMON_CMD_NOSLEEP_SPIN);
		if (status != HERMON_CMD_SUCCESS) {
			HERMON_WARNING(state, "failed to write MCG entry");
			cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}

		/*
		 * Copy all the software tracking information from the next
		 * entry on the "shadow" MCG list into the current entry on
		 * the list.  Then invalidate (zero out) the other "shadow"
		 * list entry.
		 */
		bcopy(next_mcg, curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
		bzero(next_mcg, sizeof (struct hermon_sw_mcg_list_s));

		/*
		 * Free up the Hermon MCG entry resource used by the "next"
		 * MCG entry.  That resource is no longer needed by any
		 * MCG entry which is first on a hash chain (like the "next"
		 * entry has just become).
		 */
		hermon_rsrc_free(state, &curr_mcg->mcg_rsrcp);

		return (DDI_SUCCESS);
	}

	/*
	 * Else if this is the last entry on the hash chain (or a middle
	 * entry), then we update the previous entry's "next_gid_index" field
	 * to make it point instead to the next entry on the chain.  By
	 * skipping over the removed entry in this way, we can then free up
	 * any resources associated with the current entry.  Note:  We don't
	 * need to invalidate the "skipped over" hardware entry because it
	 * will no longer be connected to any hash chains, and if/when it is
	 * finally re-used, it will be written with entirely new values.
	 */

	/*
	 * Read the next MCG entry into the temporary MCG.  Note:  In general,
	 * this operation shouldn't fail.  If it does, then it is an
	 * indication that something (probably in HW, but maybe in SW) has
	 * gone seriously wrong.
	 */
	status = hermon_read_mgm_cmd_post(state, mcg_entry, prev_indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		HERMON_WARNING(state, "failed to read MCG entry");
		cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Finally, we update the "next_gid_indx" field in the temporary MCG
	 * and attempt to write the entry back into the Hermon MCG table.  If
	 * this succeeds, then we update the "shadow" list to reflect the
	 * change, free up the Hermon MCG entry resource that was associated
	 * with the current entry, and return success.  Note:  In general,
	 * this operation shouldn't fail.  If it does, then it is an indication
	 * that something (probably in HW, but maybe in SW) has gone seriously
	 * wrong.
	 */
	mcg_entry->next_gid_indx = curr_mcg->mcg_next_indx;
	status = hermon_write_mgm_cmd_post(state, mcg_entry, prev_indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		HERMON_WARNING(state, "failed to write MCG entry");
		cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR,
			    HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Get the pointer to the "shadow" MCG list entry for the previous
	 * MCG.  Update its "mcg_next_indx" to point to the next entry
	 * (the one after the current entry).  Note:  This next index may be
	 * zero, indicating the end of the list.
	 */
	prev_mcg = &state->hs_mcghdl[prev_indx];
	prev_mcg->mcg_next_indx = curr_mcg->mcg_next_indx;

	/*
	 * Free up the Hermon MCG entry resource used by the current entry.
	 * This resource is no longer needed because the chain now skips over
	 * the current entry.  Then invalidate (zero out) the current "shadow"
	 * list entry.
	 */
	hermon_rsrc_free(state, &curr_mcg->mcg_rsrcp);
	bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s));

	return (DDI_SUCCESS);
}


/*
 * hermon_mcg_entry_invalidate()
 *    Context: Can be called only from user or kernel context.
 */
static int
hermon_mcg_entry_invalidate(hermon_state_t *state, hermon_hw_mcg_t *mcg_entry,
    uint_t indx)
{
	int		status;

	/*
	 * Invalidate the hardware MCG entry by zeroing out this temporary
	 * MCG and writing it to the hardware.  Note:  In general, this
	 * operation shouldn't fail.  If it does, then it is an indication
	 * that something (probably in HW, but maybe in SW) has gone seriously
	 * wrong.
	 */
	bzero(mcg_entry, HERMON_MCGMEM_SZ(state));
	status = hermon_write_mgm_cmd_post(state, mcg_entry, indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		HERMON_WARNING(state, "failed to write MCG entry");
		cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_mgid_is_valid()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_mgid_is_valid(ib_gid_t gid)
{
	uint_t		topbits, flags, scope;

	/*
	 * According to IBA 1.1 specification (section 4.1.1) a valid
	 * "multicast GID" must have its top eight bits set to all ones
	 */
	topbits = (gid.gid_prefix >> HERMON_MCG_TOPBITS_SHIFT) &
	    HERMON_MCG_TOPBITS_MASK;
	if (topbits != HERMON_MCG_TOPBITS) {
		return (0);
	}

	/*
	 * The next 4 bits are the "flag" bits.  These are valid only
	 * if they are "0" (which correspond to permanently assigned/
	 * "well-known" multicast GIDs) or "1" (for so-called "transient"
	 * multicast GIDs).  All other values are reserved.
	 */
	flags = (gid.gid_prefix >> HERMON_MCG_FLAGS_SHIFT) &
	    HERMON_MCG_FLAGS_MASK;
	if (!((flags == HERMON_MCG_FLAGS_PERM) ||
	    (flags == HERMON_MCG_FLAGS_NONPERM))) {
		return (0);
	}

	/*
	 * The next 4 bits are the "scope" bits.  These are valid only
	 * if they are "2" (Link-local), "5" (Site-local), "8"
	 * (Organization-local) or "E" (Global).  All other values
	 * are reserved (or currently unassigned).
	 */
	scope = (gid.gid_prefix >> HERMON_MCG_SCOPE_SHIFT) &
	    HERMON_MCG_SCOPE_MASK;
	if (!((scope == HERMON_MCG_SCOPE_LINKLOC) ||
	    (scope == HERMON_MCG_SCOPE_SITELOC) ||
	    (scope == HERMON_MCG_SCOPE_ORGLOC) ||
	    (scope == HERMON_MCG_SCOPE_GLOBAL))) {
		return (0);
	}

	/*
	 * If it passes all of the above checks, then we will consider it
	 * a valid multicast GID.
	 */
	return (1);
}


/*
 * hermon_mlid_is_valid()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_mlid_is_valid(ib_lid_t lid)
{
	/*
	 * According to IBA 1.1 specification (section 4.1.1) a valid
	 * "multicast DLID" must be between 0xC000 and 0xFFFE.
	 */
	if ((lid < IB_LID_MC_FIRST) || (lid > IB_LID_MC_LAST)) {
		return (0);
	}

	return (1);
}


/*
 * hermon_pd_alloc()
 *    Context: Can be called only from user or kernel context.
 */
int
hermon_pd_alloc(hermon_state_t *state, hermon_pdhdl_t *pdhdl, uint_t sleepflag)
{
	hermon_rsrc_t	*rsrc;
	hermon_pdhdl_t	pd;
	int		status;

	/*
	 * Allocate the software structure for tracking the protection domain
	 * (i.e. the Hermon Protection Domain handle).  By default each PD
	 * structure will have a unique PD number assigned to it.  All that
	 * is necessary is for software to initialize the PD reference count
	 * (to zero) and return success.
	 */
	status = hermon_rsrc_alloc(state, HERMON_PDHDL, 1, sleepflag, &rsrc);
	if (status != DDI_SUCCESS) {
		return (IBT_INSUFF_RESOURCE);
	}
	pd = (hermon_pdhdl_t)rsrc->hr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd))

	pd->pd_refcnt = 0;
	*pdhdl = pd;

	return (DDI_SUCCESS);
}


/*
 * hermon_pd_free()
 *    Context: Can be called only from user or kernel context.
 */
int
hermon_pd_free(hermon_state_t *state, hermon_pdhdl_t *pdhdl)
{
	hermon_rsrc_t	*rsrc;
	hermon_pdhdl_t	pd;

	/*
	 * Pull all the necessary information from the Hermon Protection Domain
	 * handle.  This is necessary here because the resource for the
	 * PD is going to be freed up as part of this operation.
	 */
	pd = *pdhdl;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd))
	rsrc = pd->pd_rsrcp;

	/*
	 * Check the PD reference count.  If the reference count is non-zero,
	 * then it means that this protection domain is still referenced by
	 * some memory region, queue pair, address handle, or other IB object.
	 * If it is non-zero, then return an error.  Otherwise, free the
	 * Hermon resource and return success.
	 */
	if (pd->pd_refcnt != 0) {
		return (IBT_PD_IN_USE);
	}

	/* Free the Hermon Protection Domain handle */
	hermon_rsrc_free(state, &rsrc);

	/* Set the pdhdl pointer to NULL and return success */
	*pdhdl = (hermon_pdhdl_t)NULL;

	return (DDI_SUCCESS);
}


/*
 * hermon_pd_refcnt_inc()
 *    Context: Can be called from interrupt or base context.
 */
void
hermon_pd_refcnt_inc(hermon_pdhdl_t pd)
{
	/* Increment the protection domain's reference count */
	atomic_inc_32(&pd->pd_refcnt);
}


/*
 * hermon_pd_refcnt_dec()
 *    Context: Can be called from interrupt or base context.
 */
void
hermon_pd_refcnt_dec(hermon_pdhdl_t pd)
{
	/* Decrement the protection domain's reference count */
	atomic_dec_32(&pd->pd_refcnt);
}


/*
 * hermon_port_query()
 *    Context: Can be called only from user or kernel context.
 */
int
hermon_port_query(hermon_state_t *state, uint_t port, ibt_hca_portinfo_t *pi)
{
	sm_portinfo_t		portinfo;
	sm_guidinfo_t		guidinfo;
	sm_pkey_table_t		pkeytable;
	ib_gid_t		*sgid;
	uint_t			sgid_max, pkey_max, tbl_size;
	int			i, j, indx, status;
	ib_pkey_t		*pkeyp;
	ib_guid_t		*guidp;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pi))
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*state))

	/* Validate that specified port number is legal */
	if (!hermon_portnum_is_valid(state, port)) {
		return (IBT_HCA_PORT_INVALID);
	}
	pkeyp = state->hs_pkey[port - 1];
	guidp = state->hs_guid[port - 1];

	/*
	 * We use the Hermon MAD_IFC command to post a GetPortInfo MAD
	 * to the firmware (for the specified port number).  This returns
	 * a full PortInfo MAD (in "portinfo") which we subsequently
	 * parse to fill in the "ibt_hca_portinfo_t" structure returned
	 * to the IBTF.
 */
	status = hermon_getportinfo_cmd_post(state, port,
	    HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: GetPortInfo (port %02d) command "
		    "failed: %08x\n", port, status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			/* Firmware unresponsive: post an FMA ereport */
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Parse the PortInfo MAD and fill in the IBTF structure
	 */
	pi->p_base_lid = portinfo.LID;
	pi->p_qkey_violations = portinfo.Q_KeyViolations;
	pi->p_pkey_violations = portinfo.P_KeyViolations;
	pi->p_sm_sl = portinfo.MasterSMSL;
	pi->p_sm_lid = portinfo.MasterSMLID;
	pi->p_linkstate = portinfo.PortState;
	pi->p_port_num = portinfo.LocalPortNum;
	pi->p_phys_state = portinfo.PortPhysicalState;
	pi->p_width_supported = portinfo.LinkWidthSupported;
	pi->p_width_enabled = portinfo.LinkWidthEnabled;
	pi->p_width_active = portinfo.LinkWidthActive;
	pi->p_speed_supported = portinfo.LinkSpeedSupported;
	pi->p_speed_enabled = portinfo.LinkSpeedEnabled;
	pi->p_speed_active = portinfo.LinkSpeedActive;
	pi->p_mtu = portinfo.MTUCap;
	pi->p_lmc = portinfo.LMC;
	pi->p_max_vl = portinfo.VLCap;
	pi->p_subnet_timeout = portinfo.SubnetTimeOut;
	pi->p_msg_sz = ((uint32_t)1 << HERMON_QP_LOG_MAX_MSGSZ);
	/* Table sizes come from the configured profile (log2 values) */
	tbl_size = state->hs_cfg_profile->cp_log_max_gidtbl;
	pi->p_sgid_tbl_sz = (1 << tbl_size);
	tbl_size = state->hs_cfg_profile->cp_log_max_pkeytbl;
	pi->p_pkey_tbl_sz = (1 << tbl_size);
	state->hs_sn_prefix[port - 1] = portinfo.GidPrefix;

	/*
	 * Convert InfiniBand-defined port capability flags to the format
	 * specified by the IBTF
	 */
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM)
		pi->p_capabilities |= IBT_PORT_CAP_SM;
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM_DISABLED)
		pi->p_capabilities |= IBT_PORT_CAP_SM_DISABLED;
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SNMP_SUPPD)
		pi->p_capabilities |= IBT_PORT_CAP_SNMP_TUNNEL;
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_DM_SUPPD)
		pi->p_capabilities |= IBT_PORT_CAP_DM;
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_VM_SUPPD)
		pi->p_capabilities |= IBT_PORT_CAP_VENDOR;
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_CLNT_REREG_SUPPD)
		pi->p_capabilities |= IBT_PORT_CAP_CLNT_REREG;

	/*
	 * Fill in the SGID table.  Since the only access to the Hermon
	 * GID tables is through the firmware's MAD_IFC interface, we
	 * post as many GetGUIDInfo MADs as necessary to read in the entire
	 * contents of the SGID table (for the specified port).  Note: The
	 * GetGUIDInfo command only gets eight GUIDs per operation.  These
	 * GUIDs are then appended to the GID prefix for the port (from the
	 * GetPortInfo above) to form the entire SGID table.
	 */
	for (i = 0; i < pi->p_sgid_tbl_sz; i += 8) {
		status = hermon_getguidinfo_cmd_post(state, port, i >> 3,
		    HERMON_SLEEPFLAG_FOR_CONTEXT(), &guidinfo);
		if (status != HERMON_CMD_SUCCESS) {
			cmn_err(CE_CONT, "Hermon: GetGUIDInfo (port %02d) "
			    "command failed: %08x\n", port, status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}

		/* Figure out how many of the entries are valid */
		sgid_max = min((pi->p_sgid_tbl_sz - i), 8);
		for (j = 0; j < sgid_max; j++) {
			indx = (i + j);
			sgid = &pi->p_sgid_tbl[indx];
			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sgid))
			sgid->gid_prefix = portinfo.GidPrefix;
			/* Shadow the GUID in the soft state as well */
			guidp[indx] = sgid->gid_guid =
			    guidinfo.GUIDBlocks[j];
		}
	}

	/*
	 * Fill in the PKey table.  Just as for the GID tables above, the
	 * only access to the Hermon PKey tables is through the firmware's
	 * MAD_IFC interface.  We post as many GetPKeyTable MADs as necessary
	 * to read in the entire contents of the PKey table (for the specified
	 * port).  Note: The GetPKeyTable command only gets 32 PKeys per
	 * operation.
	 */
	for (i = 0; i < pi->p_pkey_tbl_sz; i += 32) {
		status = hermon_getpkeytable_cmd_post(state, port, i,
		    HERMON_SLEEPFLAG_FOR_CONTEXT(), &pkeytable);
		if (status != HERMON_CMD_SUCCESS) {
			cmn_err(CE_CONT, "Hermon: GetPKeyTable (port %02d) "
			    "command failed: %08x\n", port, status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}

		/* Figure out how many of the entries are valid */
		pkey_max = min((pi->p_pkey_tbl_sz - i), 32);
		for (j = 0; j < pkey_max; j++) {
			indx = (i + j);
			/* Shadow the PKey in the soft state as well */
			pkeyp[indx] = pi->p_pkey_tbl[indx] =
			    pkeytable.P_KeyTableBlocks[j];
		}
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_port_modify()
 *    Context: Can be called only from user or kernel context.
2052 */ 2053 /* ARGSUSED */ 2054 int 2055 hermon_port_modify(hermon_state_t *state, uint8_t port, 2056 ibt_port_modify_flags_t flags, uint8_t init_type) 2057 { 2058 sm_portinfo_t portinfo; 2059 uint32_t capmask; 2060 int status; 2061 hermon_hw_set_port_t set_port; 2062 2063 /* 2064 * Return an error if either of the unsupported flags are set 2065 */ 2066 if ((flags & IBT_PORT_SHUTDOWN) || 2067 (flags & IBT_PORT_SET_INIT_TYPE)) { 2068 return (IBT_NOT_SUPPORTED); 2069 } 2070 2071 bzero(&set_port, sizeof (set_port)); 2072 2073 /* 2074 * Determine whether we are trying to reset the QKey counter 2075 */ 2076 if (flags & IBT_PORT_RESET_QKEY) 2077 set_port.rqk = 1; 2078 2079 /* Validate that specified port number is legal */ 2080 if (!hermon_portnum_is_valid(state, port)) { 2081 return (IBT_HCA_PORT_INVALID); 2082 } 2083 2084 /* 2085 * Use the Hermon MAD_IFC command to post a GetPortInfo MAD to the 2086 * firmware (for the specified port number). This returns a full 2087 * PortInfo MAD (in "portinfo") from which we pull the current 2088 * capability mask. We then modify the capability mask as directed 2089 * by the "pmod_flags" field, and write the updated capability mask 2090 * using the Hermon SET_IB command (below). 2091 */ 2092 status = hermon_getportinfo_cmd_post(state, port, 2093 HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo); 2094 if (status != HERMON_CMD_SUCCESS) { 2095 if (status == HERMON_CMD_INVALID_STATUS) { 2096 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 2097 } 2098 return (ibc_get_ci_failure(0)); 2099 } 2100 2101 /* 2102 * Convert InfiniBand-defined port capability flags to the format 2103 * specified by the IBTF. Specifically, we modify the capability 2104 * mask based on the specified values. 
2105 */ 2106 capmask = portinfo.CapabilityMask; 2107 2108 if (flags & IBT_PORT_RESET_SM) 2109 capmask &= ~SM_CAP_MASK_IS_SM; 2110 else if (flags & IBT_PORT_SET_SM) 2111 capmask |= SM_CAP_MASK_IS_SM; 2112 2113 if (flags & IBT_PORT_RESET_SNMP) 2114 capmask &= ~SM_CAP_MASK_IS_SNMP_SUPPD; 2115 else if (flags & IBT_PORT_SET_SNMP) 2116 capmask |= SM_CAP_MASK_IS_SNMP_SUPPD; 2117 2118 if (flags & IBT_PORT_RESET_DEVMGT) 2119 capmask &= ~SM_CAP_MASK_IS_DM_SUPPD; 2120 else if (flags & IBT_PORT_SET_DEVMGT) 2121 capmask |= SM_CAP_MASK_IS_DM_SUPPD; 2122 2123 if (flags & IBT_PORT_RESET_VENDOR) 2124 capmask &= ~SM_CAP_MASK_IS_VM_SUPPD; 2125 else if (flags & IBT_PORT_SET_VENDOR) 2126 capmask |= SM_CAP_MASK_IS_VM_SUPPD; 2127 2128 set_port.cap_mask = capmask; 2129 2130 /* 2131 * Use the Hermon SET_PORT command to update the capability mask and 2132 * (possibly) reset the QKey violation counter for the specified port. 2133 * Note: In general, this operation shouldn't fail. If it does, then 2134 * it is an indication that something (probably in HW, but maybe in 2135 * SW) has gone seriously wrong. 2136 */ 2137 status = hermon_set_port_cmd_post(state, &set_port, port, 2138 HERMON_SLEEPFLAG_FOR_CONTEXT()); 2139 if (status != HERMON_CMD_SUCCESS) { 2140 HERMON_WARNING(state, "failed to modify port capabilities"); 2141 cmn_err(CE_CONT, "Hermon: SET_IB (port %02d) command failed: " 2142 "%08x\n", port, status); 2143 if (status == HERMON_CMD_INVALID_STATUS) { 2144 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 2145 } 2146 return (ibc_get_ci_failure(0)); 2147 } 2148 2149 return (DDI_SUCCESS); 2150 } 2151 2152 2153 /* 2154 * hermon_set_addr_path() 2155 * Context: Can be called from interrupt or base context. 2156 * 2157 * Note: This routine is used for two purposes. It is used to fill in the 2158 * Hermon UDAV fields, and it is used to fill in the address path information 2159 * for QPs. Because the two Hermon structures are similar, common fields can 2160 * be filled in here. 
 Because they are different, however, we pass
 * an additional flag to indicate which type is being filled and do each one
 * uniquely
 */

/* allows ease of testing (force a static rate for firmware evaluation) */
int hermon_srate_override = -1;

int
hermon_set_addr_path(hermon_state_t *state, ibt_adds_vect_t *av,
    hermon_hw_addr_path_t *path, uint_t type)
{
	uint_t			gidtbl_sz;
	hermon_hw_udav_t	*udav;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av))
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path))

	/* UDAV overlays the address path structure (same layout prefix) */
	udav = (hermon_hw_udav_t *)(void *)path;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*udav))
	path->mlid = av->av_src_path;
	path->rlid = av->av_dlid;

	/* Translate IBTF static rate to the Hermon hardware encoding */
	switch (av->av_srate) {
	case IBT_SRATE_2:	/* 1xSDR-2.5Gb/s injection rate */
		path->max_stat_rate = 7; break;
	case IBT_SRATE_10:	/* 4xSDR-10.0Gb/s injection rate */
		path->max_stat_rate = 8; break;
	case IBT_SRATE_30:	/* 12xSDR-30Gb/s injection rate */
		path->max_stat_rate = 9; break;
	case IBT_SRATE_5:	/* 1xDDR-5Gb/s injection rate */
		path->max_stat_rate = 10; break;
	case IBT_SRATE_20:	/* 4xDDR-20Gb/s injection rate */
		path->max_stat_rate = 11; break;
	case IBT_SRATE_40:	/* 4xQDR-40Gb/s injection rate */
		path->max_stat_rate = 12; break;
	case IBT_SRATE_60:	/* 12xDDR-60Gb/s injection rate */
		path->max_stat_rate = 13; break;
	case IBT_SRATE_80:	/* 8xQDR-80Gb/s injection rate */
		path->max_stat_rate = 14; break;
	case IBT_SRATE_120:	/* 12xQDR-120Gb/s injection rate */
		path->max_stat_rate = 15; break;
	case IBT_SRATE_NOT_SPECIFIED:	/* Max */
		path->max_stat_rate = 0; break;
	default:
		return (IBT_STATIC_RATE_INVALID);
	}
	if (hermon_srate_override != -1) /* for evaluating HCA firmware */
		path->max_stat_rate = hermon_srate_override;

	/* If "grh" flag is set, then check for valid SGID index too */
	gidtbl_sz = (1 << state->hs_queryport.log_max_gid);
	/*
	 * NOTE(review): valid SGID indices are presumably 0..gidtbl_sz - 1,
	 * so ">" here looks like an off-by-one (">=" may have been intended).
	 * Confirm against the PRM / IBTF callers before changing.
	 */
	if ((av->av_send_grh) && (av->av_sgid_ix > gidtbl_sz)) {
		return (IBT_SGID_INVALID);
	}

	/*
	 * Fill in all "global" values regardless of the value in the GRH
	 * flag.  Because "grh" is not set unless "av_send_grh" is set, the
	 * hardware will ignore the other "global" values as necessary.  Note:
	 * SW does this here to enable later query operations to return
	 * exactly the same params that were passed when the addr path was
	 * last written.
	 */
	path->grh = av->av_send_grh;
	if (type == HERMON_ADDRPATH_QP) {
		path->mgid_index = av->av_sgid_ix;
	} else {
		/*
		 * For Hermon UDAV, the "mgid_index" field is the index into
		 * a combined table (not a per-port table), but having sections
		 * for each port.  So some extra calculations are necessary.
		 */

		path->mgid_index = ((av->av_port_num - 1) * gidtbl_sz) +
		    av->av_sgid_ix;

		udav->portnum = av->av_port_num;
	}

	/*
	 * According to Hermon PRM, the (31:0) part of rgid_l must be set to
	 * "0x2" if the 'grh' or 'g' bit is cleared.  It also says that we
	 * only need to do it for UDAV's.  So we enforce that here.
	 *
	 * NOTE: The entire 64 bits worth of GUID info is actually being
	 * preserved (for UDAVs) by the callers of this function
	 * (hermon_ah_alloc() and hermon_ah_modify()) and as long as the
	 * 'grh' bit is not set, the upper 32 bits (63:32) of rgid_l are
	 * "don't care".
	 */
	if ((path->grh) || (type == HERMON_ADDRPATH_QP)) {
		path->flow_label = av->av_flow;
		path->tclass = av->av_tclass;
		path->hop_limit = av->av_hop;
		/* bcopy used for alignment-safe 64-bit copies */
		bcopy(&(av->av_dgid.gid_prefix), &(path->rgid_h),
		    sizeof (uint64_t));
		bcopy(&(av->av_dgid.gid_guid), &(path->rgid_l),
		    sizeof (uint64_t));
	} else {
		path->rgid_l = 0x2;
		path->flow_label = 0;
		path->tclass = 0;
		path->hop_limit = 0;
		path->rgid_h = 0;
	}
	/* extract the default service level */
	udav->sl = (HERMON_DEF_SCHED_SELECTION & 0x3C) >> 2;

	return (DDI_SUCCESS);
}


/*
 * hermon_get_addr_path()
 *    Context: Can be called from interrupt or base context.
 *
 * Note: Just like hermon_set_addr_path() above, this routine is used for two
 * purposes.  It is used to read in the Hermon UDAV fields, and it is used to
 * read in the address path information for QPs.  Because the two Hermon
 * structures are similar, common fields can be read in here.  But because
 * they are slightly different, we pass an additional flag to indicate which
 * type is being read.
 */
void
hermon_get_addr_path(hermon_state_t *state, hermon_hw_addr_path_t *path,
    ibt_adds_vect_t *av, uint_t type)
{
	uint_t		gidtbl_sz;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path))
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av))

	av->av_src_path = path->mlid;
	av->av_dlid = path->rlid;

	/* Set "av_ipd" value from max_stat_rate */
	switch (path->max_stat_rate) {
	case 7:				/* 1xSDR-2.5Gb/s injection rate */
		av->av_srate = IBT_SRATE_2; break;
	case 8:				/* 4xSDR-10.0Gb/s injection rate */
		av->av_srate = IBT_SRATE_10; break;
	case 9:				/* 12xSDR-30Gb/s injection rate */
		av->av_srate = IBT_SRATE_30; break;
	case 10:			/* 1xDDR-5Gb/s injection rate */
		av->av_srate = IBT_SRATE_5; break;
	case 11:			/* 4xDDR-20Gb/s injection rate */
		av->av_srate = IBT_SRATE_20; break;
	case 12:			/* xQDR-40Gb/s injection rate */
		av->av_srate = IBT_SRATE_40; break;
	case 13:			/* 12xDDR-60Gb/s injection rate */
		av->av_srate = IBT_SRATE_60; break;
	case 14:			/* 8xQDR-80Gb/s injection rate */
		av->av_srate = IBT_SRATE_80; break;
	case 15:			/* 12xQDR-120Gb/s injection rate */
		av->av_srate = IBT_SRATE_120; break;
	case 0:				/* max */
		av->av_srate = IBT_SRATE_NOT_SPECIFIED; break;
	default:			/* 1x injection rate */
		av->av_srate = IBT_SRATE_1X;
	}

	/*
	 * Extract all "global" values regardless of the value in the GRH
	 * flag.  Because "av_send_grh" is set only if "grh" is set, software
	 * knows to ignore the other "global" values as necessary.  Note: SW
	 * does it this way to enable these query operations to return exactly
	 * the same params that were passed when the addr path was last
	 * written.
	 */
	av->av_send_grh = path->grh;
	if (type == HERMON_ADDRPATH_QP) {
		av->av_sgid_ix = path->mgid_index;
	} else {
		/*
		 * For Hermon UDAV, the "mgid_index" field is the index into
		 * a combined table (not a per-port table).
		 */
		gidtbl_sz = (1 << state->hs_queryport.log_max_gid);
		av->av_sgid_ix = path->mgid_index - ((av->av_port_num - 1) *
		    gidtbl_sz);

		av->av_port_num = ((hermon_hw_udav_t *)(void *)path)->portnum;
	}
	av->av_flow = path->flow_label;
	av->av_tclass = path->tclass;
	av->av_hop = path->hop_limit;
	/* this is for alignment issue w/ the addr path struct in Hermon */
	bcopy(&(path->rgid_h), &(av->av_dgid.gid_prefix), sizeof (uint64_t));
	bcopy(&(path->rgid_l), &(av->av_dgid.gid_guid), sizeof (uint64_t));
}


/*
 * hermon_portnum_is_valid()
 *    Return non-zero iff "portnum" is within [1, cp_num_ports].
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_portnum_is_valid(hermon_state_t *state, uint_t portnum)
{
	uint_t	max_port;

	max_port = state->hs_cfg_profile->cp_num_ports;
	if ((portnum <= max_port) && (portnum != 0)) {
		return (1);
	} else {
		return (0);
	}
}


/*
 * hermon_pkeyindex_is_valid()
 *    Return non-zero iff "pkeyindx" is below the configured PKey table size.
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_pkeyindex_is_valid(hermon_state_t *state, uint_t pkeyindx)
{
	uint_t	max_pkeyindx;

	max_pkeyindx = 1 << state->hs_cfg_profile->cp_log_max_pkeytbl;
	if (pkeyindx < max_pkeyindx) {
		return (1);
	} else {
		return (0);
	}
}


/*
 * hermon_queue_alloc()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_queue_alloc(hermon_state_t *state, hermon_qalloc_info_t *qa_info,
    uint_t sleepflag)
{
	ddi_dma_attr_t		dma_attr;
	int			(*callback)(caddr_t);
	uint64_t		realsize, alloc_mask;
	uint_t			type;
	int			flag, status;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info))

	/* Set the callback flag appropriately */
	callback = (sleepflag == HERMON_SLEEP) ? DDI_DMA_SLEEP :
	    DDI_DMA_DONTWAIT;

	/*
	 * Initialize many of the default DMA attributes.  Then set additional
	 * alignment restrictions as necessary for the queue memory.  Also
	 * respect the configured value for IOMMU bypass
	 */
	hermon_dma_attr_init(state, &dma_attr);
	dma_attr.dma_attr_align = qa_info->qa_bind_align;
	type = state->hs_cfg_profile->cp_iommu_bypass;
	if (type == HERMON_BINDMEM_BYPASS) {
		dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
	}

	/* Allocate a DMA handle */
	status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, callback, NULL,
	    &qa_info->qa_dmahdl);
	if (status != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}

	/*
	 * Determine the amount of memory to allocate, depending on the values
	 * in "qa_bind_align" and "qa_alloc_align".  The problem we are trying
	 * to solve here is that allocating a DMA handle with IOMMU bypass
	 * (DDI_DMA_FORCE_PHYSICAL) constrains us to only requesting alignments
	 * that are less restrictive than the page size.  Since we may need
	 * stricter alignments on the memory allocated by ddi_dma_mem_alloc()
	 * (e.g. in Hermon QP work queue memory allocation), we use the
	 * following method to calculate how much additional memory to request,
	 * and we enforce our own alignment on the allocated result.
	 */
	alloc_mask = qa_info->qa_alloc_align - 1;
	if (qa_info->qa_bind_align == qa_info->qa_alloc_align) {
		realsize = qa_info->qa_size;
	} else {
		/* Over-allocate so the start can be rounded up below */
		realsize = qa_info->qa_size + alloc_mask;
	}

	/*
	 * If we are to allocate the queue from system memory, then use
	 * ddi_dma_mem_alloc() to find the space.  Otherwise, this is a
	 * host memory allocation, use ddi_umem_alloc().  In either case,
	 * return a pointer to the memory range allocated (including any
	 * necessary alignment adjustments), the "real" memory pointer,
	 * the "real" size, and a ddi_acc_handle_t to use when reading
	 * from/writing to the memory.
	 */
	if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) {
		/* Allocate system memory for the queue */
		status = ddi_dma_mem_alloc(qa_info->qa_dmahdl, realsize,
		    &state->hs_reg_accattr, DDI_DMA_CONSISTENT, callback, NULL,
		    (caddr_t *)&qa_info->qa_buf_real,
		    (size_t *)&qa_info->qa_buf_realsz, &qa_info->qa_acchdl);
		if (status != DDI_SUCCESS) {
			ddi_dma_free_handle(&qa_info->qa_dmahdl);
			return (DDI_FAILURE);
		}

		/*
		 * Save temporary copy of the real pointer.  (This may be
		 * modified in the last step below).
		 */
		qa_info->qa_buf_aligned = qa_info->qa_buf_real;

		bzero(qa_info->qa_buf_real, qa_info->qa_buf_realsz);

	} else { /* HERMON_QUEUE_LOCATION_USERLAND */

		/* Allocate userland mappable memory for the queue */
		flag = (sleepflag == HERMON_SLEEP) ? DDI_UMEM_SLEEP :
		    DDI_UMEM_NOSLEEP;
		qa_info->qa_buf_real = ddi_umem_alloc(realsize, flag,
		    &qa_info->qa_umemcookie);
		if (qa_info->qa_buf_real == NULL) {
			ddi_dma_free_handle(&qa_info->qa_dmahdl);
			return (DDI_FAILURE);
		}

		/*
		 * Save temporary copy of the real pointer.  (This may be
		 * modified in the last step below).
		 */
		qa_info->qa_buf_aligned = qa_info->qa_buf_real;

	}

	/*
	 * The next to last step is to ensure that the final address
	 * ("qa_buf_aligned") has the appropriate "alloc" alignment
	 * restriction applied to it (if necessary).
	 */
	if (qa_info->qa_bind_align != qa_info->qa_alloc_align) {
		qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t)
		    qa_info->qa_buf_aligned + alloc_mask) & ~alloc_mask);
	}
	/*
	 * The last step is to figure out the offset of the start relative
	 * to the first page of the region - will be used in the eqc/cqc
	 * passed to the HW
	 */
	qa_info->qa_pgoffs = (uint_t)((uintptr_t)
	    qa_info->qa_buf_aligned & HERMON_PAGEMASK);

	return (DDI_SUCCESS);
}


/*
 * hermon_queue_free()
 *    Release the memory and DMA handle allocated by hermon_queue_alloc().
 *    Context: Can be called from interrupt or base context.
 */
void
hermon_queue_free(hermon_qalloc_info_t *qa_info)
{
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info))

	/*
	 * Depending on how (i.e. from where) we allocated the memory for
	 * this queue, we choose the appropriate method for releasing the
	 * resources.
	 */
	if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) {

		ddi_dma_mem_free(&qa_info->qa_acchdl);

	} else if (qa_info->qa_location == HERMON_QUEUE_LOCATION_USERLAND) {

		ddi_umem_free(qa_info->qa_umemcookie);

	}

	/* Always free the dma handle */
	ddi_dma_free_handle(&qa_info->qa_dmahdl);
}

/*
 * hermon_create_fmr_pool()
 *    Create a pool of FMRs.
 *    (The comment previously misnamed this function "hermon_destroy_
 *    fmr_pool()".)
 *    Context: Can be called from kernel context only.
2545 */ 2546 int 2547 hermon_create_fmr_pool(hermon_state_t *state, hermon_pdhdl_t pd, 2548 ibt_fmr_pool_attr_t *fmr_attr, hermon_fmrhdl_t *fmrpoolp) 2549 { 2550 hermon_fmrhdl_t fmrpool; 2551 hermon_fmr_list_t *fmr, *fmr_next; 2552 hermon_mrhdl_t mr; 2553 char taskqname[48]; 2554 int status; 2555 int sleep; 2556 int i; 2557 2558 sleep = (fmr_attr->fmr_flags & IBT_MR_SLEEP) ? HERMON_SLEEP : 2559 HERMON_NOSLEEP; 2560 if ((sleep == HERMON_SLEEP) && 2561 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) { 2562 return (IBT_INVALID_PARAM); 2563 } 2564 2565 fmrpool = (hermon_fmrhdl_t)kmem_zalloc(sizeof (*fmrpool), sleep); 2566 if (fmrpool == NULL) { 2567 status = IBT_INSUFF_RESOURCE; 2568 goto fail; 2569 } 2570 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmrpool)) 2571 2572 mutex_init(&fmrpool->fmr_lock, NULL, MUTEX_DRIVER, 2573 DDI_INTR_PRI(state->hs_intrmsi_pri)); 2574 2575 fmrpool->fmr_state = state; 2576 fmrpool->fmr_flush_function = fmr_attr->fmr_func_hdlr; 2577 fmrpool->fmr_flush_arg = fmr_attr->fmr_func_arg; 2578 fmrpool->fmr_pool_size = 0; 2579 fmrpool->fmr_cache = 0; 2580 fmrpool->fmr_max_pages = fmr_attr->fmr_max_pages_per_fmr; 2581 fmrpool->fmr_page_sz = fmr_attr->fmr_page_sz; 2582 fmrpool->fmr_dirty_watermark = fmr_attr->fmr_dirty_watermark; 2583 fmrpool->fmr_dirty_len = 0; 2584 fmrpool->fmr_flags = fmr_attr->fmr_flags; 2585 2586 /* Create taskq to handle cleanup and flush processing */ 2587 (void) snprintf(taskqname, 50, "fmrpool/%d/%d @ 0x%" PRIx64, 2588 fmr_attr->fmr_pool_size, hermon_debug_fmrpool_cnt, 2589 (uint64_t)(uintptr_t)fmrpool); 2590 fmrpool->fmr_taskq = ddi_taskq_create(state->hs_dip, taskqname, 2591 HERMON_TASKQ_NTHREADS, TASKQ_DEFAULTPRI, 0); 2592 if (fmrpool->fmr_taskq == NULL) { 2593 status = IBT_INSUFF_RESOURCE; 2594 goto fail1; 2595 } 2596 2597 fmrpool->fmr_free_list = NULL; 2598 fmrpool->fmr_dirty_list = NULL; 2599 2600 if (fmr_attr->fmr_cache) { 2601 hermon_fmr_cache_init(fmrpool); 2602 } 2603 2604 for (i = 0; i < fmr_attr->fmr_pool_size; i++) { 2605 
status = hermon_mr_alloc_fmr(state, pd, fmrpool, &mr); 2606 if (status != DDI_SUCCESS) { 2607 goto fail2; 2608 } 2609 2610 fmr = (hermon_fmr_list_t *)kmem_zalloc( 2611 sizeof (hermon_fmr_list_t), sleep); 2612 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr)) 2613 2614 fmr->fmr = mr; 2615 fmr->fmr_refcnt = 0; 2616 fmr->fmr_remaps = 0; 2617 fmr->fmr_pool = fmrpool; 2618 fmr->fmr_in_cache = 0; 2619 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) 2620 mr->mr_fmr = fmr; 2621 2622 fmr->fmr_next = fmrpool->fmr_free_list; 2623 fmrpool->fmr_free_list = fmr; 2624 fmrpool->fmr_pool_size++; 2625 } 2626 2627 /* Set to return pool */ 2628 *fmrpoolp = fmrpool; 2629 2630 return (IBT_SUCCESS); 2631 fail2: 2632 hermon_fmr_cache_fini(fmrpool); 2633 for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) { 2634 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr)) 2635 fmr_next = fmr->fmr_next; 2636 (void) hermon_mr_dealloc_fmr(state, &fmr->fmr); 2637 kmem_free(fmr, sizeof (hermon_fmr_list_t)); 2638 } 2639 ddi_taskq_destroy(fmrpool->fmr_taskq); 2640 fail1: 2641 kmem_free(fmrpool, sizeof (*fmrpool)); 2642 fail: 2643 if (status == DDI_FAILURE) { 2644 return (ibc_get_ci_failure(0)); 2645 } else { 2646 return (status); 2647 } 2648 } 2649 2650 /* 2651 * hermon_destroy_fmr_pool() 2652 * Destroy an FMR pool and free all associated resources. 2653 * Context: Can be called from kernel context only. 
 */
int
hermon_destroy_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool)
{
	hermon_fmr_list_t	*fmr, *fmr_next;
	int			status;

	/* Flush all dirty entries before tearing the pool down */
	mutex_enter(&fmrpool->fmr_lock);
	status = hermon_fmr_cleanup(state, fmrpool);
	if (status != DDI_SUCCESS) {
		mutex_exit(&fmrpool->fmr_lock);
		return (status);
	}

	if (fmrpool->fmr_cache) {
		hermon_fmr_cache_fini(fmrpool);
	}

	/* Deallocate every FMR on the (now complete) free list */
	for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) {
		fmr_next = fmr->fmr_next;

		(void) hermon_mr_dealloc_fmr(state, &fmr->fmr);
		kmem_free(fmr, sizeof (hermon_fmr_list_t));
	}
	mutex_exit(&fmrpool->fmr_lock);

	ddi_taskq_destroy(fmrpool->fmr_taskq);
	mutex_destroy(&fmrpool->fmr_lock);

	kmem_free(fmrpool, sizeof (*fmrpool));
	return (DDI_SUCCESS);
}

/*
 * hermon_flush_fmr_pool()
 *    Ensure that all unmapped FMRs are fully invalidated.
 *    Context: Can be called from kernel context only.
 */
int
hermon_flush_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool)
{
	int	status;

	/*
	 * Force the unmapping of all entries on the dirty list, regardless of
	 * whether the watermark has been hit yet.
	 */
	/* grab the pool lock */
	mutex_enter(&fmrpool->fmr_lock);
	status = hermon_fmr_cleanup(state, fmrpool);
	mutex_exit(&fmrpool->fmr_lock);
	return (status);
}

/*
 * hermon_register_physical_fmr()
 *    Map memory into FMR
 *    (The comment previously misnamed this function "hermon_deregister_
 *    fmr()".)
 *    Context: Can be called from interrupt or base context.
2712 */ 2713 int 2714 hermon_register_physical_fmr(hermon_state_t *state, hermon_fmrhdl_t fmrpool, 2715 ibt_pmr_attr_t *mem_pattr, hermon_mrhdl_t *mr, 2716 ibt_pmr_desc_t *mem_desc_p) 2717 { 2718 hermon_fmr_list_t *fmr; 2719 hermon_fmr_list_t query; 2720 avl_index_t where; 2721 int status; 2722 2723 /* Check length */ 2724 mutex_enter(&fmrpool->fmr_lock); 2725 if (mem_pattr->pmr_len < 1 || (mem_pattr->pmr_num_buf > 2726 fmrpool->fmr_max_pages)) { 2727 mutex_exit(&fmrpool->fmr_lock); 2728 return (IBT_MR_LEN_INVALID); 2729 } 2730 2731 mutex_enter(&fmrpool->fmr_cachelock); 2732 /* lookup in fmr cache */ 2733 /* if exists, grab it, and return it */ 2734 if (fmrpool->fmr_cache) { 2735 query.fmr_desc.pmd_iova = mem_pattr->pmr_iova; 2736 query.fmr_desc.pmd_phys_buf_list_sz = mem_pattr->pmr_len; 2737 fmr = (hermon_fmr_list_t *)avl_find(&fmrpool->fmr_cache_avl, 2738 &query, &where); 2739 2740 /* 2741 * If valid FMR was found in cache, return that fmr info 2742 */ 2743 if (fmr != NULL) { 2744 fmr->fmr_refcnt++; 2745 /* Store pmr desc for use in cache */ 2746 (void) memcpy(mem_desc_p, &fmr->fmr_desc, 2747 sizeof (ibt_pmr_desc_t)); 2748 *mr = (hermon_mrhdl_t)fmr->fmr; 2749 mutex_exit(&fmrpool->fmr_cachelock); 2750 mutex_exit(&fmrpool->fmr_lock); 2751 return (DDI_SUCCESS); 2752 } 2753 } 2754 2755 /* FMR does not exist in cache, proceed with registration */ 2756 2757 /* grab next free entry */ 2758 fmr = fmrpool->fmr_free_list; 2759 if (fmr == NULL) { 2760 mutex_exit(&fmrpool->fmr_cachelock); 2761 mutex_exit(&fmrpool->fmr_lock); 2762 return (IBT_INSUFF_RESOURCE); 2763 } 2764 2765 fmrpool->fmr_free_list = fmrpool->fmr_free_list->fmr_next; 2766 fmr->fmr_next = NULL; 2767 2768 status = hermon_mr_register_physical_fmr(state, mem_pattr, fmr->fmr, 2769 mem_desc_p); 2770 if (status != DDI_SUCCESS) { 2771 mutex_exit(&fmrpool->fmr_cachelock); 2772 mutex_exit(&fmrpool->fmr_lock); 2773 return (status); 2774 } 2775 2776 fmr->fmr_refcnt = 1; 2777 fmr->fmr_remaps++; 2778 2779 /* Store pmr 
desc for use in cache */ 2780 (void) memcpy(&fmr->fmr_desc, mem_desc_p, sizeof (ibt_pmr_desc_t)); 2781 *mr = (hermon_mrhdl_t)fmr->fmr; 2782 2783 /* Store in cache */ 2784 if (fmrpool->fmr_cache) { 2785 if (!fmr->fmr_in_cache) { 2786 avl_insert(&fmrpool->fmr_cache_avl, fmr, where); 2787 fmr->fmr_in_cache = 1; 2788 } 2789 } 2790 2791 mutex_exit(&fmrpool->fmr_cachelock); 2792 mutex_exit(&fmrpool->fmr_lock); 2793 return (DDI_SUCCESS); 2794 } 2795 2796 /* 2797 * hermon_deregister_fmr() 2798 * Unmap FMR 2799 * Context: Can be called from kernel context only. 2800 */ 2801 int 2802 hermon_deregister_fmr(hermon_state_t *state, hermon_mrhdl_t mr) 2803 { 2804 hermon_fmr_list_t *fmr; 2805 hermon_fmrhdl_t fmrpool; 2806 int status; 2807 2808 fmr = mr->mr_fmr; 2809 fmrpool = fmr->fmr_pool; 2810 2811 /* Grab pool lock */ 2812 mutex_enter(&fmrpool->fmr_lock); 2813 fmr->fmr_refcnt--; 2814 2815 if (fmr->fmr_refcnt == 0) { 2816 /* 2817 * First, do some bit of invalidation, reducing our exposure to 2818 * having this region still registered in hardware. 2819 */ 2820 (void) hermon_mr_invalidate_fmr(state, mr); 2821 2822 /* 2823 * If we've exhausted our remaps then add the FMR to the dirty 2824 * list, not allowing it to be re-used until we have done a 2825 * flush. Otherwise, simply add it back to the free list for 2826 * re-mapping. 
 */
		if (fmr->fmr_remaps <
		    state->hs_cfg_profile->cp_fmr_max_remaps) {
			/* add to free list */
			fmr->fmr_next = fmrpool->fmr_free_list;
			fmrpool->fmr_free_list = fmr;
		} else {
			/* add to dirty list */
			fmr->fmr_next = fmrpool->fmr_dirty_list;
			fmrpool->fmr_dirty_list = fmr;
			fmrpool->fmr_dirty_len++;

			/* Kick off asynchronous flush processing */
			status = ddi_taskq_dispatch(fmrpool->fmr_taskq,
			    hermon_fmr_processing, fmrpool, DDI_NOSLEEP);
			if (status == DDI_FAILURE) {
				mutex_exit(&fmrpool->fmr_lock);
				return (IBT_INSUFF_RESOURCE);
			}
		}
	}
	/* Release pool lock */
	mutex_exit(&fmrpool->fmr_lock);

	return (DDI_SUCCESS);
}


/*
 * hermon_fmr_processing()
 *    If required, perform cleanup.
 *    Context: Called from taskq context only.
 */
static void
hermon_fmr_processing(void *fmr_args)
{
	hermon_fmrhdl_t	fmrpool;
	int		status;

	ASSERT(fmr_args != NULL);

	fmrpool = (hermon_fmrhdl_t)fmr_args;

	/* grab pool lock */
	mutex_enter(&fmrpool->fmr_lock);
	/* Only flush once the dirty list has reached the watermark */
	if (fmrpool->fmr_dirty_len >= fmrpool->fmr_dirty_watermark) {
		status = hermon_fmr_cleanup(fmrpool->fmr_state, fmrpool);
		if (status != DDI_SUCCESS) {
			mutex_exit(&fmrpool->fmr_lock);
			return;
		}

		/* Notify the consumer that a flush has occurred */
		if (fmrpool->fmr_flush_function != NULL) {
			(void) fmrpool->fmr_flush_function(
			    (ibc_fmr_pool_hdl_t)fmrpool,
			    fmrpool->fmr_flush_arg);
		}
	}

	/* let pool lock go */
	mutex_exit(&fmrpool->fmr_lock);
}

/*
 * hermon_fmr_cleanup()
 *    Perform cleaning processing, walking the list and performing the MTT sync
 *    operation if required.
 *    Context: can be called from taskq or base context.
2894 */ 2895 static int 2896 hermon_fmr_cleanup(hermon_state_t *state, hermon_fmrhdl_t fmrpool) 2897 { 2898 hermon_fmr_list_t *fmr; 2899 hermon_fmr_list_t *fmr_next; 2900 int sync_needed; 2901 int status; 2902 2903 ASSERT(MUTEX_HELD(&fmrpool->fmr_lock)); 2904 2905 sync_needed = 0; 2906 for (fmr = fmrpool->fmr_dirty_list; fmr; fmr = fmr_next) { 2907 fmr_next = fmr->fmr_next; 2908 fmr->fmr_remaps = 0; 2909 2910 (void) hermon_mr_deregister_fmr(state, fmr->fmr); 2911 2912 /* 2913 * Update lists. 2914 * - add fmr back to free list 2915 * - remove fmr from dirty list 2916 */ 2917 fmr->fmr_next = fmrpool->fmr_free_list; 2918 fmrpool->fmr_free_list = fmr; 2919 2920 2921 /* 2922 * Because we have updated the dirty list, and deregistered the 2923 * FMR entry, we do need to sync the TPT, so we set the 2924 * 'sync_needed' flag here so we sync once we finish dirty_list 2925 * processing. 2926 */ 2927 sync_needed = 1; 2928 } 2929 2930 fmrpool->fmr_dirty_list = NULL; 2931 fmrpool->fmr_dirty_len = 0; 2932 2933 if (sync_needed) { 2934 status = hermon_sync_tpt_cmd_post(state, 2935 HERMON_CMD_NOSLEEP_SPIN); 2936 if (status != HERMON_CMD_SUCCESS) { 2937 return (status); 2938 } 2939 } 2940 2941 return (DDI_SUCCESS); 2942 } 2943 2944 /* 2945 * hermon_fmr_avl_compare() 2946 * Context: Can be called from user or kernel context. 2947 */ 2948 static int 2949 hermon_fmr_avl_compare(const void *q, const void *e) 2950 { 2951 hermon_fmr_list_t *entry, *query; 2952 2953 entry = (hermon_fmr_list_t *)e; 2954 query = (hermon_fmr_list_t *)q; 2955 2956 if (query->fmr_desc.pmd_iova < entry->fmr_desc.pmd_iova) { 2957 return (-1); 2958 } else if (query->fmr_desc.pmd_iova > entry->fmr_desc.pmd_iova) { 2959 return (+1); 2960 } else { 2961 return (0); 2962 } 2963 } 2964 2965 2966 /* 2967 * hermon_fmr_cache_init() 2968 * Context: Can be called from user or kernel context. 
 */
static void
hermon_fmr_cache_init(hermon_fmrhdl_t fmr)
{
	/* Initialize the lock used for FMR cache AVL tree access */
	mutex_init(&fmr->fmr_cachelock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(fmr->fmr_state->hs_intrmsi_pri));

	/* Initialize the AVL tree for the FMR cache, keyed by iova */
	avl_create(&fmr->fmr_cache_avl, hermon_fmr_avl_compare,
	    sizeof (hermon_fmr_list_t),
	    offsetof(hermon_fmr_list_t, fmr_avlnode));

	/* Mark caching as enabled for this pool */
	fmr->fmr_cache = 1;
}


/*
 * hermon_fmr_cache_fini()
 *    Context: Can be called from user or kernel context.
 */
static void
hermon_fmr_cache_fini(hermon_fmrhdl_t fmr)
{
	void	*cookie;

	/*
	 * Empty all entries (if necessary) and destroy the AVL tree.
	 * The FMRs themselves are freed as part of destroy_pool()
	 */
	cookie = NULL;
	while (((void *)(hermon_fmr_list_t *)avl_destroy_nodes(
	    &fmr->fmr_cache_avl, &cookie)) != NULL) {
		/* loop through */
	}
	avl_destroy(&fmr->fmr_cache_avl);

	/* Destroy the lock used for FMR cache */
	mutex_destroy(&fmr->fmr_cachelock);
}

/*
 * hermon_get_dma_cookies()
 *    Return DMA cookies in the pre-allocated paddr_list_p based on the length
 *    needed.
 *    Context: Can be called from interrupt or base context.
 *
 *    Allocates and binds a DMA handle for the buffer/address described by
 *    va_attrs, copies the resulting cookies into paddr_list_p (up to
 *    list_len entries), sets *cookiecnt, and returns the bound handle via
 *    ibc_ma_hdl_p for a later hermon_free_dma_cookies() call.
 */
int
hermon_get_dma_cookies(hermon_state_t *state, ibt_phys_buf_t *paddr_list_p,
    ibt_va_attr_t *va_attrs, uint_t list_len, uint_t *cookiecnt,
    ibc_ma_hdl_t *ibc_ma_hdl_p)
{
	ddi_dma_handle_t	dma_hdl;
	ddi_dma_attr_t		dma_attr;
	ddi_dma_cookie_t	dmacookie;
	int			(*callback)(caddr_t);
	int			status;
	int			i;

	/*
	 * Set the callback flag appropriately; sleeping is only legal when
	 * the current context allows it.
	 */
	callback = (va_attrs->va_flags & IBT_VA_NOSLEEP) ? DDI_DMA_DONTWAIT :
	    DDI_DMA_SLEEP;
	if ((callback == DDI_DMA_SLEEP) &&
	    (HERMON_SLEEP != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		return (IBT_INVALID_PARAM);
	}

	/*
	 * Initialize many of the default DMA attributes and allocate the DMA
	 * handle.  Then, if we're bypassing the IOMMU, set the
	 * DDI_DMA_FORCE_PHYSICAL flag.
	 */
	hermon_dma_attr_init(state, &dma_attr);

#ifdef __x86
	/*
	 * On x86 we can specify a maximum segment length for our returned
	 * cookies.
	 */
	if (va_attrs->va_flags & IBT_VA_FMR) {
		dma_attr.dma_attr_seg = PAGESIZE - 1;
	}
#endif

	/*
	 * Check to see if the RO flag is set, and if so,
	 * set that bit in the attr structure as well.
	 *
	 * NOTE 1:  This function is ONLY called by consumers, and only for
	 *	    data buffers
	 */
	if (hermon_kernel_data_ro == HERMON_RO_ENABLED) {
		dma_attr.dma_attr_flags |= DDI_DMA_RELAXED_ORDERING;
	}

	status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
	    callback, NULL, &dma_hdl);
	if (status != DDI_SUCCESS) {
		switch (status) {
		case DDI_DMA_NORESOURCES:
			return (IBT_INSUFF_RESOURCE);
		case DDI_DMA_BADATTR:
		default:
			return (ibc_get_ci_failure(0));
		}
	}

	/*
	 * Now bind the handle with the correct DMA attributes.
	 *
	 * NOTE(review): the bind calls below pass DDI_DMA_DONTWAIT even when
	 * 'callback' was computed as DDI_DMA_SLEEP above — confirm this is
	 * intentional (bind never blocks here).
	 */
	if (va_attrs->va_flags & IBT_VA_BUF) {
		status = ddi_dma_buf_bind_handle(dma_hdl, va_attrs->va_buf,
		    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_DONTWAIT,
		    NULL, &dmacookie, cookiecnt);
	} else {
		status = ddi_dma_addr_bind_handle(dma_hdl, NULL,
		    (caddr_t)(uintptr_t)va_attrs->va_vaddr, va_attrs->va_len,
		    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_DONTWAIT,
		    NULL, &dmacookie, cookiecnt);
	}
	if (status != DDI_SUCCESS) {
		ddi_dma_free_handle(&dma_hdl);

		switch (status) {
		case DDI_DMA_NORESOURCES:
			return (IBT_INSUFF_RESOURCE);
		case DDI_DMA_TOOBIG:
			return (IBT_INVALID_PARAM);
		case DDI_DMA_PARTIAL_MAP:
		case DDI_DMA_INUSE:
		case DDI_DMA_NOMAPPING:
		default:
			return (ibc_get_ci_failure(0));
		}
	}

	/*
	 * Verify our physical buffer list (PBL) is large enough to handle the
	 * number of cookies that were returned.
	 */
	if (*cookiecnt > list_len) {
		(void) ddi_dma_unbind_handle(dma_hdl);
		ddi_dma_free_handle(&dma_hdl);
		return (IBT_PBL_TOO_SMALL);
	}

	/*
	 * We store the cookies returned by the DDI into our own PBL.  This
	 * sets the cookies up for later processing (for example, if we want to
	 * split up the cookies into smaller chunks).  We use the laddr and
	 * size fields in each cookie to create each individual entry (PBE).
	 */

	/*
	 * Store first cookie info first
	 */
	paddr_list_p[0].p_laddr = dmacookie.dmac_laddress;
	paddr_list_p[0].p_size = dmacookie.dmac_size;

	/*
	 * Loop through each cookie, storing each cookie into our physical
	 * buffer list.
	 */
	for (i = 1; i < *cookiecnt; i++) {
		ddi_dma_nextcookie(dma_hdl, &dmacookie);

		paddr_list_p[i].p_laddr = dmacookie.dmac_laddress;
		paddr_list_p[i].p_size = dmacookie.dmac_size;
	}

	/* return handle so the caller can later unbind/free it */
	*ibc_ma_hdl_p = (ibc_ma_hdl_t)dma_hdl;
	return (DDI_SUCCESS);
}

/*
 * hermon_split_dma_cookies()
 *    Split up cookies passed in from paddr_list_p, returning the new list in
 *    the same buffers, based on the pagesize to split the cookies into.
 *    Context: Can be called from interrupt or base context.
 *
 *    On return *paddr_offset holds the first cookie's offset within its
 *    page, and *cookiecnt is updated to the new (larger) cookie count.
 *    Returns DDI_SUCCESS, or IBT_PBL_TOO_SMALL if list_len entries are not
 *    enough to hold the split-up list.
 */
/* ARGSUSED */
int
hermon_split_dma_cookies(hermon_state_t *state, ibt_phys_buf_t *paddr_list,
    ib_memlen_t *paddr_offset, uint_t list_len, uint_t *cookiecnt,
    uint_t pagesize)
{
	uint64_t	pageoffset;
	uint64_t	pagemask;
	uint_t		pageshift;
	uint_t		current_cookiecnt;
	uint_t		cookies_needed;
	uint64_t	last_size, extra_cookie;
	int		i_increment;
	int		i, k;
	int		status;

	/* Setup pagesize calculations (pagesize is assumed a power of two) */
	pageoffset = pagesize - 1;
	pagemask = (~pageoffset);
	pageshift = highbit(pagesize) - 1;

	/*
	 * Setup first cookie offset based on pagesize requested.
	 */
	*paddr_offset = paddr_list[0].p_laddr & pageoffset;
	paddr_list[0].p_laddr &= pagemask;

	/* Save away the current number of cookies that are passed in */
	current_cookiecnt = *cookiecnt;

	/* Perform splitting up of current cookies into pagesize blocks */
	for (i = 0; i < current_cookiecnt; i += i_increment) {
		/*
		 * If the cookie is smaller than pagesize, or already is
		 * pagesize, then we are already within our limits, so we skip
		 * it.
		 */
		if (paddr_list[i].p_size <= pagesize) {
			i_increment = 1;
			continue;
		}

		/*
		 * If this is our first cookie, then we have to deal with the
		 * offset that may be present in the first address.  So add
		 * that to our size, to calculate potential change to the last
		 * cookie's size.
		 *
		 * Also, calculate the number of cookies that we'll need to
		 * split up this block into.
		 */
		if (i == 0) {
			last_size = (paddr_list[i].p_size + *paddr_offset) &
			    pageoffset;
			cookies_needed = (paddr_list[i].p_size +
			    *paddr_offset) >> pageshift;
		} else {
			last_size = 0;
			cookies_needed = paddr_list[i].p_size >> pageshift;
		}

		/*
		 * If our size is not a multiple of pagesize, we need one more
		 * cookie.
		 */
		if (last_size) {
			extra_cookie = 1;
		} else {
			extra_cookie = 0;
		}

		/*
		 * Split cookie into pagesize chunks, shifting list of cookies
		 * down, using more cookie slots in the PBL if necessary.
		 */
		status = hermon_dma_cookie_shift(paddr_list, i, list_len,
		    current_cookiecnt - i, cookies_needed + extra_cookie);
		if (status != 0) {
			return (status);
		}

		/*
		 * If the very first cookie, we must take possible offset into
		 * account.
		 */
		if (i == 0) {
			paddr_list[i].p_size = pagesize - *paddr_offset;
		} else {
			paddr_list[i].p_size = pagesize;
		}

		/*
		 * We have shifted the existing cookies down the PBL, now fill
		 * in the blank entries by splitting up our current block.
		 */
		for (k = 1; k < cookies_needed; k++) {
			paddr_list[i + k].p_laddr =
			    paddr_list[i + k - 1].p_laddr + pagesize;
			paddr_list[i + k].p_size = pagesize;
		}

		/* If we have one extra cookie (of less than pagesize...) */
		if (extra_cookie) {
			paddr_list[i + k].p_laddr =
			    paddr_list[i + k - 1].p_laddr + pagesize;
			paddr_list[i + k].p_size = (size_t)last_size;
		}

		/* Increment cookiecnt appropriately based on cookies used */
		i_increment = cookies_needed + extra_cookie;
		current_cookiecnt += i_increment - 1;
	}

	/* Update to new cookie count */
	*cookiecnt = current_cookiecnt;
	return (DDI_SUCCESS);
}

/*
 * hermon_dma_cookie_shift()
 *    Context: Can be called from interrupt or base context.
 *
 *    Shift the cookiecnt entries beginning just after 'start' down the PBL
 *    by (num_shift - 1) slots, opening a gap for a cookie about to be split.
 *    Entry 'start' itself is left in place (it is being split).  Returns
 *    IBT_PBL_TOO_SMALL if the shifted entries would run past 'end'.
 */
int
hermon_dma_cookie_shift(ibt_phys_buf_t *paddr_list, int start, int end,
    int cookiecnt, int num_shift)
{
	int	shift_start;
	int	i;

	/* Calculating starting point in the PBL list */
	shift_start = start + cookiecnt - 1;

	/* Check if we're at the end of our PBL list */
	if ((shift_start + num_shift - 1) >= end) {
		return (IBT_PBL_TOO_SMALL);
	}

	/* Shift from the tail downward so entries are not overwritten */
	for (i = shift_start; i > start; i--) {
		paddr_list[i + num_shift - 1] = paddr_list[i];
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_free_dma_cookies()
 *    Context: Can be called from interrupt or base context.
 *
 *    Unbind and free the DMA handle previously returned (as an ibc_ma_hdl_t)
 *    by hermon_get_dma_cookies().
 */
int
hermon_free_dma_cookies(ibc_ma_hdl_t ma_hdl)
{
	ddi_dma_handle_t	dma_hdl;
	int			status;

	dma_hdl = (ddi_dma_handle_t)ma_hdl;

	status = ddi_dma_unbind_handle(dma_hdl);
	if (status != DDI_SUCCESS) {
		return (ibc_get_ci_failure(0));
	}
	ddi_dma_free_handle(&dma_hdl);

	return (DDI_SUCCESS);
}