1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * hermon_misc.c 29 * Hermon Miscellaneous routines - Address Handle, Multicast, Protection 30 * Domain, and port-related operations 31 * 32 * Implements all the routines necessary for allocating, freeing, querying 33 * and modifying Address Handles and Protection Domains. Also implements 34 * all the routines necessary for adding and removing Queue Pairs to/from 35 * Multicast Groups. Lastly, it implements the routines necessary for 36 * port-related query and modify operations. 
37 */ 38 39 #include <sys/types.h> 40 #include <sys/conf.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/modctl.h> 44 #include <sys/bitmap.h> 45 #include <sys/sysmacros.h> 46 47 #include <sys/ib/adapters/hermon/hermon.h> 48 49 extern uint32_t hermon_kernel_data_ro; 50 51 /* used for helping uniquify fmr pool taskq name */ 52 static uint_t hermon_debug_fmrpool_cnt = 0x00000000; 53 54 static int hermon_mcg_qplist_add(hermon_state_t *state, hermon_mcghdl_t mcg, 55 hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp, uint_t *qp_found); 56 static int hermon_mcg_qplist_remove(hermon_mcghdl_t mcg, 57 hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp); 58 static void hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp); 59 static void hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp); 60 static uint_t hermon_mcg_walk_mgid_hash(hermon_state_t *state, 61 uint64_t start_indx, ib_gid_t mgid, uint_t *prev_indx); 62 static void hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg, 63 hermon_hw_mcg_t *mcg_hdr, ib_gid_t mgid, hermon_rsrc_t *mcg_rsrc); 64 static int hermon_mcg_hash_list_remove(hermon_state_t *state, uint_t curr_indx, 65 uint_t prev_indx, hermon_hw_mcg_t *mcg_entry); 66 static int hermon_mcg_entry_invalidate(hermon_state_t *state, 67 hermon_hw_mcg_t *mcg_entry, uint_t indx); 68 static int hermon_mgid_is_valid(ib_gid_t gid); 69 static int hermon_mlid_is_valid(ib_lid_t lid); 70 static void hermon_fmr_processing(void *fmr_args); 71 static int hermon_fmr_cleanup(hermon_state_t *state, hermon_fmrhdl_t pool); 72 static void hermon_fmr_cache_init(hermon_fmrhdl_t fmr); 73 static void hermon_fmr_cache_fini(hermon_fmrhdl_t fmr); 74 static int hermon_fmr_avl_compare(const void *q, const void *e); 75 76 77 #define HERMON_MAX_DBR_PAGES_PER_USER 64 78 #define HERMON_DBR_KEY(index, page) \ 79 (((uint64_t)index) * HERMON_MAX_DBR_PAGES_PER_USER + (page)) 80 81 static hermon_udbr_page_t * 82 hermon_dbr_new_user_page(hermon_state_t *state, uint_t index, 83 uint_t page) 84 { 85 
hermon_udbr_page_t *pagep; 86 ddi_dma_attr_t dma_attr; 87 uint_t cookiecnt; 88 int i, status; 89 uint64_t *p; 90 hermon_umap_db_entry_t *umapdb; 91 92 pagep = kmem_alloc(sizeof (*pagep), KM_SLEEP); 93 pagep->upg_index = page; 94 pagep->upg_nfree = PAGESIZE / sizeof (hermon_dbr_t); 95 pagep->upg_firstfree = 0; 96 pagep->upg_kvaddr = ddi_umem_alloc(PAGESIZE, DDI_UMEM_SLEEP, 97 &pagep->upg_umemcookie); /* not HERMON_PAGESIZE here */ 98 99 /* link free entries */ 100 p = (uint64_t *)(void *)pagep->upg_kvaddr; 101 for (i = pagep->upg_firstfree; i < pagep->upg_nfree; i++) 102 p[i] = i + 1; 103 pagep->upg_buf = ddi_umem_iosetup(pagep->upg_umemcookie, 0, 104 PAGESIZE, B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP); 105 106 hermon_dma_attr_init(state, &dma_attr); 107 status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, 108 DDI_DMA_SLEEP, NULL, &pagep->upg_dmahdl); 109 if (status != DDI_SUCCESS) { 110 IBTF_DPRINTF_L2("hermon", "hermon_new_user_page: " 111 "ddi_dma_buf_bind_handle failed: %d", status); 112 return (NULL); 113 } 114 status = ddi_dma_buf_bind_handle(pagep->upg_dmahdl, 115 pagep->upg_buf, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 116 DDI_DMA_SLEEP, NULL, &pagep->upg_dmacookie, &cookiecnt); 117 if (status != DDI_SUCCESS) { 118 IBTF_DPRINTF_L2("hermon", "hermon_dbr_new_user_page: " 119 "ddi_dma_buf_bind_handle failed: %d", status); 120 ddi_dma_free_handle(&pagep->upg_dmahdl); 121 return (NULL); 122 } 123 ASSERT(cookiecnt == 1); 124 125 /* create db entry for mmap */ 126 umapdb = hermon_umap_db_alloc(state->hs_instance, 127 HERMON_DBR_KEY(index, page), MLNX_UMAP_DBRMEM_RSRC, 128 (uint64_t)(uintptr_t)pagep); 129 hermon_umap_db_add(umapdb); 130 return (pagep); 131 } 132 133 134 /*ARGSUSED*/ 135 static int 136 hermon_user_dbr_alloc(hermon_state_t *state, uint_t index, 137 ddi_acc_handle_t *acchdl, hermon_dbr_t **vdbr, uint64_t *pdbr, 138 uint64_t *mapoffset) 139 { 140 hermon_user_dbr_t *udbr; 141 hermon_udbr_page_t *pagep; 142 uint_t next_page; 143 int j; 144 145 
mutex_enter(&state->hs_dbr_lock); 146 for (udbr = state->hs_user_dbr; udbr != NULL; udbr = udbr->udbr_link) 147 if (udbr->udbr_index == index) 148 break; 149 if (udbr == NULL) { 150 udbr = kmem_alloc(sizeof (*udbr), KM_SLEEP); 151 udbr->udbr_link = state->hs_user_dbr; 152 state->hs_user_dbr = udbr; 153 udbr->udbr_index = index; 154 udbr->udbr_pagep = NULL; 155 } 156 pagep = udbr->udbr_pagep; 157 next_page = (pagep == NULL) ? 0 : (pagep->upg_index + 1); 158 while (pagep != NULL) 159 if (pagep->upg_nfree > 0) 160 break; 161 else 162 pagep = pagep->upg_link; 163 if (pagep == NULL) { 164 pagep = hermon_dbr_new_user_page(state, index, next_page); 165 if (pagep == NULL) { 166 mutex_exit(&state->hs_dbr_lock); 167 return (DDI_FAILURE); 168 } 169 pagep->upg_link = udbr->udbr_pagep; 170 udbr->udbr_pagep = pagep; 171 } 172 j = pagep->upg_firstfree; /* index within page */ 173 pagep->upg_firstfree = ((uint64_t *)(void *)pagep->upg_kvaddr)[j]; 174 pagep->upg_nfree--; 175 ((uint64_t *)(void *)pagep->upg_kvaddr)[j] = 0; /* clear dbr */ 176 *mapoffset = ((HERMON_DBR_KEY(index, pagep->upg_index) << 177 MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_DBRMEM_RSRC) << PAGESHIFT; 178 *vdbr = (hermon_dbr_t *)((uint64_t *)(void *)pagep->upg_kvaddr + j); 179 *pdbr = pagep->upg_dmacookie.dmac_laddress + j * sizeof (uint64_t); 180 181 mutex_exit(&state->hs_dbr_lock); 182 return (DDI_SUCCESS); 183 } 184 185 static void 186 hermon_user_dbr_free(hermon_state_t *state, uint_t index, hermon_dbr_t *record) 187 { 188 hermon_user_dbr_t *udbr; 189 hermon_udbr_page_t *pagep; 190 caddr_t kvaddr; 191 uint_t dbr_index; 192 uint_t max_free = PAGESIZE / sizeof (hermon_dbr_t); 193 194 dbr_index = (uintptr_t)record & PAGEOFFSET; /* offset (not yet index) */ 195 kvaddr = (caddr_t)record - dbr_index; 196 dbr_index /= sizeof (hermon_dbr_t); /* now it's the index */ 197 198 mutex_enter(&state->hs_dbr_lock); 199 for (udbr = state->hs_user_dbr; udbr != NULL; udbr = udbr->udbr_link) 200 if (udbr->udbr_index == index) 201 
break; 202 if (udbr == NULL) { 203 IBTF_DPRINTF_L2("hermon", "free user dbr: udbr struct not " 204 "found for index %x", index); 205 mutex_exit(&state->hs_dbr_lock); 206 return; 207 } 208 for (pagep = udbr->udbr_pagep; pagep != NULL; pagep = pagep->upg_link) 209 if (pagep->upg_kvaddr == kvaddr) 210 break; 211 if (pagep == NULL) { 212 IBTF_DPRINTF_L2("hermon", "free user dbr: pagep struct not" 213 " found for index %x, kvaddr %p, DBR index %x", 214 index, kvaddr, dbr_index); 215 mutex_exit(&state->hs_dbr_lock); 216 return; 217 } 218 if (pagep->upg_nfree >= max_free) { 219 IBTF_DPRINTF_L2("hermon", "free user dbr: overflow: " 220 "UCE index %x, DBR index %x", index, dbr_index); 221 mutex_exit(&state->hs_dbr_lock); 222 return; 223 } 224 ASSERT(dbr_index < max_free); 225 ((uint64_t *)(void *)kvaddr)[dbr_index] = pagep->upg_firstfree; 226 pagep->upg_firstfree = dbr_index; 227 pagep->upg_nfree++; 228 mutex_exit(&state->hs_dbr_lock); 229 230 /* XXX still need to unlink and free struct */ 231 /* XXX munmap needs to be managed */ 232 } 233 234 /* 235 * hermon_dbr_page_alloc() 236 * first page allocation - called from attach or open 237 * in this case, we want exactly one page per call, and aligned on a 238 * page - and may need to be mapped to the user for access 239 */ 240 241 int 242 hermon_dbr_page_alloc(hermon_state_t *state, hermon_dbr_info_t **dinfo) 243 { 244 int status; 245 ddi_dma_handle_t dma_hdl; 246 ddi_acc_handle_t acc_hdl; 247 ddi_dma_attr_t dma_attr; 248 ddi_dma_cookie_t cookie; 249 uint_t cookie_cnt; 250 hermon_dbr_header_t *pagehdr; 251 int i; 252 hermon_dbr_info_t *info; 253 uint64_t dmaaddr; 254 uint64_t dmalen; 255 256 info = kmem_zalloc(sizeof (hermon_dbr_info_t), KM_SLEEP); 257 258 /* 259 * Initialize many of the default DMA attributes. Then set additional 260 * alignment restrictions if necessary for the dbr memory, meaning 261 * page aligned. 
Also use the configured value for IOMMU bypass 262 */ 263 hermon_dma_attr_init(state, &dma_attr); 264 dma_attr.dma_attr_align = PAGESIZE; 265 dma_attr.dma_attr_sgllen = 1; /* make sure only one cookie */ 266 267 status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, 268 DDI_DMA_SLEEP, NULL, &dma_hdl); 269 if (status != DDI_SUCCESS) { 270 kmem_free((void *)info, sizeof (hermon_dbr_info_t)); 271 cmn_err(CE_NOTE, "dbr DMA handle alloc failed\n"); 272 return (DDI_FAILURE); 273 } 274 275 status = ddi_dma_mem_alloc(dma_hdl, PAGESIZE, 276 &state->hs_reg_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, 277 NULL, (caddr_t *)&dmaaddr, (size_t *)&dmalen, &acc_hdl); 278 if (status != DDI_SUCCESS) { 279 ddi_dma_free_handle(&dma_hdl); 280 cmn_err(CE_CONT, "dbr DMA mem alloc failed(status %d)", status); 281 kmem_free((void *)info, sizeof (hermon_dbr_info_t)); 282 return (DDI_FAILURE); 283 } 284 285 /* this memory won't be IB registered, so do the bind here */ 286 status = ddi_dma_addr_bind_handle(dma_hdl, NULL, 287 (caddr_t)(uintptr_t)dmaaddr, (size_t)dmalen, DDI_DMA_RDWR | 288 DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &cookie, &cookie_cnt); 289 if (status != DDI_SUCCESS) { 290 ddi_dma_mem_free(&acc_hdl); 291 ddi_dma_free_handle(&dma_hdl); 292 kmem_free((void *)info, sizeof (hermon_dbr_info_t)); 293 cmn_err(CE_CONT, "dbr DMA bind handle failed (status %d)", 294 status); 295 return (DDI_FAILURE); 296 } 297 *dinfo = info; /* Pass back the pointer */ 298 299 /* init the info structure with returned info */ 300 info->dbr_dmahdl = dma_hdl; 301 info->dbr_acchdl = acc_hdl; 302 info->dbr_page = (caddr_t)(uintptr_t)dmaaddr; 303 /* extract the phys addr from the cookie */ 304 info->dbr_paddr = cookie.dmac_laddress; 305 /* should have everything now, so do the init of the header */ 306 pagehdr = (hermon_dbr_header_t *)(void *)info->dbr_page; 307 pagehdr->next = 0; 308 pagehdr->firstfree = 0; 309 pagehdr->nfree = HERMON_NUM_DBR_PER_PAGE; 310 pagehdr->dbr_info = info; 311 /* link all DBrs onto the 
free list */ 312 for (i = 0; i < HERMON_NUM_DBR_PER_PAGE; i++) { 313 pagehdr->dbr[i] = i + 1; 314 } 315 316 return (DDI_SUCCESS); 317 } 318 319 320 /* 321 * hermon_dbr_alloc() 322 * DBr record allocation - called from alloc cq/qp/srq 323 * will check for available dbrs in current 324 * page - if needed it will allocate another and link them 325 */ 326 327 int 328 hermon_dbr_alloc(hermon_state_t *state, uint_t index, ddi_acc_handle_t *acchdl, 329 hermon_dbr_t **vdbr, uint64_t *pdbr, uint64_t *mapoffset) 330 { 331 hermon_dbr_header_t *pagehdr, *lastpage; 332 hermon_dbr_t *record = NULL; 333 hermon_dbr_info_t *dinfo = NULL; 334 int status; 335 336 if (index != state->hs_kernel_uar_index) 337 return (hermon_user_dbr_alloc(state, index, acchdl, vdbr, pdbr, 338 mapoffset)); 339 340 mutex_enter(&state->hs_dbr_lock); 341 /* 'pagehdr' holds pointer to first page */ 342 pagehdr = (hermon_dbr_header_t *)(void *)state->hs_kern_dbr; 343 do { 344 lastpage = pagehdr; /* save pagehdr for later linking */ 345 if (pagehdr->nfree == 0) { 346 pagehdr = (hermon_dbr_header_t *)(void *)pagehdr->next; 347 continue; /* page is full, go to next if there is one */ 348 } 349 dinfo = pagehdr->dbr_info; 350 break; /* found a page w/ one available */ 351 } while (pagehdr != 0); 352 353 if (dinfo == NULL) { /* did NOT find a page with one available */ 354 status = hermon_dbr_page_alloc(state, &dinfo); 355 if (status != DDI_SUCCESS) { 356 /* do error handling */ 357 mutex_exit(&state->hs_dbr_lock); 358 return (DDI_FAILURE); 359 } 360 /* got a new page, so link it in. 
*/ 361 pagehdr = (hermon_dbr_header_t *)(void *)dinfo->dbr_page; 362 lastpage->next = pagehdr; 363 } 364 record = pagehdr->dbr + pagehdr->firstfree; 365 pagehdr->firstfree = *record; 366 pagehdr->nfree--; 367 *record = 0; 368 369 *acchdl = dinfo->dbr_acchdl; 370 *vdbr = record; 371 *pdbr = ((uintptr_t)record - (uintptr_t)pagehdr + dinfo->dbr_paddr); 372 mutex_exit(&state->hs_dbr_lock); 373 return (DDI_SUCCESS); 374 } 375 376 /* 377 * hermon_dbr_free() 378 * DBr record deallocation - called from free cq/qp 379 * will update the counter in the header, and invalidate 380 * the dbr, but will NEVER free pages of dbrs - small 381 * price to pay, but userland access never will anyway 382 */ 383 384 void 385 hermon_dbr_free(hermon_state_t *state, uint_t indx, hermon_dbr_t *record) 386 { 387 hermon_dbr_header_t *pagehdr; 388 389 if (indx != state->hs_kernel_uar_index) { 390 hermon_user_dbr_free(state, indx, record); 391 return; 392 } 393 mutex_enter(&state->hs_dbr_lock); 394 pagehdr = (hermon_dbr_header_t *)((uintptr_t)record & 395 (uintptr_t)PAGEMASK); 396 *record = pagehdr->firstfree; 397 pagehdr->firstfree = record - pagehdr->dbr; 398 pagehdr->nfree++; /* decr the count for this one */ 399 mutex_exit(&state->hs_dbr_lock); 400 } 401 402 /* 403 * hermon_dbr_kern_free() 404 * Context: Can be called only from detach context. 405 * 406 * Free all kernel dbr pages. This includes the freeing of all the dma 407 * resources acquired during the allocation of the pages. 408 * 409 * Also, free all the user dbr pages. 
 */
void
hermon_dbr_kern_free(hermon_state_t *state)
{
	hermon_dbr_header_t	*pagehdr, *lastpage;
	hermon_dbr_info_t	*dinfo;
	hermon_user_dbr_t	*udbr, *next;
	hermon_udbr_page_t	*pagep, *nextp;
	hermon_umap_db_entry_t	*umapdb;
	int			instance, status;
	uint64_t		value;
	extern hermon_umap_db_t	hermon_userland_rsrc_db;

	mutex_enter(&state->hs_dbr_lock);

	/*
	 * First pass: walk the kernel dbr page list, unbinding and freeing
	 * the DMA resources for each page, then the tracking struct itself.
	 * Note: "lastpage" is captured before dinfo is freed because the
	 * next-page link lives inside the page being torn down.
	 */
	pagehdr = (hermon_dbr_header_t *)(void *)state->hs_kern_dbr;
	while (pagehdr != NULL) {
		lastpage = (hermon_dbr_header_t *)(void *)pagehdr->next;
		dinfo = pagehdr->dbr_info;
		(void) ddi_dma_unbind_handle(dinfo->dbr_dmahdl);
		ddi_dma_mem_free(&dinfo->dbr_acchdl);	/* free page */
		ddi_dma_free_handle(&dinfo->dbr_dmahdl);
		kmem_free(dinfo, sizeof (hermon_dbr_info_t));
		pagehdr = lastpage;
	}

	/*
	 * Second pass: free every user dbr page for every user index,
	 * removing the corresponding umap database entry (if present) so
	 * the page can no longer be mmap()'d.  The umapdb lock is taken
	 * inside hs_dbr_lock here; callers elsewhere must not take these
	 * two locks in the opposite order.
	 */
	udbr = state->hs_user_dbr;
	instance = state->hs_instance;
	mutex_enter(&hermon_userland_rsrc_db.hdl_umapdb_lock);
	while (udbr != NULL) {
		pagep = udbr->udbr_pagep;
		while (pagep != NULL) {
			/* probably need to remove "db" */
			(void) ddi_dma_unbind_handle(pagep->upg_dmahdl);
			ddi_dma_free_handle(&pagep->upg_dmahdl);
			freerbuf(pagep->upg_buf);
			ddi_umem_free(pagep->upg_umemcookie);
			status = hermon_umap_db_find_nolock(instance,
			    HERMON_DBR_KEY(udbr->udbr_index,
			    pagep->upg_index), MLNX_UMAP_DBRMEM_RSRC,
			    &value, HERMON_UMAP_DB_REMOVE, &umapdb);
			if (status == DDI_SUCCESS)
				hermon_umap_db_free(umapdb);
			nextp = pagep->upg_link;
			kmem_free(pagep, sizeof (*pagep));
			pagep = nextp;
		}
		next = udbr->udbr_link;
		kmem_free(udbr, sizeof (*udbr));
		udbr = next;
	}
	mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
	mutex_exit(&state->hs_dbr_lock);
}

/*
 * hermon_ah_alloc()
 *    Context: Can be called only from user or kernel context.
 */
int
hermon_ah_alloc(hermon_state_t *state, hermon_pdhdl_t pd,
    ibt_adds_vect_t *attr_p, hermon_ahhdl_t *ahhdl, uint_t sleepflag)
{
	hermon_rsrc_t		*rsrc;
	hermon_hw_udav_t	*udav;
	hermon_ahhdl_t		ah;
	int			status;

	/*
	 * Someday maybe the "ibt_adds_vect_t *attr_p" will be NULL to
	 * indicate that we wish to allocate an "invalid" (i.e. empty)
	 * address handle XXX
	 */

	/* Validate that specified port number is legal */
	if (!hermon_portnum_is_valid(state, attr_p->av_port_num)) {
		return (IBT_HCA_PORT_INVALID);
	}

	/*
	 * Allocate the software structure for tracking the address handle
	 * (i.e. the Hermon Address Handle struct).
	 */
	status = hermon_rsrc_alloc(state, HERMON_AHHDL, 1, sleepflag, &rsrc);
	if (status != DDI_SUCCESS) {
		return (IBT_INSUFF_RESOURCE);
	}
	ah = (hermon_ahhdl_t)rsrc->hr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah))

	/* Increment the reference count on the protection domain (PD) */
	hermon_pd_refcnt_inc(pd);

	/* UDAV is zeroed by kmem_zalloc before it is populated below */
	udav = (hermon_hw_udav_t *)kmem_zalloc(sizeof (hermon_hw_udav_t),
	    KM_SLEEP);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*udav))

	/*
	 * Fill in the UDAV data.  We first zero out the UDAV, then populate
	 * it by then calling hermon_set_addr_path() to fill in the common
	 * portions that can be pulled from the "ibt_adds_vect_t" passed in.
	 * On failure, undo the PD refcnt and resource allocation above.
	 */
	status = hermon_set_addr_path(state, attr_p,
	    (hermon_hw_addr_path_t *)udav, HERMON_ADDRPATH_UDAV);
	if (status != DDI_SUCCESS) {
		hermon_pd_refcnt_dec(pd);
		hermon_rsrc_free(state, &rsrc);
		return (status);
	}
	udav->pd = pd->pd_pdnum;
	udav->sl = attr_p->av_srvl;

	/*
	 * Fill in the rest of the Hermon Address Handle struct.
	 *
	 * NOTE: We are saving away a copy of the "av_dgid.gid_guid" field
	 * here because we may need to return it later to the IBTF (as a
	 * result of a subsequent query operation).  Unlike the other UDAV
	 * parameters, the value of "av_dgid.gid_guid" is not always preserved.
	 * The reason for this is described in hermon_set_addr_path().
	 */
	ah->ah_rsrcp = rsrc;
	ah->ah_pdhdl = pd;
	ah->ah_udav = udav;
	ah->ah_save_guid = attr_p->av_dgid.gid_guid;
	*ahhdl = ah;

	return (DDI_SUCCESS);
}


/*
 * hermon_ah_free()
 *    Context: Can be called only from user or kernel context.
 */
/* ARGSUSED */
int
hermon_ah_free(hermon_state_t *state, hermon_ahhdl_t *ahhdl, uint_t sleepflag)
{
	hermon_rsrc_t		*rsrc;
	hermon_pdhdl_t		pd;
	hermon_ahhdl_t		ah;

	/*
	 * Pull all the necessary information from the Hermon Address Handle
	 * struct.  This is necessary here because the resource for the
	 * AH is going to be freed up as part of this operation.
	 */
	ah = *ahhdl;
	mutex_enter(&ah->ah_lock);
	rsrc = ah->ah_rsrcp;
	pd = ah->ah_pdhdl;
	mutex_exit(&ah->ah_lock);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah))

	/*
	 * Free the UDAV memory.  NOTE(review): this reads ah->ah_udav after
	 * ah_lock has been dropped; presumably the IBTF guarantees no
	 * concurrent use of the AH once free is called -- confirm at caller.
	 */
	kmem_free(ah->ah_udav, sizeof (hermon_hw_udav_t));

	/* Decrement the reference count on the protection domain (PD) */
	hermon_pd_refcnt_dec(pd);

	/* Free the Hermon Address Handle structure */
	hermon_rsrc_free(state, &rsrc);

	/* Set the ahhdl pointer to NULL and return success */
	*ahhdl = NULL;

	return (DDI_SUCCESS);
}


/*
 * hermon_ah_query()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_ah_query(hermon_state_t *state, hermon_ahhdl_t ah, hermon_pdhdl_t *pd,
    ibt_adds_vect_t *attr_p)
{
	mutex_enter(&ah->ah_lock);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p))

	/*
	 * Pull the PD and UDAV from the Hermon Address Handle structure
	 */
	*pd = ah->ah_pdhdl;

	/*
	 * Fill in "ibt_adds_vect_t".  We call hermon_get_addr_path() to fill
	 * the common portions that can be pulled from the UDAV we pass in.
	 *
	 * NOTE: We will also fill the "av_dgid.gid_guid" field from the
	 * "ah_save_guid" field we have previously saved away.  The reason
	 * for this is described in hermon_ah_alloc() and hermon_ah_modify().
	 */
	hermon_get_addr_path(state, (hermon_hw_addr_path_t *)ah->ah_udav,
	    attr_p, HERMON_ADDRPATH_UDAV);

	attr_p->av_dgid.gid_guid = ah->ah_save_guid;

	mutex_exit(&ah->ah_lock);
	return (DDI_SUCCESS);
}


/*
 * hermon_ah_modify()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_ah_modify(hermon_state_t *state, hermon_ahhdl_t ah,
    ibt_adds_vect_t *attr_p)
{
	hermon_hw_udav_t	old_udav;
	uint64_t		data_old;
	int			status, size, i;

	/* Validate that specified port number is legal */
	if (!hermon_portnum_is_valid(state, attr_p->av_port_num)) {
		return (IBT_HCA_PORT_INVALID);
	}

	mutex_enter(&ah->ah_lock);

	/* Save a copy of the current UDAV data in old_udav. */
	bcopy(ah->ah_udav, &old_udav, sizeof (hermon_hw_udav_t));

	/*
	 * Fill in the new UDAV with the caller's data, passed in via the
	 * "ibt_adds_vect_t" structure.
	 *
	 * NOTE: We also need to save away a copy of the "av_dgid.gid_guid"
	 * field here (just as we did during hermon_ah_alloc()) because we
	 * may need to return it later to the IBTF (as a result of a
	 * subsequent query operation).  As explained in hermon_ah_alloc(),
	 * unlike the other UDAV parameters, the value of "av_dgid.gid_guid"
	 * is not always preserved.  The reason for this is described in
	 * hermon_set_addr_path().
	 */
	status = hermon_set_addr_path(state, attr_p,
	    (hermon_hw_addr_path_t *)ah->ah_udav, HERMON_ADDRPATH_UDAV);
	if (status != DDI_SUCCESS) {
		mutex_exit(&ah->ah_lock);
		return (status);
	}
	ah->ah_save_guid = attr_p->av_dgid.gid_guid;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(ah->ah_udav)))
	ah->ah_udav->sl = attr_p->av_srvl;

	/*
	 * Copy changes into the new UDAV.
	 *    Note:  We copy in 64-bit chunks.  For the first two of these
	 *    chunks it is necessary to read the current contents of the
	 *    UDAV, mask off the modifiable portions (maintaining any
	 *    of the "reserved" portions), and then mask on the new data.
	 */
	size = sizeof (hermon_hw_udav_t) >> 3;
	for (i = 0; i < size; i++) {
		data_old = ((uint64_t *)&old_udav)[i];

		/*
		 * Apply mask to change only the relevant values.
		 */
		if (i == 0) {
			data_old = data_old & HERMON_UDAV_MODIFY_MASK0;
		} else if (i == 1) {
			data_old = data_old & HERMON_UDAV_MODIFY_MASK1;
		} else {
			data_old = 0;
		}

		/* Store the updated values to the UDAV */
		((uint64_t *)ah->ah_udav)[i] |= data_old;
	}

	/*
	 * Put the valid PD number back into the UDAV entry, as it
	 * might have been clobbered above.
	 */
	ah->ah_udav->pd = old_udav.pd;


	mutex_exit(&ah->ah_lock);
	return (DDI_SUCCESS);
}

/*
 * hermon_mcg_attach()
 *    Context: Can be called only from user or kernel context.
 */
int
hermon_mcg_attach(hermon_state_t *state, hermon_qphdl_t qp, ib_gid_t gid,
    ib_lid_t lid)
{
	hermon_rsrc_t		*rsrc;
	hermon_hw_mcg_t		*mcg_entry;
	hermon_hw_mcg_qp_list_t	*mcg_entry_qplist;
	hermon_mcghdl_t		mcg, newmcg;
	uint64_t		mgid_hash;
	uint32_t		end_indx;
	int			status;
	uint_t			qp_found;

	/*
	 * It is only allowed to attach MCG to UD queue pairs.
Verify 716 * that the intended QP is of the appropriate transport type 717 */ 718 if (qp->qp_serv_type != HERMON_QP_UD) { 719 return (IBT_QP_SRV_TYPE_INVALID); 720 } 721 722 /* 723 * Check for invalid Multicast DLID. Specifically, all Multicast 724 * LIDs should be within a well defined range. If the specified LID 725 * is outside of that range, then return an error. 726 */ 727 if (hermon_mlid_is_valid(lid) == 0) { 728 return (IBT_MC_MLID_INVALID); 729 } 730 /* 731 * Check for invalid Multicast GID. All Multicast GIDs should have 732 * a well-defined pattern of bits and flags that are allowable. If 733 * the specified GID does not meet the criteria, then return an error. 734 */ 735 if (hermon_mgid_is_valid(gid) == 0) { 736 return (IBT_MC_MGID_INVALID); 737 } 738 739 /* 740 * Compute the MGID hash value. Since the MCG table is arranged as 741 * a number of separate hash chains, this operation converts the 742 * specified MGID into the starting index of an entry in the hash 743 * table (i.e. the index for the start of the appropriate hash chain). 744 * Subsequent operations below will walk the chain searching for the 745 * right place to add this new QP. 746 */ 747 status = hermon_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid, 748 &mgid_hash, HERMON_SLEEPFLAG_FOR_CONTEXT()); 749 if (status != HERMON_CMD_SUCCESS) { 750 cmn_err(CE_CONT, "Hermon: MGID_HASH command failed: %08x\n", 751 status); 752 if (status == HERMON_CMD_INVALID_STATUS) { 753 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 754 } 755 return (ibc_get_ci_failure(0)); 756 } 757 758 /* 759 * Grab the multicast group mutex. Then grab the pre-allocated 760 * temporary buffer used for holding and/or modifying MCG entries. 761 * Zero out the temporary MCG entry before we begin. 
762 */ 763 mutex_enter(&state->hs_mcglock); 764 mcg_entry = state->hs_mcgtmp; 765 mcg_entry_qplist = HERMON_MCG_GET_QPLIST_PTR(mcg_entry); 766 bzero(mcg_entry, HERMON_MCGMEM_SZ(state)); 767 768 /* 769 * Walk through the array of MCG entries starting at "mgid_hash". 770 * Try to find the appropriate place for this new QP to be added. 771 * This could happen when the first entry of the chain has MGID == 0 772 * (which means that the hash chain is empty), or because we find 773 * an entry with the same MGID (in which case we'll add the QP to 774 * that MCG), or because we come to the end of the chain (in which 775 * case this is the first QP being added to the multicast group that 776 * corresponds to the MGID. The hermon_mcg_walk_mgid_hash() routine 777 * walks the list and returns an index into the MCG table. The entry 778 * at this index is then checked to determine which case we have 779 * fallen into (see below). Note: We are using the "shadow" MCG 780 * list (of hermon_mcg_t structs) for this lookup because the real 781 * MCG entries are in hardware (and the lookup process would be much 782 * more time consuming). 783 */ 784 end_indx = hermon_mcg_walk_mgid_hash(state, mgid_hash, gid, NULL); 785 mcg = &state->hs_mcghdl[end_indx]; 786 787 /* 788 * If MGID == 0, then the hash chain is empty. Just fill in the 789 * current entry. Note: No need to allocate an MCG table entry 790 * as all the hash chain "heads" are already preallocated. 791 */ 792 if ((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) { 793 794 /* Fill in the current entry in the "shadow" MCG list */ 795 hermon_mcg_setup_new_hdr(mcg, mcg_entry, gid, NULL); 796 797 /* 798 * Try to add the new QP number to the list. This (and the 799 * above) routine fills in a temporary MCG. The "mcg_entry" 800 * and "mcg_entry_qplist" pointers simply point to different 801 * offsets within the same temporary copy of the MCG (for 802 * convenience). 
Note: If this fails, we need to invalidate 803 * the entries we've already put into the "shadow" list entry 804 * above. 805 */ 806 status = hermon_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp, 807 &qp_found); 808 if (status != DDI_SUCCESS) { 809 bzero(mcg, sizeof (struct hermon_sw_mcg_list_s)); 810 mutex_exit(&state->hs_mcglock); 811 return (status); 812 } 813 if (!qp_found) 814 mcg_entry->member_cnt = (mcg->mcg_num_qps + 1); 815 /* set the member count */ 816 817 /* 818 * Once the temporary MCG has been filled in, write the entry 819 * into the appropriate location in the Hermon MCG entry table. 820 * If it's successful, then drop the lock and return success. 821 * Note: In general, this operation shouldn't fail. If it 822 * does, then it is an indication that something (probably in 823 * HW, but maybe in SW) has gone seriously wrong. We still 824 * want to zero out the entries that we've filled in above 825 * (in the hermon_mcg_setup_new_hdr() routine). 826 */ 827 status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx, 828 HERMON_CMD_NOSLEEP_SPIN); 829 if (status != HERMON_CMD_SUCCESS) { 830 bzero(mcg, sizeof (struct hermon_sw_mcg_list_s)); 831 mutex_exit(&state->hs_mcglock); 832 HERMON_WARNING(state, "failed to write MCG entry"); 833 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: " 834 "%08x\n", status); 835 if (status == HERMON_CMD_INVALID_STATUS) { 836 hermon_fm_ereport(state, HCA_SYS_ERR, 837 HCA_ERR_SRV_LOST); 838 } 839 return (ibc_get_ci_failure(0)); 840 } 841 842 /* 843 * Now that we know all the Hermon firmware accesses have been 844 * successful, we update the "shadow" MCG entry by incrementing 845 * the "number of attached QPs" count. 846 * 847 * We increment only if the QP is not already part of the 848 * MCG by checking the 'qp_found' flag returned from the 849 * qplist_add above. 850 */ 851 if (!qp_found) { 852 mcg->mcg_num_qps++; 853 854 /* 855 * Increment the refcnt for this QP. 
Because the QP 856 * was added to this MCG, the refcnt must be 857 * incremented. 858 */ 859 hermon_qp_mcg_refcnt_inc(qp); 860 } 861 862 /* 863 * We drop the lock and return success. 864 */ 865 mutex_exit(&state->hs_mcglock); 866 return (DDI_SUCCESS); 867 } 868 869 /* 870 * If the specified MGID matches the MGID in the current entry, then 871 * we need to try to add the QP to the current MCG entry. In this 872 * case, it means that we need to read the existing MCG entry (into 873 * the temporary MCG), add the new QP number to the temporary entry 874 * (using the same method we used above), and write the entry back 875 * to the hardware (same as above). 876 */ 877 if ((mcg->mcg_mgid_h == gid.gid_prefix) && 878 (mcg->mcg_mgid_l == gid.gid_guid)) { 879 880 /* 881 * Read the current MCG entry into the temporary MCG. Note: 882 * In general, this operation shouldn't fail. If it does, 883 * then it is an indication that something (probably in HW, 884 * but maybe in SW) has gone seriously wrong. 885 */ 886 status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx, 887 HERMON_CMD_NOSLEEP_SPIN); 888 if (status != HERMON_CMD_SUCCESS) { 889 mutex_exit(&state->hs_mcglock); 890 HERMON_WARNING(state, "failed to read MCG entry"); 891 cmn_err(CE_CONT, "Hermon: READ_MGM command failed: " 892 "%08x\n", status); 893 if (status == HERMON_CMD_INVALID_STATUS) { 894 hermon_fm_ereport(state, HCA_SYS_ERR, 895 HCA_ERR_SRV_LOST); 896 } 897 return (ibc_get_ci_failure(0)); 898 } 899 900 /* 901 * Try to add the new QP number to the list. This routine 902 * fills in the necessary pieces of the temporary MCG. The 903 * "mcg_entry_qplist" pointer is used to point to the portion 904 * of the temporary MCG that holds the QP numbers. 905 * 906 * Note: hermon_mcg_qplist_add() returns SUCCESS if it 907 * already found the QP in the list. In this case, the QP is 908 * not added on to the list again. Check the flag 'qp_found' 909 * if this value is needed to be known. 
910 * 911 */ 912 status = hermon_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp, 913 &qp_found); 914 if (status != DDI_SUCCESS) { 915 mutex_exit(&state->hs_mcglock); 916 return (status); 917 } 918 if (!qp_found) 919 mcg_entry->member_cnt = (mcg->mcg_num_qps + 1); 920 /* set the member count */ 921 922 /* 923 * Once the temporary MCG has been updated, write the entry 924 * into the appropriate location in the Hermon MCG entry table. 925 * If it's successful, then drop the lock and return success. 926 * Note: In general, this operation shouldn't fail. If it 927 * does, then it is an indication that something (probably in 928 * HW, but maybe in SW) has gone seriously wrong. 929 */ 930 status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx, 931 HERMON_CMD_NOSLEEP_SPIN); 932 if (status != HERMON_CMD_SUCCESS) { 933 mutex_exit(&state->hs_mcglock); 934 HERMON_WARNING(state, "failed to write MCG entry"); 935 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: " 936 "%08x\n", status); 937 if (status == HERMON_CMD_INVALID_STATUS) { 938 hermon_fm_ereport(state, HCA_SYS_ERR, 939 HCA_ERR_SRV_LOST); 940 } 941 return (ibc_get_ci_failure(0)); 942 } 943 944 /* 945 * Now that we know all the Hermon firmware accesses have been 946 * successful, we update the current "shadow" MCG entry by 947 * incrementing the "number of attached QPs" count. 948 * 949 * We increment only if the QP is not already part of the 950 * MCG by checking the 'qp_found' flag returned 951 * hermon_mcg_walk_mgid_hashfrom the qplist_add above. 952 */ 953 if (!qp_found) { 954 mcg->mcg_num_qps++; 955 956 /* 957 * Increment the refcnt for this QP. Because the QP 958 * was added to this MCG, the refcnt must be 959 * incremented. 960 */ 961 hermon_qp_mcg_refcnt_inc(qp); 962 } 963 964 /* 965 * We drop the lock and return success. 966 */ 967 mutex_exit(&state->hs_mcglock); 968 return (DDI_SUCCESS); 969 } 970 971 /* 972 * If we've reached here, then we're at the end of the hash chain. 
973 * We need to allocate a new MCG entry, fill it in, write it to Hermon, 974 * and update the previous entry to link the new one to the end of the 975 * chain. 976 */ 977 978 /* 979 * Allocate an MCG table entry. This will be filled in with all 980 * the necessary parameters to define the multicast group. Then it 981 * will be written to the hardware in the next-to-last step below. 982 */ 983 status = hermon_rsrc_alloc(state, HERMON_MCG, 1, HERMON_NOSLEEP, &rsrc); 984 if (status != DDI_SUCCESS) { 985 mutex_exit(&state->hs_mcglock); 986 return (IBT_INSUFF_RESOURCE); 987 } 988 989 /* 990 * Fill in the new entry in the "shadow" MCG list. Note: Just as 991 * it does above, hermon_mcg_setup_new_hdr() also fills in a portion 992 * of the temporary MCG entry (the rest of which will be filled in by 993 * hermon_mcg_qplist_add() below) 994 */ 995 newmcg = &state->hs_mcghdl[rsrc->hr_indx]; 996 hermon_mcg_setup_new_hdr(newmcg, mcg_entry, gid, rsrc); 997 998 /* 999 * Try to add the new QP number to the list. This routine fills in 1000 * the final necessary pieces of the temporary MCG. The 1001 * "mcg_entry_qplist" pointer is used to point to the portion of the 1002 * temporary MCG that holds the QP numbers. If we fail here, we 1003 * must undo the previous resource allocation. 1004 * 1005 * Note: hermon_mcg_qplist_add() can we return SUCCESS if it already 1006 * found the QP in the list. In this case, the QP is not added on to 1007 * the list again. Check the flag 'qp_found' if this value is needed 1008 * to be known. 
1009 */ 1010 status = hermon_mcg_qplist_add(state, newmcg, mcg_entry_qplist, qp, 1011 &qp_found); 1012 if (status != DDI_SUCCESS) { 1013 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s)); 1014 hermon_rsrc_free(state, &rsrc); 1015 mutex_exit(&state->hs_mcglock); 1016 return (status); 1017 } 1018 mcg_entry->member_cnt = (newmcg->mcg_num_qps + 1); 1019 /* set the member count */ 1020 1021 /* 1022 * Once the temporary MCG has been updated, write the entry into the 1023 * appropriate location in the Hermon MCG entry table. If this is 1024 * successful, then we need to chain the previous entry to this one. 1025 * Note: In general, this operation shouldn't fail. If it does, then 1026 * it is an indication that something (probably in HW, but maybe in 1027 * SW) has gone seriously wrong. 1028 */ 1029 status = hermon_write_mgm_cmd_post(state, mcg_entry, rsrc->hr_indx, 1030 HERMON_CMD_NOSLEEP_SPIN); 1031 if (status != HERMON_CMD_SUCCESS) { 1032 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s)); 1033 hermon_rsrc_free(state, &rsrc); 1034 mutex_exit(&state->hs_mcglock); 1035 HERMON_WARNING(state, "failed to write MCG entry"); 1036 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n", 1037 status); 1038 if (status == HERMON_CMD_INVALID_STATUS) { 1039 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 1040 } 1041 return (ibc_get_ci_failure(0)); 1042 } 1043 1044 /* 1045 * Now read the current MCG entry (the one previously at the end of 1046 * hash chain) into the temporary MCG. We are going to update its 1047 * "next_gid_indx" now and write the entry back to the MCG table. 1048 * Note: In general, this operation shouldn't fail. If it does, then 1049 * it is an indication that something (probably in HW, but maybe in SW) 1050 * has gone seriously wrong. We will free up the MCG entry resource, 1051 * but we will not undo the previously written MCG entry in the HW. 1052 * This is OK, though, because the MCG entry is not currently attached 1053 * to any hash chain. 
1054 */ 1055 status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx, 1056 HERMON_CMD_NOSLEEP_SPIN); 1057 if (status != HERMON_CMD_SUCCESS) { 1058 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s)); 1059 hermon_rsrc_free(state, &rsrc); 1060 mutex_exit(&state->hs_mcglock); 1061 HERMON_WARNING(state, "failed to read MCG entry"); 1062 cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n", 1063 status); 1064 if (status == HERMON_CMD_INVALID_STATUS) { 1065 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 1066 } 1067 return (ibc_get_ci_failure(0)); 1068 } 1069 1070 /* 1071 * Finally, we update the "next_gid_indx" field in the temporary MCG 1072 * and attempt to write the entry back into the Hermon MCG table. If 1073 * this succeeds, then we update the "shadow" list to reflect the 1074 * change, drop the lock, and return success. Note: In general, this 1075 * operation shouldn't fail. If it does, then it is an indication 1076 * that something (probably in HW, but maybe in SW) has gone seriously 1077 * wrong. Just as we do above, we will free up the MCG entry resource, 1078 * but we will not try to undo the previously written MCG entry. This 1079 * is OK, though, because (since we failed here to update the end of 1080 * the chain) that other entry is not currently attached to any chain. 
     */
    mcg_entry->next_gid_indx = rsrc->hr_indx;
    status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
        HERMON_CMD_NOSLEEP_SPIN);
    if (status != HERMON_CMD_SUCCESS) {
        bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
        hermon_rsrc_free(state, &rsrc);
        mutex_exit(&state->hs_mcglock);
        HERMON_WARNING(state, "failed to write MCG entry");
        cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        return (ibc_get_ci_failure(0));
    }
    /* Update the "shadow" list to reflect the new link at the chain end */
    mcg = &state->hs_mcghdl[end_indx];
    mcg->mcg_next_indx = rsrc->hr_indx;

    /*
     * Now that we know all the Hermon firmware accesses have been
     * successful, we update the new "shadow" MCG entry by incrementing
     * the "number of attached QPs" count.  Then we drop the lock and
     * return success.
     */
    newmcg->mcg_num_qps++;

    /*
     * Increment the refcnt for this QP.  Because the QP was added to
     * this MCG, the refcnt must be incremented.
     */
    hermon_qp_mcg_refcnt_inc(qp);

    mutex_exit(&state->hs_mcglock);
    return (DDI_SUCCESS);
}


/*
 * hermon_mcg_detach()
 *    Context: Can be called only from user or kernel context.
 *
 *    Removes the QP "qp" from the multicast group identified by "gid"/"lid".
 *    Returns DDI_SUCCESS, an IBT_* error for invalid arguments or a QP that
 *    is not a member, or an ibc CI failure code if a firmware command fails.
 */
int
hermon_mcg_detach(hermon_state_t *state, hermon_qphdl_t qp, ib_gid_t gid,
    ib_lid_t lid)
{
    hermon_hw_mcg_t         *mcg_entry;
    hermon_hw_mcg_qp_list_t *mcg_entry_qplist;
    hermon_mcghdl_t         mcg;
    uint64_t                mgid_hash;
    uint32_t                end_indx, prev_indx;
    int                     status;

    /*
     * Check for invalid Multicast DLID.  Specifically, all Multicast
     * LIDs should be within a well defined range.  If the specified LID
     * is outside of that range, then return an error.
     */
    if (hermon_mlid_is_valid(lid) == 0) {
        return (IBT_MC_MLID_INVALID);
    }

    /*
     * Compute the MGID hash value.  As described above, the MCG table is
     * arranged as a number of separate hash chains.  This operation
     * converts the specified MGID into the starting index of an entry in
     * the hash table (i.e. the index for the start of the appropriate
     * hash chain).  Subsequent operations below will walk the chain
     * searching for a matching entry from which to attempt to remove
     * the specified QP.
     */
    status = hermon_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
        &mgid_hash, HERMON_SLEEPFLAG_FOR_CONTEXT());
    if (status != HERMON_CMD_SUCCESS) {
        cmn_err(CE_CONT, "Hermon: MGID_HASH command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        return (ibc_get_ci_failure(0));
    }

    /*
     * Grab the multicast group mutex.  Then grab the pre-allocated
     * temporary buffer used for holding and/or modifying MCG entries.
     * Note: while this lock is held, all firmware commands below are
     * posted with HERMON_CMD_NOSLEEP_SPIN.
     */
    mutex_enter(&state->hs_mcglock);
    mcg_entry = state->hs_mcgtmp;
    mcg_entry_qplist = HERMON_MCG_GET_QPLIST_PTR(mcg_entry);

    /*
     * Walk through the array of MCG entries starting at "mgid_hash".
     * Try to find an MCG entry with a matching MGID.  The
     * hermon_mcg_walk_mgid_hash() routine walks the list and returns an
     * index into the MCG table.  The entry at this index is checked to
     * determine whether it is a match or not.  If it is a match, then
     * we continue on to attempt to remove the QP from the MCG.  If it
     * is not a match (or not a valid MCG entry), then we return an error.
     */
    end_indx = hermon_mcg_walk_mgid_hash(state, mgid_hash, gid, &prev_indx);
    mcg = &state->hs_mcghdl[end_indx];

    /*
     * If MGID == 0 (the hash chain is empty) or if the specified MGID
     * does not match the MGID in the current entry, then return
     * IBT_MC_MGID_INVALID (to indicate that the specified MGID is not
     * valid).
     */
    if (((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) ||
        ((mcg->mcg_mgid_h != gid.gid_prefix) ||
        (mcg->mcg_mgid_l != gid.gid_guid))) {
        mutex_exit(&state->hs_mcglock);
        return (IBT_MC_MGID_INVALID);
    }

    /*
     * Read the current MCG entry into the temporary MCG.  Note: In
     * general, this operation shouldn't fail.  If it does, then it is
     * an indication that something (probably in HW, but maybe in SW)
     * has gone seriously wrong.
     */
    status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
        HERMON_CMD_NOSLEEP_SPIN);
    if (status != HERMON_CMD_SUCCESS) {
        mutex_exit(&state->hs_mcglock);
        HERMON_WARNING(state, "failed to read MCG entry");
        cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        return (ibc_get_ci_failure(0));
    }

    /*
     * Search the QP number list for a match.  If a match is found, then
     * remove the entry from the QP list.  Otherwise, if no match is found,
     * return an error.
     */
    status = hermon_mcg_qplist_remove(mcg, mcg_entry_qplist, qp);
    if (status != DDI_SUCCESS) {
        mutex_exit(&state->hs_mcglock);
        return (status);
    }

    /*
     * Decrement the MCG count for this QP.  When the QP's
     * "qp_mcg_refcnt" field becomes 0, then this QP is no longer a
     * member of any MCG.
     */
    hermon_qp_mcg_refcnt_dec(qp);

    /*
     * If the current MCG's QP number list is about to be made empty
     * ("mcg_num_qps" == 1), then remove the entry itself from the hash
     * chain.  Otherwise, just write the updated MCG entry back to the
     * hardware.  In either case, once we successfully update the hardware
     * chain, then we decrement the "shadow" list entry's "mcg_num_qps"
     * count (or zero out the entire "shadow" list entry) before returning
     * success.  Note: Zeroing out the "shadow" list entry is done
     * inside of hermon_mcg_hash_list_remove().
     */
    if (mcg->mcg_num_qps == 1) {

        /* Remove an MCG entry from the hash chain */
        status = hermon_mcg_hash_list_remove(state, end_indx, prev_indx,
            mcg_entry);
        if (status != DDI_SUCCESS) {
            mutex_exit(&state->hs_mcglock);
            return (status);
        }

    } else {
        /*
         * Write the updated MCG entry back to the Hermon MCG table.
         * If this succeeds, then we update the "shadow" list to
         * reflect the change (i.e. decrement the "mcg_num_qps"),
         * drop the lock, and return success.  Note: In general,
         * this operation shouldn't fail.  If it does, then it is an
         * indication that something (probably in HW, but maybe in SW)
         * has gone seriously wrong.
         */
        mcg_entry->member_cnt = (mcg->mcg_num_qps - 1);
        status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
            HERMON_CMD_NOSLEEP_SPIN);
        if (status != HERMON_CMD_SUCCESS) {
            mutex_exit(&state->hs_mcglock);
            HERMON_WARNING(state, "failed to write MCG entry");
            cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
                "%08x\n", status);
            if (status == HERMON_CMD_INVALID_STATUS) {
                hermon_fm_ereport(state, HCA_SYS_ERR,
                    HCA_ERR_SRV_LOST);
            }
            return (ibc_get_ci_failure(0));
        }
        mcg->mcg_num_qps--;
    }

    mutex_exit(&state->hs_mcglock);
    return (DDI_SUCCESS);
}

/*
 * hermon_qp_mcg_refcnt_inc()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp)
{
    /* Increment the QP's MCG reference count (under the QP lock) */
    mutex_enter(&qp->qp_lock);
    qp->qp_mcg_refcnt++;
    mutex_exit(&qp->qp_lock);
}


/*
 * hermon_qp_mcg_refcnt_dec()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp)
{
    /* Decrement the QP's MCG reference count (under the QP lock) */
    mutex_enter(&qp->qp_lock);
    qp->qp_mcg_refcnt--;
    mutex_exit(&qp->qp_lock);
}


/*
 * hermon_mcg_qplist_add()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_mcg_qplist_add(hermon_state_t *state, hermon_mcghdl_t mcg,
    hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp,
    uint_t *qp_found)
{
    uint_t      qplist_indx;

    ASSERT(MUTEX_HELD(&state->hs_mcglock));

    qplist_indx = mcg->mcg_num_qps;

    /*
     * Determine if we have exceeded the maximum number of QP per
     * multicast group.
     * If we have, then return an error.
     */
    if (qplist_indx >= state->hs_cfg_profile->cp_num_qp_per_mcg) {
        return (IBT_HCA_MCG_QP_EXCEEDED);
    }

    /*
     * Determine if the QP is already attached to this MCG table.  If it
     * is, then we break out and treat this operation as a NO-OP
     */
    for (qplist_indx = 0; qplist_indx < mcg->mcg_num_qps;
        qplist_indx++) {
        if (mcg_qplist[qplist_indx].qpn == qp->qp_qpnum) {
            break;
        }
    }

    /*
     * If the QP was already on the list, set 'qp_found' to TRUE.  We still
     * return SUCCESS in this case, but the qplist will not have been
     * updated because the QP was already on the list.
     */
    if (qplist_indx < mcg->mcg_num_qps) {
        *qp_found = 1;
    } else {
        /*
         * Otherwise, append the new QP number to the end of the
         * current QP list.  Note: We will increment the "mcg_num_qps"
         * field on the "shadow" MCG list entry later (after we know
         * that all necessary Hermon firmware accesses have been
         * successful).
         *
         * Set 'qp_found' to 0 so we know the QP was added on to the
         * list for sure.
         */
        mcg_qplist[qplist_indx].qpn =
            (qp->qp_qpnum | HERMON_MCG_QPN_BLOCK_LB);
        *qp_found = 0;
    }

    return (DDI_SUCCESS);
}



/*
 * hermon_mcg_qplist_remove()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_mcg_qplist_remove(hermon_mcghdl_t mcg,
    hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp)
{
    uint_t      i, qplist_indx;

    /*
     * Search the MCG QP list for a matching QPN.  When it's found, we
     * swap the last entry with the current one and then zero out the
     * (old) last entry.  Note: the caller is responsible for
     * decrementing the "mcg_num_qps" count on the "shadow" MCG entry.
     * If the QPN is not found, then it's an error.
     *
     * NOTE(review): hermon_mcg_qplist_add() stores the QPN with
     * HERMON_MCG_QPN_BLOCK_LB or'd in; the compare here assumes that
     * bit is not reflected in the "qpn" field read back from the
     * hardware entry -- confirm against hermon_hw_mcg_qp_list_t.
     */
    qplist_indx = mcg->mcg_num_qps;
    for (i = 0; i < qplist_indx; i++) {
        if (mcg_qplist[i].qpn == qp->qp_qpnum) {
            mcg_qplist[i] = mcg_qplist[qplist_indx - 1];
            mcg_qplist[qplist_indx - 1].qpn = 0;

            return (DDI_SUCCESS);
        }
    }

    return (IBT_QP_HDL_INVALID);
}


/*
 * hermon_mcg_walk_mgid_hash()
 *    Context: Can be called from interrupt or base context.
 */
static uint_t
hermon_mcg_walk_mgid_hash(hermon_state_t *state, uint64_t start_indx,
    ib_gid_t mgid, uint_t *p_indx)
{
    hermon_mcghdl_t curr_mcghdl;
    uint_t          curr_indx, prev_indx;

    ASSERT(MUTEX_HELD(&state->hs_mcglock));

    /* Start at the head of the hash chain */
    curr_indx = (uint_t)start_indx;
    prev_indx = curr_indx;
    curr_mcghdl = &state->hs_mcghdl[curr_indx];

    /* If the first entry in the chain has MGID == 0, then stop */
    if ((curr_mcghdl->mcg_mgid_h == 0) &&
        (curr_mcghdl->mcg_mgid_l == 0)) {
        goto end_mgid_hash_walk;
    }

    /* If the first entry in the chain matches the MGID, then stop */
    if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
        (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
        goto end_mgid_hash_walk;
    }

    /* Otherwise, walk the hash chain looking for a match */
    while (curr_mcghdl->mcg_next_indx != 0) {
        prev_indx = curr_indx;
        curr_indx = curr_mcghdl->mcg_next_indx;
        curr_mcghdl = &state->hs_mcghdl[curr_indx];

        if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
            (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
            break;
        }
    }

end_mgid_hash_walk:
    /*
     * If necessary, return the index of the previous entry too.  This
     * is primarily used for detaching a QP from a multicast group.  It
     * may be necessary, in that case, to delete an MCG entry from the
     * hash chain and having the index of the previous entry is helpful.
1454 */ 1455 if (p_indx != NULL) { 1456 *p_indx = prev_indx; 1457 } 1458 return (curr_indx); 1459 } 1460 1461 1462 /* 1463 * hermon_mcg_setup_new_hdr() 1464 * Context: Can be called from interrupt or base context. 1465 */ 1466 static void 1467 hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg, hermon_hw_mcg_t *mcg_hdr, 1468 ib_gid_t mgid, hermon_rsrc_t *mcg_rsrc) 1469 { 1470 /* 1471 * Fill in the fields of the "shadow" entry used by software 1472 * to track MCG hardware entry 1473 */ 1474 mcg->mcg_mgid_h = mgid.gid_prefix; 1475 mcg->mcg_mgid_l = mgid.gid_guid; 1476 mcg->mcg_rsrcp = mcg_rsrc; 1477 mcg->mcg_next_indx = 0; 1478 mcg->mcg_num_qps = 0; 1479 1480 /* 1481 * Fill the header fields of the MCG entry (in the temporary copy) 1482 */ 1483 mcg_hdr->mgid_h = mgid.gid_prefix; 1484 mcg_hdr->mgid_l = mgid.gid_guid; 1485 mcg_hdr->next_gid_indx = 0; 1486 } 1487 1488 1489 /* 1490 * hermon_mcg_hash_list_remove() 1491 * Context: Can be called only from user or kernel context. 1492 */ 1493 static int 1494 hermon_mcg_hash_list_remove(hermon_state_t *state, uint_t curr_indx, 1495 uint_t prev_indx, hermon_hw_mcg_t *mcg_entry) 1496 { 1497 hermon_mcghdl_t curr_mcg, prev_mcg, next_mcg; 1498 uint_t next_indx; 1499 int status; 1500 1501 /* Get the pointer to "shadow" list for current entry */ 1502 curr_mcg = &state->hs_mcghdl[curr_indx]; 1503 1504 /* 1505 * If this is the first entry on a hash chain, then attempt to replace 1506 * the entry with the next entry on the chain. If there are no 1507 * subsequent entries on the chain, then this is the only entry and 1508 * should be invalidated. 1509 */ 1510 if (curr_indx == prev_indx) { 1511 1512 /* 1513 * If this is the only entry on the chain, then invalidate it. 1514 * Note: Invalidating an MCG entry means writing all zeros 1515 * to the entry. This is only necessary for those MCG 1516 * entries that are the "head" entries of the individual hash 1517 * chains. 
         * Regardless of whether this operation returns
         * success or failure, return that result to the caller.
         */
        next_indx = curr_mcg->mcg_next_indx;
        if (next_indx == 0) {
            status = hermon_mcg_entry_invalidate(state, mcg_entry,
                curr_indx);
            bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
            return (status);
        }

        /*
         * Otherwise, this is just the first entry on the chain, so
         * grab the next one
         */
        next_mcg = &state->hs_mcghdl[next_indx];

        /*
         * Read the next MCG entry into the temporary MCG.  Note:
         * In general, this operation shouldn't fail.  If it does,
         * then it is an indication that something (probably in HW,
         * but maybe in SW) has gone seriously wrong.
         */
        status = hermon_read_mgm_cmd_post(state, mcg_entry, next_indx,
            HERMON_CMD_NOSLEEP_SPIN);
        if (status != HERMON_CMD_SUCCESS) {
            HERMON_WARNING(state, "failed to read MCG entry");
            cmn_err(CE_CONT, "Hermon: READ_MGM command failed: "
                "%08x\n", status);
            if (status == HERMON_CMD_INVALID_STATUS) {
                hermon_fm_ereport(state, HCA_SYS_ERR,
                    HCA_ERR_SRV_LOST);
            }
            return (ibc_get_ci_failure(0));
        }

        /*
         * Copy/Write the temporary MCG back to the hardware MCG list
         * using the current index.  This essentially removes the
         * current MCG entry from the list by writing over it with
         * the next one.  If this is successful, then we can do the
         * same operation for the "shadow" list.  And we can also
         * free up the Hermon MCG entry resource that was associated
         * with the (old) next entry.  Note:  In general, this
         * operation shouldn't fail.  If it does, then it is an
         * indication that something (probably in HW, but maybe in SW)
         * has gone seriously wrong.
         */
        status = hermon_write_mgm_cmd_post(state, mcg_entry, curr_indx,
            HERMON_CMD_NOSLEEP_SPIN);
        if (status != HERMON_CMD_SUCCESS) {
            HERMON_WARNING(state, "failed to write MCG entry");
            cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
                "%08x\n", status);
            if (status == HERMON_CMD_INVALID_STATUS) {
                hermon_fm_ereport(state, HCA_SYS_ERR,
                    HCA_ERR_SRV_LOST);
            }
            return (ibc_get_ci_failure(0));
        }

        /*
         * Copy all the software tracking information from the next
         * entry on the "shadow" MCG list into the current entry on
         * the list.  Then invalidate (zero out) the other "shadow"
         * list entry.
         */
        bcopy(next_mcg, curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
        bzero(next_mcg, sizeof (struct hermon_sw_mcg_list_s));

        /*
         * Free up the Hermon MCG entry resource used by the "next"
         * MCG entry.  That resource is no longer needed by any
         * MCG entry which is first on a hash chain (like the "next"
         * entry has just become).
         */
        hermon_rsrc_free(state, &curr_mcg->mcg_rsrcp);

        return (DDI_SUCCESS);
    }

    /*
     * Else if this is the last entry on the hash chain (or a middle
     * entry), then we update the previous entry's "next_gid_index" field
     * to make it point instead to the next entry on the chain.  By
     * skipping over the removed entry in this way, we can then free up
     * any resources associated with the current entry.  Note:  We don't
     * need to invalidate the "skipped over" hardware entry because it
     * will no longer be connected to any hash chains, and if/when it is
     * finally re-used, it will be written with entirely new values.
     */

    /*
     * Read the previous MCG entry (at "prev_indx") into the temporary
     * MCG.  Note:  In general, this operation shouldn't fail.  If it
     * does, then it is an indication that something (probably in HW,
     * but maybe in SW) has gone seriously wrong.
     */
    status = hermon_read_mgm_cmd_post(state, mcg_entry, prev_indx,
        HERMON_CMD_NOSLEEP_SPIN);
    if (status != HERMON_CMD_SUCCESS) {
        HERMON_WARNING(state, "failed to read MCG entry");
        cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        return (ibc_get_ci_failure(0));
    }

    /*
     * Finally, we update the "next_gid_indx" field in the temporary MCG
     * and attempt to write the entry back into the Hermon MCG table.  If
     * this succeeds, then we update the "shadow" list to reflect the
     * change, free up the Hermon MCG entry resource that was associated
     * with the current entry, and return success.  Note:  In general,
     * this operation shouldn't fail.  If it does, then it is an indication
     * that something (probably in HW, but maybe in SW) has gone seriously
     * wrong.
     */
    mcg_entry->next_gid_indx = curr_mcg->mcg_next_indx;
    status = hermon_write_mgm_cmd_post(state, mcg_entry, prev_indx,
        HERMON_CMD_NOSLEEP_SPIN);
    if (status != HERMON_CMD_SUCCESS) {
        HERMON_WARNING(state, "failed to write MCG entry");
        cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR,
                HCA_ERR_SRV_LOST);
        }
        return (ibc_get_ci_failure(0));
    }

    /*
     * Get the pointer to the "shadow" MCG list entry for the previous
     * MCG.  Update its "mcg_next_indx" to point to the next entry
     * (the one after the current entry).  Note:  This next index may be
     * zero, indicating the end of the list.
     */
    prev_mcg = &state->hs_mcghdl[prev_indx];
    prev_mcg->mcg_next_indx = curr_mcg->mcg_next_indx;

    /*
     * Free up the Hermon MCG entry resource used by the current entry.
     * This resource is no longer needed because the chain now skips over
     * the current entry.  Then invalidate (zero out) the current "shadow"
     * list entry.
     */
    hermon_rsrc_free(state, &curr_mcg->mcg_rsrcp);
    bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s));

    return (DDI_SUCCESS);
}


/*
 * hermon_mcg_entry_invalidate()
 *    Context: Can be called only from user or kernel context.
 */
static int
hermon_mcg_entry_invalidate(hermon_state_t *state, hermon_hw_mcg_t *mcg_entry,
    uint_t indx)
{
    int     status;

    /*
     * Invalidate the hardware MCG entry by zeroing out this temporary
     * MCG and writing it to the hardware.  Note: In general, this
     * operation shouldn't fail.  If it does, then it is an indication
     * that something (probably in HW, but maybe in SW) has gone seriously
     * wrong.
     */
    bzero(mcg_entry, HERMON_MCGMEM_SZ(state));
    status = hermon_write_mgm_cmd_post(state, mcg_entry, indx,
        HERMON_CMD_NOSLEEP_SPIN);
    if (status != HERMON_CMD_SUCCESS) {
        HERMON_WARNING(state, "failed to write MCG entry");
        cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        return (ibc_get_ci_failure(0));
    }

    return (DDI_SUCCESS);
}


/*
 * hermon_mgid_is_valid()
 *    Context: Can be called from interrupt or base context.
1710 */ 1711 static int 1712 hermon_mgid_is_valid(ib_gid_t gid) 1713 { 1714 uint_t topbits, flags, scope; 1715 1716 /* 1717 * According to IBA 1.1 specification (section 4.1.1) a valid 1718 * "multicast GID" must have its top eight bits set to all ones 1719 */ 1720 topbits = (gid.gid_prefix >> HERMON_MCG_TOPBITS_SHIFT) & 1721 HERMON_MCG_TOPBITS_MASK; 1722 if (topbits != HERMON_MCG_TOPBITS) { 1723 return (0); 1724 } 1725 1726 /* 1727 * The next 4 bits are the "flag" bits. These are valid only 1728 * if they are "0" (which correspond to permanently assigned/ 1729 * "well-known" multicast GIDs) or "1" (for so-called "transient" 1730 * multicast GIDs). All other values are reserved. 1731 */ 1732 flags = (gid.gid_prefix >> HERMON_MCG_FLAGS_SHIFT) & 1733 HERMON_MCG_FLAGS_MASK; 1734 if (!((flags == HERMON_MCG_FLAGS_PERM) || 1735 (flags == HERMON_MCG_FLAGS_NONPERM))) { 1736 return (0); 1737 } 1738 1739 /* 1740 * The next 4 bits are the "scope" bits. These are valid only 1741 * if they are "2" (Link-local), "5" (Site-local), "8" 1742 * (Organization-local) or "E" (Global). All other values 1743 * are reserved (or currently unassigned). 1744 */ 1745 scope = (gid.gid_prefix >> HERMON_MCG_SCOPE_SHIFT) & 1746 HERMON_MCG_SCOPE_MASK; 1747 if (!((scope == HERMON_MCG_SCOPE_LINKLOC) || 1748 (scope == HERMON_MCG_SCOPE_SITELOC) || 1749 (scope == HERMON_MCG_SCOPE_ORGLOC) || 1750 (scope == HERMON_MCG_SCOPE_GLOBAL))) { 1751 return (0); 1752 } 1753 1754 /* 1755 * If it passes all of the above checks, then we will consider it 1756 * a valid multicast GID. 1757 */ 1758 return (1); 1759 } 1760 1761 1762 /* 1763 * hermon_mlid_is_valid() 1764 * Context: Can be called from interrupt or base context. 1765 */ 1766 static int 1767 hermon_mlid_is_valid(ib_lid_t lid) 1768 { 1769 /* 1770 * According to IBA 1.1 specification (section 4.1.1) a valid 1771 * "multicast DLID" must be between 0xC000 and 0xFFFE. 
1772 */ 1773 if ((lid < IB_LID_MC_FIRST) || (lid > IB_LID_MC_LAST)) { 1774 return (0); 1775 } 1776 1777 return (1); 1778 } 1779 1780 1781 /* 1782 * hermon_pd_alloc() 1783 * Context: Can be called only from user or kernel context. 1784 */ 1785 int 1786 hermon_pd_alloc(hermon_state_t *state, hermon_pdhdl_t *pdhdl, uint_t sleepflag) 1787 { 1788 hermon_rsrc_t *rsrc; 1789 hermon_pdhdl_t pd; 1790 int status; 1791 1792 /* 1793 * Allocate the software structure for tracking the protection domain 1794 * (i.e. the Hermon Protection Domain handle). By default each PD 1795 * structure will have a unique PD number assigned to it. All that 1796 * is necessary is for software to initialize the PD reference count 1797 * (to zero) and return success. 1798 */ 1799 status = hermon_rsrc_alloc(state, HERMON_PDHDL, 1, sleepflag, &rsrc); 1800 if (status != DDI_SUCCESS) { 1801 return (IBT_INSUFF_RESOURCE); 1802 } 1803 pd = (hermon_pdhdl_t)rsrc->hr_addr; 1804 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd)) 1805 1806 pd->pd_refcnt = 0; 1807 *pdhdl = pd; 1808 1809 return (DDI_SUCCESS); 1810 } 1811 1812 1813 /* 1814 * hermon_pd_free() 1815 * Context: Can be called only from user or kernel context. 1816 */ 1817 int 1818 hermon_pd_free(hermon_state_t *state, hermon_pdhdl_t *pdhdl) 1819 { 1820 hermon_rsrc_t *rsrc; 1821 hermon_pdhdl_t pd; 1822 1823 /* 1824 * Pull all the necessary information from the Hermon Protection Domain 1825 * handle. This is necessary here because the resource for the 1826 * PD is going to be freed up as part of this operation. 1827 */ 1828 pd = *pdhdl; 1829 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd)) 1830 rsrc = pd->pd_rsrcp; 1831 1832 /* 1833 * Check the PD reference count. If the reference count is non-zero, 1834 * then it means that this protection domain is still referenced by 1835 * some memory region, queue pair, address handle, or other IB object 1836 * If it is non-zero, then return an error. Otherwise, free the 1837 * Hermon resource and return success. 
1838 */ 1839 if (pd->pd_refcnt != 0) { 1840 return (IBT_PD_IN_USE); 1841 } 1842 1843 /* Free the Hermon Protection Domain handle */ 1844 hermon_rsrc_free(state, &rsrc); 1845 1846 /* Set the pdhdl pointer to NULL and return success */ 1847 *pdhdl = (hermon_pdhdl_t)NULL; 1848 1849 return (DDI_SUCCESS); 1850 } 1851 1852 1853 /* 1854 * hermon_pd_refcnt_inc() 1855 * Context: Can be called from interrupt or base context. 1856 */ 1857 void 1858 hermon_pd_refcnt_inc(hermon_pdhdl_t pd) 1859 { 1860 /* Increment the protection domain's reference count */ 1861 atomic_inc_32(&pd->pd_refcnt); 1862 } 1863 1864 1865 /* 1866 * hermon_pd_refcnt_dec() 1867 * Context: Can be called from interrupt or base context. 1868 */ 1869 void 1870 hermon_pd_refcnt_dec(hermon_pdhdl_t pd) 1871 { 1872 /* Decrement the protection domain's reference count */ 1873 atomic_dec_32(&pd->pd_refcnt); 1874 } 1875 1876 1877 /* 1878 * hermon_port_query() 1879 * Context: Can be called only from user or kernel context. 1880 */ 1881 int 1882 hermon_port_query(hermon_state_t *state, uint_t port, ibt_hca_portinfo_t *pi) 1883 { 1884 sm_portinfo_t portinfo; 1885 sm_guidinfo_t guidinfo; 1886 sm_pkey_table_t pkeytable; 1887 ib_gid_t *sgid; 1888 uint_t sgid_max, pkey_max, tbl_size; 1889 int i, j, indx, status; 1890 ib_pkey_t *pkeyp; 1891 ib_guid_t *guidp; 1892 1893 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pi)) 1894 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*state)) 1895 1896 /* Validate that specified port number is legal */ 1897 if (!hermon_portnum_is_valid(state, port)) { 1898 return (IBT_HCA_PORT_INVALID); 1899 } 1900 pkeyp = state->hs_pkey[port - 1]; 1901 guidp = state->hs_guid[port - 1]; 1902 1903 /* 1904 * We use the Hermon MAD_IFC command to post a GetPortInfo MAD 1905 * to the firmware (for the specified port number). This returns 1906 * a full PortInfo MAD (in "portinfo") which we subsequently 1907 * parse to fill in the "ibt_hca_portinfo_t" structure returned 1908 * to the IBTF. 
1909 */ 1910 status = hermon_getportinfo_cmd_post(state, port, 1911 HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo); 1912 if (status != HERMON_CMD_SUCCESS) { 1913 cmn_err(CE_CONT, "Hermon: GetPortInfo (port %02d) command " 1914 "failed: %08x\n", port, status); 1915 if (status == HERMON_CMD_INVALID_STATUS) { 1916 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 1917 } 1918 return (ibc_get_ci_failure(0)); 1919 } 1920 1921 /* 1922 * Parse the PortInfo MAD and fill in the IBTF structure 1923 */ 1924 pi->p_base_lid = portinfo.LID; 1925 pi->p_qkey_violations = portinfo.Q_KeyViolations; 1926 pi->p_pkey_violations = portinfo.P_KeyViolations; 1927 pi->p_sm_sl = portinfo.MasterSMSL; 1928 pi->p_sm_lid = portinfo.MasterSMLID; 1929 pi->p_linkstate = portinfo.PortState; 1930 pi->p_port_num = portinfo.LocalPortNum; 1931 pi->p_phys_state = portinfo.PortPhysicalState; 1932 pi->p_width_supported = portinfo.LinkWidthSupported; 1933 pi->p_width_enabled = portinfo.LinkWidthEnabled; 1934 pi->p_width_active = portinfo.LinkWidthActive; 1935 pi->p_speed_supported = portinfo.LinkSpeedSupported; 1936 pi->p_speed_enabled = portinfo.LinkSpeedEnabled; 1937 pi->p_speed_active = portinfo.LinkSpeedActive; 1938 pi->p_mtu = portinfo.MTUCap; 1939 pi->p_lmc = portinfo.LMC; 1940 pi->p_max_vl = portinfo.VLCap; 1941 pi->p_subnet_timeout = portinfo.SubnetTimeOut; 1942 pi->p_msg_sz = ((uint32_t)1 << HERMON_QP_LOG_MAX_MSGSZ); 1943 tbl_size = state->hs_cfg_profile->cp_log_max_gidtbl; 1944 pi->p_sgid_tbl_sz = (1 << tbl_size); 1945 tbl_size = state->hs_cfg_profile->cp_log_max_pkeytbl; 1946 pi->p_pkey_tbl_sz = (1 << tbl_size); 1947 state->hs_sn_prefix[port - 1] = portinfo.GidPrefix; 1948 1949 /* 1950 * Convert InfiniBand-defined port capability flags to the format 1951 * specified by the IBTF 1952 */ 1953 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM) 1954 pi->p_capabilities |= IBT_PORT_CAP_SM; 1955 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM_DISABLED) 1956 pi->p_capabilities |= 
IBT_PORT_CAP_SM_DISABLED; 1957 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SNMP_SUPPD) 1958 pi->p_capabilities |= IBT_PORT_CAP_SNMP_TUNNEL; 1959 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_DM_SUPPD) 1960 pi->p_capabilities |= IBT_PORT_CAP_DM; 1961 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_VM_SUPPD) 1962 pi->p_capabilities |= IBT_PORT_CAP_VENDOR; 1963 1964 /* 1965 * Fill in the SGID table. Since the only access to the Hermon 1966 * GID tables is through the firmware's MAD_IFC interface, we 1967 * post as many GetGUIDInfo MADs as necessary to read in the entire 1968 * contents of the SGID table (for the specified port). Note: The 1969 * GetGUIDInfo command only gets eight GUIDs per operation. These 1970 * GUIDs are then appended to the GID prefix for the port (from the 1971 * GetPortInfo above) to form the entire SGID table. 1972 */ 1973 for (i = 0; i < pi->p_sgid_tbl_sz; i += 8) { 1974 status = hermon_getguidinfo_cmd_post(state, port, i >> 3, 1975 HERMON_SLEEPFLAG_FOR_CONTEXT(), &guidinfo); 1976 if (status != HERMON_CMD_SUCCESS) { 1977 cmn_err(CE_CONT, "Hermon: GetGUIDInfo (port %02d) " 1978 "command failed: %08x\n", port, status); 1979 if (status == HERMON_CMD_INVALID_STATUS) { 1980 hermon_fm_ereport(state, HCA_SYS_ERR, 1981 HCA_ERR_SRV_LOST); 1982 } 1983 return (ibc_get_ci_failure(0)); 1984 } 1985 1986 /* Figure out how many of the entries are valid */ 1987 sgid_max = min((pi->p_sgid_tbl_sz - i), 8); 1988 for (j = 0; j < sgid_max; j++) { 1989 indx = (i + j); 1990 sgid = &pi->p_sgid_tbl[indx]; 1991 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sgid)) 1992 sgid->gid_prefix = portinfo.GidPrefix; 1993 guidp[indx] = sgid->gid_guid = 1994 guidinfo.GUIDBlocks[j]; 1995 } 1996 } 1997 1998 /* 1999 * Fill in the PKey table. Just as for the GID tables above, the 2000 * only access to the Hermon PKey tables is through the firmware's 2001 * MAD_IFC interface. 
We post as many GetPKeyTable MADs as necessary 2002 * to read in the entire contents of the PKey table (for the specified 2003 * port). Note: The GetPKeyTable command only gets 32 PKeys per 2004 * operation. 2005 */ 2006 for (i = 0; i < pi->p_pkey_tbl_sz; i += 32) { 2007 status = hermon_getpkeytable_cmd_post(state, port, i, 2008 HERMON_SLEEPFLAG_FOR_CONTEXT(), &pkeytable); 2009 if (status != HERMON_CMD_SUCCESS) { 2010 cmn_err(CE_CONT, "Hermon: GetPKeyTable (port %02d) " 2011 "command failed: %08x\n", port, status); 2012 if (status == HERMON_CMD_INVALID_STATUS) { 2013 hermon_fm_ereport(state, HCA_SYS_ERR, 2014 HCA_ERR_SRV_LOST); 2015 } 2016 return (ibc_get_ci_failure(0)); 2017 } 2018 2019 /* Figure out how many of the entries are valid */ 2020 pkey_max = min((pi->p_pkey_tbl_sz - i), 32); 2021 for (j = 0; j < pkey_max; j++) { 2022 indx = (i + j); 2023 pkeyp[indx] = pi->p_pkey_tbl[indx] = 2024 pkeytable.P_KeyTableBlocks[j]; 2025 } 2026 } 2027 2028 return (DDI_SUCCESS); 2029 } 2030 2031 2032 /* 2033 * hermon_port_modify() 2034 * Context: Can be called only from user or kernel context. 
2035 */ 2036 /* ARGSUSED */ 2037 int 2038 hermon_port_modify(hermon_state_t *state, uint8_t port, 2039 ibt_port_modify_flags_t flags, uint8_t init_type) 2040 { 2041 sm_portinfo_t portinfo; 2042 uint32_t capmask; 2043 int status; 2044 hermon_hw_set_port_t set_port; 2045 2046 /* 2047 * Return an error if either of the unsupported flags are set 2048 */ 2049 if ((flags & IBT_PORT_SHUTDOWN) || 2050 (flags & IBT_PORT_SET_INIT_TYPE)) { 2051 return (IBT_NOT_SUPPORTED); 2052 } 2053 2054 bzero(&set_port, sizeof (set_port)); 2055 2056 /* 2057 * Determine whether we are trying to reset the QKey counter 2058 */ 2059 if (flags & IBT_PORT_RESET_QKEY) 2060 set_port.rqk = 1; 2061 2062 /* Validate that specified port number is legal */ 2063 if (!hermon_portnum_is_valid(state, port)) { 2064 return (IBT_HCA_PORT_INVALID); 2065 } 2066 2067 /* 2068 * Use the Hermon MAD_IFC command to post a GetPortInfo MAD to the 2069 * firmware (for the specified port number). This returns a full 2070 * PortInfo MAD (in "portinfo") from which we pull the current 2071 * capability mask. We then modify the capability mask as directed 2072 * by the "pmod_flags" field, and write the updated capability mask 2073 * using the Hermon SET_IB command (below). 2074 */ 2075 status = hermon_getportinfo_cmd_post(state, port, 2076 HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo); 2077 if (status != HERMON_CMD_SUCCESS) { 2078 if (status == HERMON_CMD_INVALID_STATUS) { 2079 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 2080 } 2081 return (ibc_get_ci_failure(0)); 2082 } 2083 2084 /* 2085 * Convert InfiniBand-defined port capability flags to the format 2086 * specified by the IBTF. Specifically, we modify the capability 2087 * mask based on the specified values. 
2088 */ 2089 capmask = portinfo.CapabilityMask; 2090 2091 if (flags & IBT_PORT_RESET_SM) 2092 capmask &= ~SM_CAP_MASK_IS_SM; 2093 else if (flags & IBT_PORT_SET_SM) 2094 capmask |= SM_CAP_MASK_IS_SM; 2095 2096 if (flags & IBT_PORT_RESET_SNMP) 2097 capmask &= ~SM_CAP_MASK_IS_SNMP_SUPPD; 2098 else if (flags & IBT_PORT_SET_SNMP) 2099 capmask |= SM_CAP_MASK_IS_SNMP_SUPPD; 2100 2101 if (flags & IBT_PORT_RESET_DEVMGT) 2102 capmask &= ~SM_CAP_MASK_IS_DM_SUPPD; 2103 else if (flags & IBT_PORT_SET_DEVMGT) 2104 capmask |= SM_CAP_MASK_IS_DM_SUPPD; 2105 2106 if (flags & IBT_PORT_RESET_VENDOR) 2107 capmask &= ~SM_CAP_MASK_IS_VM_SUPPD; 2108 else if (flags & IBT_PORT_SET_VENDOR) 2109 capmask |= SM_CAP_MASK_IS_VM_SUPPD; 2110 2111 set_port.cap_mask = capmask; 2112 2113 /* 2114 * Use the Hermon SET_PORT command to update the capability mask and 2115 * (possibly) reset the QKey violation counter for the specified port. 2116 * Note: In general, this operation shouldn't fail. If it does, then 2117 * it is an indication that something (probably in HW, but maybe in 2118 * SW) has gone seriously wrong. 2119 */ 2120 status = hermon_set_port_cmd_post(state, &set_port, port, 2121 HERMON_SLEEPFLAG_FOR_CONTEXT()); 2122 if (status != HERMON_CMD_SUCCESS) { 2123 HERMON_WARNING(state, "failed to modify port capabilities"); 2124 cmn_err(CE_CONT, "Hermon: SET_IB (port %02d) command failed: " 2125 "%08x\n", port, status); 2126 if (status == HERMON_CMD_INVALID_STATUS) { 2127 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 2128 } 2129 return (ibc_get_ci_failure(0)); 2130 } 2131 2132 return (DDI_SUCCESS); 2133 } 2134 2135 2136 /* 2137 * hermon_set_addr_path() 2138 * Context: Can be called from interrupt or base context. 2139 * 2140 * Note: This routine is used for two purposes. It is used to fill in the 2141 * Hermon UDAV fields, and it is used to fill in the address path information 2142 * for QPs. Because the two Hermon structures are similar, common fields can 2143 * be filled in here. 
Because they are different, however, we pass 2144 * an additional flag to indicate which type is being filled and do each one 2145 * uniquely 2146 */ 2147 2148 int hermon_srate_override = -1; /* allows ease of testing */ 2149 2150 int 2151 hermon_set_addr_path(hermon_state_t *state, ibt_adds_vect_t *av, 2152 hermon_hw_addr_path_t *path, uint_t type) 2153 { 2154 uint_t gidtbl_sz; 2155 hermon_hw_udav_t *udav; 2156 2157 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av)) 2158 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path)) 2159 2160 udav = (hermon_hw_udav_t *)(void *)path; 2161 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*udav)) 2162 path->mlid = av->av_src_path; 2163 path->rlid = av->av_dlid; 2164 2165 switch (av->av_srate) { 2166 case IBT_SRATE_2: /* 1xSDR-2.5Gb/s injection rate */ 2167 path->max_stat_rate = 7; break; 2168 case IBT_SRATE_10: /* 4xSDR-10.0Gb/s injection rate */ 2169 path->max_stat_rate = 8; break; 2170 case IBT_SRATE_30: /* 12xSDR-30Gb/s injection rate */ 2171 path->max_stat_rate = 9; break; 2172 case IBT_SRATE_5: /* 1xDDR-5Gb/s injection rate */ 2173 path->max_stat_rate = 10; break; 2174 case IBT_SRATE_20: /* 4xDDR-20Gb/s injection rate */ 2175 path->max_stat_rate = 11; break; 2176 case IBT_SRATE_40: /* 4xQDR-40Gb/s injection rate */ 2177 path->max_stat_rate = 12; break; 2178 case IBT_SRATE_60: /* 12xDDR-60Gb/s injection rate */ 2179 path->max_stat_rate = 13; break; 2180 case IBT_SRATE_80: /* 8xQDR-80Gb/s injection rate */ 2181 path->max_stat_rate = 14; break; 2182 case IBT_SRATE_120: /* 12xQDR-120Gb/s injection rate */ 2183 path->max_stat_rate = 15; break; 2184 case IBT_SRATE_NOT_SPECIFIED: /* Max */ 2185 path->max_stat_rate = 0; break; 2186 default: 2187 return (IBT_STATIC_RATE_INVALID); 2188 } 2189 if (hermon_srate_override != -1) /* for evaluating HCA firmware */ 2190 path->max_stat_rate = hermon_srate_override; 2191 2192 /* If "grh" flag is set, then check for valid SGID index too */ 2193 gidtbl_sz = (1 << state->hs_queryport.log_max_gid); 2194 if 
((av->av_send_grh) && (av->av_sgid_ix > gidtbl_sz)) { 2195 return (IBT_SGID_INVALID); 2196 } 2197 2198 /* 2199 * Fill in all "global" values regardless of the value in the GRH 2200 * flag. Because "grh" is not set unless "av_send_grh" is set, the 2201 * hardware will ignore the other "global" values as necessary. Note: 2202 * SW does this here to enable later query operations to return 2203 * exactly the same params that were passed when the addr path was 2204 * last written. 2205 */ 2206 path->grh = av->av_send_grh; 2207 if (type == HERMON_ADDRPATH_QP) { 2208 path->mgid_index = av->av_sgid_ix; 2209 } else { 2210 /* 2211 * For Hermon UDAV, the "mgid_index" field is the index into 2212 * a combined table (not a per-port table), but having sections 2213 * for each port. So some extra calculations are necessary. 2214 */ 2215 2216 path->mgid_index = ((av->av_port_num - 1) * gidtbl_sz) + 2217 av->av_sgid_ix; 2218 2219 udav->portnum = av->av_port_num; 2220 } 2221 2222 /* 2223 * According to Hermon PRM, the (31:0) part of rgid_l must be set to 2224 * "0x2" if the 'grh' or 'g' bit is cleared. It also says that we 2225 * only need to do it for UDAV's. So we enforce that here. 2226 * 2227 * NOTE: The entire 64 bits worth of GUID info is actually being 2228 * preserved (for UDAVs) by the callers of this function 2229 * (hermon_ah_alloc() and hermon_ah_modify()) and as long as the 2230 * 'grh' bit is not set, the upper 32 bits (63:32) of rgid_l are 2231 * "don't care". 
2232 */ 2233 if ((path->grh) || (type == HERMON_ADDRPATH_QP)) { 2234 path->flow_label = av->av_flow; 2235 path->tclass = av->av_tclass; 2236 path->hop_limit = av->av_hop; 2237 bcopy(&(av->av_dgid.gid_prefix), &(path->rgid_h), 2238 sizeof (uint64_t)); 2239 bcopy(&(av->av_dgid.gid_guid), &(path->rgid_l), 2240 sizeof (uint64_t)); 2241 } else { 2242 path->rgid_l = 0x2; 2243 path->flow_label = 0; 2244 path->tclass = 0; 2245 path->hop_limit = 0; 2246 path->rgid_h = 0; 2247 } 2248 /* extract the default service level */ 2249 udav->sl = (HERMON_DEF_SCHED_SELECTION & 0x3C) >> 2; 2250 2251 return (DDI_SUCCESS); 2252 } 2253 2254 2255 /* 2256 * hermon_get_addr_path() 2257 * Context: Can be called from interrupt or base context. 2258 * 2259 * Note: Just like hermon_set_addr_path() above, this routine is used for two 2260 * purposes. It is used to read in the Hermon UDAV fields, and it is used to 2261 * read in the address path information for QPs. Because the two Hermon 2262 * structures are similar, common fields can be read in here. But because 2263 * they are slightly different, we pass an additional flag to indicate which 2264 * type is being read. 
2265 */ 2266 void 2267 hermon_get_addr_path(hermon_state_t *state, hermon_hw_addr_path_t *path, 2268 ibt_adds_vect_t *av, uint_t type) 2269 { 2270 uint_t gidtbl_sz; 2271 2272 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path)) 2273 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av)) 2274 2275 av->av_src_path = path->mlid; 2276 av->av_dlid = path->rlid; 2277 2278 /* Set "av_ipd" value from max_stat_rate */ 2279 switch (path->max_stat_rate) { 2280 case 7: /* 1xSDR-2.5Gb/s injection rate */ 2281 av->av_srate = IBT_SRATE_2; break; 2282 case 8: /* 4xSDR-10.0Gb/s injection rate */ 2283 av->av_srate = IBT_SRATE_10; break; 2284 case 9: /* 12xSDR-30Gb/s injection rate */ 2285 av->av_srate = IBT_SRATE_30; break; 2286 case 10: /* 1xDDR-5Gb/s injection rate */ 2287 av->av_srate = IBT_SRATE_5; break; 2288 case 11: /* 4xDDR-20Gb/s injection rate */ 2289 av->av_srate = IBT_SRATE_20; break; 2290 case 12: /* xQDR-40Gb/s injection rate */ 2291 av->av_srate = IBT_SRATE_40; break; 2292 case 13: /* 12xDDR-60Gb/s injection rate */ 2293 av->av_srate = IBT_SRATE_60; break; 2294 case 14: /* 8xQDR-80Gb/s injection rate */ 2295 av->av_srate = IBT_SRATE_80; break; 2296 case 15: /* 12xQDR-120Gb/s injection rate */ 2297 av->av_srate = IBT_SRATE_120; break; 2298 case 0: /* max */ 2299 av->av_srate = IBT_SRATE_10; break; 2300 default: /* 1x injection rate */ 2301 av->av_srate = IBT_SRATE_1X; 2302 } 2303 2304 /* 2305 * Extract all "global" values regardless of the value in the GRH 2306 * flag. Because "av_send_grh" is set only if "grh" is set, software 2307 * knows to ignore the other "global" values as necessary. Note: SW 2308 * does it this way to enable these query operations to return exactly 2309 * the same params that were passed when the addr path was last written. 
2310 */ 2311 av->av_send_grh = path->grh; 2312 if (type == HERMON_ADDRPATH_QP) { 2313 av->av_sgid_ix = path->mgid_index; 2314 } else { 2315 /* 2316 * For Hermon UDAV, the "mgid_index" field is the index into 2317 * a combined table (not a per-port table). 2318 */ 2319 gidtbl_sz = (1 << state->hs_queryport.log_max_gid); 2320 av->av_sgid_ix = path->mgid_index - ((av->av_port_num - 1) * 2321 gidtbl_sz); 2322 2323 av->av_port_num = ((hermon_hw_udav_t *)(void *)path)->portnum; 2324 } 2325 av->av_flow = path->flow_label; 2326 av->av_tclass = path->tclass; 2327 av->av_hop = path->hop_limit; 2328 /* this is for alignment issue w/ the addr path struct in Hermon */ 2329 bcopy(&(path->rgid_h), &(av->av_dgid.gid_prefix), sizeof (uint64_t)); 2330 bcopy(&(path->rgid_l), &(av->av_dgid.gid_guid), sizeof (uint64_t)); 2331 } 2332 2333 2334 /* 2335 * hermon_portnum_is_valid() 2336 * Context: Can be called from interrupt or base context. 2337 */ 2338 int 2339 hermon_portnum_is_valid(hermon_state_t *state, uint_t portnum) 2340 { 2341 uint_t max_port; 2342 2343 max_port = state->hs_cfg_profile->cp_num_ports; 2344 if ((portnum <= max_port) && (portnum != 0)) { 2345 return (1); 2346 } else { 2347 return (0); 2348 } 2349 } 2350 2351 2352 /* 2353 * hermon_pkeyindex_is_valid() 2354 * Context: Can be called from interrupt or base context. 2355 */ 2356 int 2357 hermon_pkeyindex_is_valid(hermon_state_t *state, uint_t pkeyindx) 2358 { 2359 uint_t max_pkeyindx; 2360 2361 max_pkeyindx = 1 << state->hs_cfg_profile->cp_log_max_pkeytbl; 2362 if (pkeyindx < max_pkeyindx) { 2363 return (1); 2364 } else { 2365 return (0); 2366 } 2367 } 2368 2369 2370 /* 2371 * hermon_queue_alloc() 2372 * Context: Can be called from interrupt or base context. 
2373 */ 2374 int 2375 hermon_queue_alloc(hermon_state_t *state, hermon_qalloc_info_t *qa_info, 2376 uint_t sleepflag) 2377 { 2378 ddi_dma_attr_t dma_attr; 2379 int (*callback)(caddr_t); 2380 uint64_t realsize, alloc_mask; 2381 uint_t type; 2382 int flag, status; 2383 2384 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info)) 2385 2386 /* Set the callback flag appropriately */ 2387 callback = (sleepflag == HERMON_SLEEP) ? DDI_DMA_SLEEP : 2388 DDI_DMA_DONTWAIT; 2389 2390 /* 2391 * Initialize many of the default DMA attributes. Then set additional 2392 * alignment restrictions as necessary for the queue memory. Also 2393 * respect the configured value for IOMMU bypass 2394 */ 2395 hermon_dma_attr_init(state, &dma_attr); 2396 dma_attr.dma_attr_align = qa_info->qa_bind_align; 2397 type = state->hs_cfg_profile->cp_iommu_bypass; 2398 if (type == HERMON_BINDMEM_BYPASS) { 2399 dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL; 2400 } 2401 2402 /* Allocate a DMA handle */ 2403 status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, callback, NULL, 2404 &qa_info->qa_dmahdl); 2405 if (status != DDI_SUCCESS) { 2406 return (DDI_FAILURE); 2407 } 2408 2409 /* 2410 * Determine the amount of memory to allocate, depending on the values 2411 * in "qa_bind_align" and "qa_alloc_align". The problem we are trying 2412 * to solve here is that allocating a DMA handle with IOMMU bypass 2413 * (DDI_DMA_FORCE_PHYSICAL) constrains us to only requesting alignments 2414 * that are less restrictive than the page size. Since we may need 2415 * stricter alignments on the memory allocated by ddi_dma_mem_alloc() 2416 * (e.g. in Hermon QP work queue memory allocation), we use the 2417 * following method to calculate how much additional memory to request, 2418 * and we enforce our own alignment on the allocated result. 
2419 */ 2420 alloc_mask = qa_info->qa_alloc_align - 1; 2421 if (qa_info->qa_bind_align == qa_info->qa_alloc_align) { 2422 realsize = qa_info->qa_size; 2423 } else { 2424 realsize = qa_info->qa_size + alloc_mask; 2425 } 2426 2427 /* 2428 * If we are to allocate the queue from system memory, then use 2429 * ddi_dma_mem_alloc() to find the space. Otherwise, this is a 2430 * host memory allocation, use ddi_umem_alloc(). In either case, 2431 * return a pointer to the memory range allocated (including any 2432 * necessary alignment adjustments), the "real" memory pointer, 2433 * the "real" size, and a ddi_acc_handle_t to use when reading 2434 * from/writing to the memory. 2435 */ 2436 if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) { 2437 /* Allocate system memory for the queue */ 2438 status = ddi_dma_mem_alloc(qa_info->qa_dmahdl, realsize, 2439 &state->hs_reg_accattr, DDI_DMA_CONSISTENT, callback, NULL, 2440 (caddr_t *)&qa_info->qa_buf_real, 2441 (size_t *)&qa_info->qa_buf_realsz, &qa_info->qa_acchdl); 2442 if (status != DDI_SUCCESS) { 2443 ddi_dma_free_handle(&qa_info->qa_dmahdl); 2444 return (DDI_FAILURE); 2445 } 2446 2447 /* 2448 * Save temporary copy of the real pointer. (This may be 2449 * modified in the last step below). 2450 */ 2451 qa_info->qa_buf_aligned = qa_info->qa_buf_real; 2452 2453 bzero(qa_info->qa_buf_real, qa_info->qa_buf_realsz); 2454 2455 } else { /* HERMON_QUEUE_LOCATION_USERLAND */ 2456 2457 /* Allocate userland mappable memory for the queue */ 2458 flag = (sleepflag == HERMON_SLEEP) ? DDI_UMEM_SLEEP : 2459 DDI_UMEM_NOSLEEP; 2460 qa_info->qa_buf_real = ddi_umem_alloc(realsize, flag, 2461 &qa_info->qa_umemcookie); 2462 if (qa_info->qa_buf_real == NULL) { 2463 ddi_dma_free_handle(&qa_info->qa_dmahdl); 2464 return (DDI_FAILURE); 2465 } 2466 2467 /* 2468 * Save temporary copy of the real pointer. (This may be 2469 * modified in the last step below). 
2470 */ 2471 qa_info->qa_buf_aligned = qa_info->qa_buf_real; 2472 2473 } 2474 2475 /* 2476 * The next to last step is to ensure that the final address 2477 * ("qa_buf_aligned") has the appropriate "alloc" alignment 2478 * restriction applied to it (if necessary). 2479 */ 2480 if (qa_info->qa_bind_align != qa_info->qa_alloc_align) { 2481 qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t) 2482 qa_info->qa_buf_aligned + alloc_mask) & ~alloc_mask); 2483 } 2484 /* 2485 * The last step is to figure out the offset of the start relative 2486 * to the first page of the region - will be used in the eqc/cqc 2487 * passed to the HW 2488 */ 2489 qa_info->qa_pgoffs = (uint_t)((uintptr_t) 2490 qa_info->qa_buf_aligned & HERMON_PAGEMASK); 2491 2492 return (DDI_SUCCESS); 2493 } 2494 2495 2496 /* 2497 * hermon_queue_free() 2498 * Context: Can be called from interrupt or base context. 2499 */ 2500 void 2501 hermon_queue_free(hermon_qalloc_info_t *qa_info) 2502 { 2503 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info)) 2504 2505 /* 2506 * Depending on how (i.e. from where) we allocated the memory for 2507 * this queue, we choose the appropriate method for releasing the 2508 * resources. 2509 */ 2510 if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) { 2511 2512 ddi_dma_mem_free(&qa_info->qa_acchdl); 2513 2514 } else if (qa_info->qa_location == HERMON_QUEUE_LOCATION_USERLAND) { 2515 2516 ddi_umem_free(qa_info->qa_umemcookie); 2517 2518 } 2519 2520 /* Always free the dma handle */ 2521 ddi_dma_free_handle(&qa_info->qa_dmahdl); 2522 } 2523 2524 /* 2525 * hermon_destroy_fmr_pool() 2526 * Create a pool of FMRs. 2527 * Context: Can be called from kernel context only. 
2528 */ 2529 int 2530 hermon_create_fmr_pool(hermon_state_t *state, hermon_pdhdl_t pd, 2531 ibt_fmr_pool_attr_t *fmr_attr, hermon_fmrhdl_t *fmrpoolp) 2532 { 2533 hermon_fmrhdl_t fmrpool; 2534 hermon_fmr_list_t *fmr, *fmr_next; 2535 hermon_mrhdl_t mr; 2536 char taskqname[48]; 2537 int status; 2538 int sleep; 2539 int i; 2540 2541 sleep = (fmr_attr->fmr_flags & IBT_MR_SLEEP) ? HERMON_SLEEP : 2542 HERMON_NOSLEEP; 2543 if ((sleep == HERMON_SLEEP) && 2544 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) { 2545 return (IBT_INVALID_PARAM); 2546 } 2547 2548 fmrpool = (hermon_fmrhdl_t)kmem_zalloc(sizeof (*fmrpool), sleep); 2549 if (fmrpool == NULL) { 2550 status = IBT_INSUFF_RESOURCE; 2551 goto fail; 2552 } 2553 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmrpool)) 2554 2555 mutex_init(&fmrpool->fmr_lock, NULL, MUTEX_DRIVER, 2556 DDI_INTR_PRI(state->hs_intrmsi_pri)); 2557 2558 fmrpool->fmr_state = state; 2559 fmrpool->fmr_flush_function = fmr_attr->fmr_func_hdlr; 2560 fmrpool->fmr_flush_arg = fmr_attr->fmr_func_arg; 2561 fmrpool->fmr_pool_size = 0; 2562 fmrpool->fmr_cache = 0; 2563 fmrpool->fmr_max_pages = fmr_attr->fmr_max_pages_per_fmr; 2564 fmrpool->fmr_page_sz = fmr_attr->fmr_page_sz; 2565 fmrpool->fmr_dirty_watermark = fmr_attr->fmr_dirty_watermark; 2566 fmrpool->fmr_dirty_len = 0; 2567 fmrpool->fmr_flags = fmr_attr->fmr_flags; 2568 2569 /* Create taskq to handle cleanup and flush processing */ 2570 (void) snprintf(taskqname, 50, "fmrpool/%d/%d @ 0x%" PRIx64, 2571 fmr_attr->fmr_pool_size, hermon_debug_fmrpool_cnt, 2572 (uint64_t)(uintptr_t)fmrpool); 2573 fmrpool->fmr_taskq = ddi_taskq_create(state->hs_dip, taskqname, 2574 HERMON_TASKQ_NTHREADS, TASKQ_DEFAULTPRI, 0); 2575 if (fmrpool->fmr_taskq == NULL) { 2576 status = IBT_INSUFF_RESOURCE; 2577 goto fail1; 2578 } 2579 2580 fmrpool->fmr_free_list = NULL; 2581 fmrpool->fmr_dirty_list = NULL; 2582 2583 if (fmr_attr->fmr_cache) { 2584 hermon_fmr_cache_init(fmrpool); 2585 } 2586 2587 for (i = 0; i < fmr_attr->fmr_pool_size; i++) { 2588 
status = hermon_mr_alloc_fmr(state, pd, fmrpool, &mr); 2589 if (status != DDI_SUCCESS) { 2590 goto fail2; 2591 } 2592 2593 fmr = (hermon_fmr_list_t *)kmem_zalloc( 2594 sizeof (hermon_fmr_list_t), sleep); 2595 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr)) 2596 2597 fmr->fmr = mr; 2598 fmr->fmr_refcnt = 0; 2599 fmr->fmr_remaps = 0; 2600 fmr->fmr_pool = fmrpool; 2601 fmr->fmr_in_cache = 0; 2602 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) 2603 mr->mr_fmr = fmr; 2604 2605 fmr->fmr_next = fmrpool->fmr_free_list; 2606 fmrpool->fmr_free_list = fmr; 2607 fmrpool->fmr_pool_size++; 2608 } 2609 2610 /* Set to return pool */ 2611 *fmrpoolp = fmrpool; 2612 2613 return (IBT_SUCCESS); 2614 fail2: 2615 hermon_fmr_cache_fini(fmrpool); 2616 for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) { 2617 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr)) 2618 fmr_next = fmr->fmr_next; 2619 (void) hermon_mr_dealloc_fmr(state, &fmr->fmr); 2620 kmem_free(fmr, sizeof (hermon_fmr_list_t)); 2621 } 2622 ddi_taskq_destroy(fmrpool->fmr_taskq); 2623 fail1: 2624 kmem_free(fmrpool, sizeof (*fmrpool)); 2625 fail: 2626 if (status == DDI_FAILURE) { 2627 return (ibc_get_ci_failure(0)); 2628 } else { 2629 return (status); 2630 } 2631 } 2632 2633 /* 2634 * hermon_destroy_fmr_pool() 2635 * Destroy an FMR pool and free all associated resources. 2636 * Context: Can be called from kernel context only. 
2637 */ 2638 int 2639 hermon_destroy_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool) 2640 { 2641 hermon_fmr_list_t *fmr, *fmr_next; 2642 int status; 2643 2644 mutex_enter(&fmrpool->fmr_lock); 2645 status = hermon_fmr_cleanup(state, fmrpool); 2646 if (status != DDI_SUCCESS) { 2647 mutex_exit(&fmrpool->fmr_lock); 2648 return (status); 2649 } 2650 2651 if (fmrpool->fmr_cache) { 2652 hermon_fmr_cache_fini(fmrpool); 2653 } 2654 2655 for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) { 2656 fmr_next = fmr->fmr_next; 2657 2658 (void) hermon_mr_dealloc_fmr(state, &fmr->fmr); 2659 kmem_free(fmr, sizeof (hermon_fmr_list_t)); 2660 } 2661 mutex_exit(&fmrpool->fmr_lock); 2662 2663 ddi_taskq_destroy(fmrpool->fmr_taskq); 2664 mutex_destroy(&fmrpool->fmr_lock); 2665 2666 kmem_free(fmrpool, sizeof (*fmrpool)); 2667 return (DDI_SUCCESS); 2668 } 2669 2670 /* 2671 * hermon_flush_fmr_pool() 2672 * Ensure that all unmapped FMRs are fully invalidated. 2673 * Context: Can be called from kernel context only. 2674 */ 2675 int 2676 hermon_flush_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool) 2677 { 2678 int status; 2679 2680 /* 2681 * Force the unmapping of all entries on the dirty list, regardless of 2682 * whether the watermark has been hit yet. 2683 */ 2684 /* grab the pool lock */ 2685 mutex_enter(&fmrpool->fmr_lock); 2686 status = hermon_fmr_cleanup(state, fmrpool); 2687 mutex_exit(&fmrpool->fmr_lock); 2688 return (status); 2689 } 2690 2691 /* 2692 * hermon_deregister_fmr() 2693 * Map memory into FMR 2694 * Context: Can be called from interrupt or base context. 
2695 */ 2696 int 2697 hermon_register_physical_fmr(hermon_state_t *state, hermon_fmrhdl_t fmrpool, 2698 ibt_pmr_attr_t *mem_pattr, hermon_mrhdl_t *mr, 2699 ibt_pmr_desc_t *mem_desc_p) 2700 { 2701 hermon_fmr_list_t *fmr; 2702 hermon_fmr_list_t query; 2703 avl_index_t where; 2704 int status; 2705 2706 /* Check length */ 2707 mutex_enter(&fmrpool->fmr_lock); 2708 if (mem_pattr->pmr_len < 1 || (mem_pattr->pmr_num_buf > 2709 fmrpool->fmr_max_pages)) { 2710 mutex_exit(&fmrpool->fmr_lock); 2711 return (IBT_MR_LEN_INVALID); 2712 } 2713 2714 mutex_enter(&fmrpool->fmr_cachelock); 2715 /* lookup in fmr cache */ 2716 /* if exists, grab it, and return it */ 2717 if (fmrpool->fmr_cache) { 2718 query.fmr_desc.pmd_iova = mem_pattr->pmr_iova; 2719 query.fmr_desc.pmd_phys_buf_list_sz = mem_pattr->pmr_len; 2720 fmr = (hermon_fmr_list_t *)avl_find(&fmrpool->fmr_cache_avl, 2721 &query, &where); 2722 2723 /* 2724 * If valid FMR was found in cache, return that fmr info 2725 */ 2726 if (fmr != NULL) { 2727 fmr->fmr_refcnt++; 2728 /* Store pmr desc for use in cache */ 2729 (void) memcpy(mem_desc_p, &fmr->fmr_desc, 2730 sizeof (ibt_pmr_desc_t)); 2731 *mr = (hermon_mrhdl_t)fmr->fmr; 2732 mutex_exit(&fmrpool->fmr_cachelock); 2733 mutex_exit(&fmrpool->fmr_lock); 2734 return (DDI_SUCCESS); 2735 } 2736 } 2737 2738 /* FMR does not exist in cache, proceed with registration */ 2739 2740 /* grab next free entry */ 2741 fmr = fmrpool->fmr_free_list; 2742 if (fmr == NULL) { 2743 mutex_exit(&fmrpool->fmr_cachelock); 2744 mutex_exit(&fmrpool->fmr_lock); 2745 return (IBT_INSUFF_RESOURCE); 2746 } 2747 2748 fmrpool->fmr_free_list = fmrpool->fmr_free_list->fmr_next; 2749 fmr->fmr_next = NULL; 2750 2751 status = hermon_mr_register_physical_fmr(state, mem_pattr, fmr->fmr, 2752 mem_desc_p); 2753 if (status != DDI_SUCCESS) { 2754 mutex_exit(&fmrpool->fmr_cachelock); 2755 mutex_exit(&fmrpool->fmr_lock); 2756 return (status); 2757 } 2758 2759 fmr->fmr_refcnt = 1; 2760 fmr->fmr_remaps++; 2761 2762 /* Store pmr 
desc for use in cache */ 2763 (void) memcpy(&fmr->fmr_desc, mem_desc_p, sizeof (ibt_pmr_desc_t)); 2764 *mr = (hermon_mrhdl_t)fmr->fmr; 2765 2766 /* Store in cache */ 2767 if (fmrpool->fmr_cache) { 2768 if (!fmr->fmr_in_cache) { 2769 avl_insert(&fmrpool->fmr_cache_avl, fmr, where); 2770 fmr->fmr_in_cache = 1; 2771 } 2772 } 2773 2774 mutex_exit(&fmrpool->fmr_cachelock); 2775 mutex_exit(&fmrpool->fmr_lock); 2776 return (DDI_SUCCESS); 2777 } 2778 2779 /* 2780 * hermon_deregister_fmr() 2781 * Unmap FMR 2782 * Context: Can be called from kernel context only. 2783 */ 2784 int 2785 hermon_deregister_fmr(hermon_state_t *state, hermon_mrhdl_t mr) 2786 { 2787 hermon_fmr_list_t *fmr; 2788 hermon_fmrhdl_t fmrpool; 2789 int status; 2790 2791 fmr = mr->mr_fmr; 2792 fmrpool = fmr->fmr_pool; 2793 2794 /* Grab pool lock */ 2795 mutex_enter(&fmrpool->fmr_lock); 2796 fmr->fmr_refcnt--; 2797 2798 if (fmr->fmr_refcnt == 0) { 2799 /* 2800 * First, do some bit of invalidation, reducing our exposure to 2801 * having this region still registered in hardware. 2802 */ 2803 (void) hermon_mr_invalidate_fmr(state, mr); 2804 2805 /* 2806 * If we've exhausted our remaps then add the FMR to the dirty 2807 * list, not allowing it to be re-used until we have done a 2808 * flush. Otherwise, simply add it back to the free list for 2809 * re-mapping. 
2810 */ 2811 if (fmr->fmr_remaps < 2812 state->hs_cfg_profile->cp_fmr_max_remaps) { 2813 /* add to free list */ 2814 fmr->fmr_next = fmrpool->fmr_free_list; 2815 fmrpool->fmr_free_list = fmr; 2816 } else { 2817 /* add to dirty list */ 2818 fmr->fmr_next = fmrpool->fmr_dirty_list; 2819 fmrpool->fmr_dirty_list = fmr; 2820 fmrpool->fmr_dirty_len++; 2821 2822 status = ddi_taskq_dispatch(fmrpool->fmr_taskq, 2823 hermon_fmr_processing, fmrpool, DDI_NOSLEEP); 2824 if (status == DDI_FAILURE) { 2825 mutex_exit(&fmrpool->fmr_lock); 2826 return (IBT_INSUFF_RESOURCE); 2827 } 2828 } 2829 } 2830 /* Release pool lock */ 2831 mutex_exit(&fmrpool->fmr_lock); 2832 2833 return (DDI_SUCCESS); 2834 } 2835 2836 2837 /* 2838 * hermon_fmr_processing() 2839 * If required, perform cleanup. 2840 * Context: Called from taskq context only. 2841 */ 2842 static void 2843 hermon_fmr_processing(void *fmr_args) 2844 { 2845 hermon_fmrhdl_t fmrpool; 2846 int status; 2847 2848 ASSERT(fmr_args != NULL); 2849 2850 fmrpool = (hermon_fmrhdl_t)fmr_args; 2851 2852 /* grab pool lock */ 2853 mutex_enter(&fmrpool->fmr_lock); 2854 if (fmrpool->fmr_dirty_len >= fmrpool->fmr_dirty_watermark) { 2855 status = hermon_fmr_cleanup(fmrpool->fmr_state, fmrpool); 2856 if (status != DDI_SUCCESS) { 2857 mutex_exit(&fmrpool->fmr_lock); 2858 return; 2859 } 2860 2861 if (fmrpool->fmr_flush_function != NULL) { 2862 (void) fmrpool->fmr_flush_function( 2863 (ibc_fmr_pool_hdl_t)fmrpool, 2864 fmrpool->fmr_flush_arg); 2865 } 2866 } 2867 2868 /* let pool lock go */ 2869 mutex_exit(&fmrpool->fmr_lock); 2870 } 2871 2872 /* 2873 * hermon_fmr_cleanup() 2874 * Perform cleaning processing, walking the list and performing the MTT sync 2875 * operation if required. 2876 * Context: can be called from taskq or base context. 
2877 */ 2878 static int 2879 hermon_fmr_cleanup(hermon_state_t *state, hermon_fmrhdl_t fmrpool) 2880 { 2881 hermon_fmr_list_t *fmr; 2882 hermon_fmr_list_t *fmr_next; 2883 int sync_needed; 2884 int status; 2885 2886 ASSERT(MUTEX_HELD(&fmrpool->fmr_lock)); 2887 2888 sync_needed = 0; 2889 for (fmr = fmrpool->fmr_dirty_list; fmr; fmr = fmr_next) { 2890 fmr_next = fmr->fmr_next; 2891 fmr->fmr_remaps = 0; 2892 2893 (void) hermon_mr_deregister_fmr(state, fmr->fmr); 2894 2895 /* 2896 * Update lists. 2897 * - add fmr back to free list 2898 * - remove fmr from dirty list 2899 */ 2900 fmr->fmr_next = fmrpool->fmr_free_list; 2901 fmrpool->fmr_free_list = fmr; 2902 2903 2904 /* 2905 * Because we have updated the dirty list, and deregistered the 2906 * FMR entry, we do need to sync the TPT, so we set the 2907 * 'sync_needed' flag here so we sync once we finish dirty_list 2908 * processing. 2909 */ 2910 sync_needed = 1; 2911 } 2912 2913 fmrpool->fmr_dirty_list = NULL; 2914 fmrpool->fmr_dirty_len = 0; 2915 2916 if (sync_needed) { 2917 status = hermon_sync_tpt_cmd_post(state, 2918 HERMON_CMD_NOSLEEP_SPIN); 2919 if (status != HERMON_CMD_SUCCESS) { 2920 return (status); 2921 } 2922 } 2923 2924 return (DDI_SUCCESS); 2925 } 2926 2927 /* 2928 * hermon_fmr_avl_compare() 2929 * Context: Can be called from user or kernel context. 2930 */ 2931 static int 2932 hermon_fmr_avl_compare(const void *q, const void *e) 2933 { 2934 hermon_fmr_list_t *entry, *query; 2935 2936 entry = (hermon_fmr_list_t *)e; 2937 query = (hermon_fmr_list_t *)q; 2938 2939 if (query->fmr_desc.pmd_iova < entry->fmr_desc.pmd_iova) { 2940 return (-1); 2941 } else if (query->fmr_desc.pmd_iova > entry->fmr_desc.pmd_iova) { 2942 return (+1); 2943 } else { 2944 return (0); 2945 } 2946 } 2947 2948 2949 /* 2950 * hermon_fmr_cache_init() 2951 * Context: Can be called from user or kernel context. 
2952 */ 2953 static void 2954 hermon_fmr_cache_init(hermon_fmrhdl_t fmr) 2955 { 2956 /* Initialize the lock used for FMR cache AVL tree access */ 2957 mutex_init(&fmr->fmr_cachelock, NULL, MUTEX_DRIVER, 2958 DDI_INTR_PRI(fmr->fmr_state->hs_intrmsi_pri)); 2959 2960 /* Initialize the AVL tree for the FMR cache */ 2961 avl_create(&fmr->fmr_cache_avl, hermon_fmr_avl_compare, 2962 sizeof (hermon_fmr_list_t), 2963 offsetof(hermon_fmr_list_t, fmr_avlnode)); 2964 2965 fmr->fmr_cache = 1; 2966 } 2967 2968 2969 /* 2970 * hermon_fmr_cache_fini() 2971 * Context: Can be called from user or kernel context. 2972 */ 2973 static void 2974 hermon_fmr_cache_fini(hermon_fmrhdl_t fmr) 2975 { 2976 void *cookie; 2977 2978 /* 2979 * Empty all entries (if necessary) and destroy the AVL tree. 2980 * The FMRs themselves are freed as part of destroy_pool() 2981 */ 2982 cookie = NULL; 2983 while (((void *)(hermon_fmr_list_t *)avl_destroy_nodes( 2984 &fmr->fmr_cache_avl, &cookie)) != NULL) { 2985 /* loop through */ 2986 } 2987 avl_destroy(&fmr->fmr_cache_avl); 2988 2989 /* Destroy the lock used for FMR cache */ 2990 mutex_destroy(&fmr->fmr_cachelock); 2991 } 2992 2993 /* 2994 * hermon_get_dma_cookies() 2995 * Return DMA cookies in the pre-allocated paddr_list_p based on the length 2996 * needed. 2997 * Context: Can be called from interrupt or base context. 2998 */ 2999 int 3000 hermon_get_dma_cookies(hermon_state_t *state, ibt_phys_buf_t *paddr_list_p, 3001 ibt_va_attr_t *va_attrs, uint_t list_len, uint_t *cookiecnt, 3002 ibc_ma_hdl_t *ibc_ma_hdl_p) 3003 { 3004 ddi_dma_handle_t dma_hdl; 3005 ddi_dma_attr_t dma_attr; 3006 ddi_dma_cookie_t dmacookie; 3007 int (*callback)(caddr_t); 3008 int status; 3009 int i; 3010 3011 /* Set the callback flag appropriately */ 3012 callback = (va_attrs->va_flags & IBT_VA_NOSLEEP) ? 
DDI_DMA_DONTWAIT : 3013 DDI_DMA_SLEEP; 3014 if ((callback == DDI_DMA_SLEEP) && 3015 (HERMON_SLEEP != HERMON_SLEEPFLAG_FOR_CONTEXT())) { 3016 return (IBT_INVALID_PARAM); 3017 } 3018 3019 /* 3020 * Initialize many of the default DMA attributes and allocate the DMA 3021 * handle. Then, if we're bypassing the IOMMU, set the 3022 * DDI_DMA_FORCE_PHYSICAL flag. 3023 */ 3024 hermon_dma_attr_init(state, &dma_attr); 3025 3026 #ifdef __x86 3027 /* 3028 * On x86 we can specify a maximum segment length for our returned 3029 * cookies. 3030 */ 3031 if (va_attrs->va_flags & IBT_VA_FMR) { 3032 dma_attr.dma_attr_seg = PAGESIZE - 1; 3033 } 3034 #endif 3035 3036 /* 3037 * Check to see if the RO flag is set, and if so, 3038 * set that bit in the attr structure as well. 3039 * 3040 * NOTE 1: This function is ONLY called by consumers, and only for 3041 * data buffers 3042 */ 3043 if (hermon_kernel_data_ro == HERMON_RO_ENABLED) { 3044 dma_attr.dma_attr_flags |= DDI_DMA_RELAXED_ORDERING; 3045 } 3046 3047 status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, 3048 callback, NULL, &dma_hdl); 3049 if (status != DDI_SUCCESS) { 3050 switch (status) { 3051 case DDI_DMA_NORESOURCES: 3052 return (IBT_INSUFF_RESOURCE); 3053 case DDI_DMA_BADATTR: 3054 default: 3055 return (ibc_get_ci_failure(0)); 3056 } 3057 } 3058 3059 /* 3060 * Now bind the handle with the correct DMA attributes. 
3061 */ 3062 if (va_attrs->va_flags & IBT_VA_BUF) { 3063 status = ddi_dma_buf_bind_handle(dma_hdl, va_attrs->va_buf, 3064 DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_DONTWAIT, 3065 NULL, &dmacookie, cookiecnt); 3066 } else { 3067 status = ddi_dma_addr_bind_handle(dma_hdl, NULL, 3068 (caddr_t)(uintptr_t)va_attrs->va_vaddr, va_attrs->va_len, 3069 DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_DONTWAIT, 3070 NULL, &dmacookie, cookiecnt); 3071 } 3072 if (status != DDI_SUCCESS) { 3073 ddi_dma_free_handle(&dma_hdl); 3074 3075 switch (status) { 3076 case DDI_DMA_NORESOURCES: 3077 return (IBT_INSUFF_RESOURCE); 3078 case DDI_DMA_TOOBIG: 3079 return (IBT_INVALID_PARAM); 3080 case DDI_DMA_PARTIAL_MAP: 3081 case DDI_DMA_INUSE: 3082 case DDI_DMA_NOMAPPING: 3083 default: 3084 return (ibc_get_ci_failure(0)); 3085 } 3086 } 3087 3088 /* 3089 * Verify our physical buffer list (PBL) is large enough to handle the 3090 * number of cookies that were returned. 3091 */ 3092 if (*cookiecnt > list_len) { 3093 (void) ddi_dma_unbind_handle(dma_hdl); 3094 ddi_dma_free_handle(&dma_hdl); 3095 return (IBT_PBL_TOO_SMALL); 3096 } 3097 3098 /* 3099 * We store the cookies returned by the DDI into our own PBL. This 3100 * sets the cookies up for later processing (for example, if we want to 3101 * split up the cookies into smaller chunks). We use the laddr and 3102 * size fields in each cookie to create each individual entry (PBE). 3103 */ 3104 3105 /* 3106 * Store first cookie info first 3107 */ 3108 paddr_list_p[0].p_laddr = dmacookie.dmac_laddress; 3109 paddr_list_p[0].p_size = dmacookie.dmac_size; 3110 3111 /* 3112 * Loop through each cookie, storing each cookie into our physical 3113 * buffer list. 
3114 */ 3115 for (i = 1; i < *cookiecnt; i++) { 3116 ddi_dma_nextcookie(dma_hdl, &dmacookie); 3117 3118 paddr_list_p[i].p_laddr = dmacookie.dmac_laddress; 3119 paddr_list_p[i].p_size = dmacookie.dmac_size; 3120 } 3121 3122 /* return handle */ 3123 *ibc_ma_hdl_p = (ibc_ma_hdl_t)dma_hdl; 3124 return (DDI_SUCCESS); 3125 } 3126 3127 /* 3128 * hermon_split_dma_cookies() 3129 * Split up cookies passed in from paddr_list_p, returning the new list in the 3130 * same buffers, based on the pagesize to split the cookies into. 3131 * Context: Can be called from interrupt or base context. 3132 */ 3133 /* ARGSUSED */ 3134 int 3135 hermon_split_dma_cookies(hermon_state_t *state, ibt_phys_buf_t *paddr_list, 3136 ib_memlen_t *paddr_offset, uint_t list_len, uint_t *cookiecnt, 3137 uint_t pagesize) 3138 { 3139 uint64_t pageoffset; 3140 uint64_t pagemask; 3141 uint_t pageshift; 3142 uint_t current_cookiecnt; 3143 uint_t cookies_needed; 3144 uint64_t last_size, extra_cookie; 3145 int i_increment; 3146 int i, k; 3147 int status; 3148 3149 /* Setup pagesize calculations */ 3150 pageoffset = pagesize - 1; 3151 pagemask = (~pageoffset); 3152 pageshift = highbit(pagesize) - 1; 3153 3154 /* 3155 * Setup first cookie offset based on pagesize requested. 3156 */ 3157 *paddr_offset = paddr_list[0].p_laddr & pageoffset; 3158 paddr_list[0].p_laddr &= pagemask; 3159 3160 /* Save away the current number of cookies that are passed in */ 3161 current_cookiecnt = *cookiecnt; 3162 3163 /* Perform splitting up of current cookies into pagesize blocks */ 3164 for (i = 0; i < current_cookiecnt; i += i_increment) { 3165 /* 3166 * If the cookie is smaller than pagesize, or already is 3167 * pagesize, then we are already within our limits, so we skip 3168 * it. 3169 */ 3170 if (paddr_list[i].p_size <= pagesize) { 3171 i_increment = 1; 3172 continue; 3173 } 3174 3175 /* 3176 * If this is our first cookie, then we have to deal with the 3177 * offset that may be present in the first address. 
So add 3178 * that to our size, to calculate potential change to the last 3179 * cookie's size. 3180 * 3181 * Also, calculate the number of cookies that we'll need to 3182 * split up this block into. 3183 */ 3184 if (i == 0) { 3185 last_size = (paddr_list[i].p_size + *paddr_offset) & 3186 pageoffset; 3187 cookies_needed = (paddr_list[i].p_size + 3188 *paddr_offset) >> pageshift; 3189 } else { 3190 last_size = 0; 3191 cookies_needed = paddr_list[i].p_size >> pageshift; 3192 } 3193 3194 /* 3195 * If our size is not a multiple of pagesize, we need one more 3196 * cookie. 3197 */ 3198 if (last_size) { 3199 extra_cookie = 1; 3200 } else { 3201 extra_cookie = 0; 3202 } 3203 3204 /* 3205 * Split cookie into pagesize chunks, shifting list of cookies 3206 * down, using more cookie slots in the PBL if necessary. 3207 */ 3208 status = hermon_dma_cookie_shift(paddr_list, i, list_len, 3209 current_cookiecnt - i, cookies_needed + extra_cookie); 3210 if (status != 0) { 3211 return (status); 3212 } 3213 3214 /* 3215 * If the very first cookie, we must take possible offset into 3216 * account. 3217 */ 3218 if (i == 0) { 3219 paddr_list[i].p_size = pagesize - *paddr_offset; 3220 } else { 3221 paddr_list[i].p_size = pagesize; 3222 } 3223 3224 /* 3225 * We have shifted the existing cookies down the PBL, now fill 3226 * in the blank entries by splitting up our current block. 3227 */ 3228 for (k = 1; k < cookies_needed; k++) { 3229 paddr_list[i + k].p_laddr = 3230 paddr_list[i + k - 1].p_laddr + pagesize; 3231 paddr_list[i + k].p_size = pagesize; 3232 } 3233 3234 /* If we have one extra cookie (of less than pagesize...) 
*/ 3235 if (extra_cookie) { 3236 paddr_list[i + k].p_laddr = 3237 paddr_list[i + k - 1].p_laddr + pagesize; 3238 paddr_list[i + k].p_size = (size_t)last_size; 3239 } 3240 3241 /* Increment cookiecnt appropriately based on cookies used */ 3242 i_increment = cookies_needed + extra_cookie; 3243 current_cookiecnt += i_increment - 1; 3244 } 3245 3246 /* Update to new cookie count */ 3247 *cookiecnt = current_cookiecnt; 3248 return (DDI_SUCCESS); 3249 } 3250 3251 /* 3252 * hermon_dma_cookie_shift() 3253 * Context: Can be called from interrupt or base context. 3254 */ 3255 int 3256 hermon_dma_cookie_shift(ibt_phys_buf_t *paddr_list, int start, int end, 3257 int cookiecnt, int num_shift) 3258 { 3259 int shift_start; 3260 int i; 3261 3262 /* Calculating starting point in the PBL list */ 3263 shift_start = start + cookiecnt - 1; 3264 3265 /* Check if we're at the end of our PBL list */ 3266 if ((shift_start + num_shift - 1) >= end) { 3267 return (IBT_PBL_TOO_SMALL); 3268 } 3269 3270 for (i = shift_start; i > start; i--) { 3271 paddr_list[i + num_shift - 1] = paddr_list[i]; 3272 } 3273 3274 return (DDI_SUCCESS); 3275 } 3276 3277 3278 /* 3279 * hermon_free_dma_cookies() 3280 * Context: Can be called from interrupt or base context. 3281 */ 3282 int 3283 hermon_free_dma_cookies(ibc_ma_hdl_t ma_hdl) 3284 { 3285 ddi_dma_handle_t dma_hdl; 3286 int status; 3287 3288 dma_hdl = (ddi_dma_handle_t)ma_hdl; 3289 3290 status = ddi_dma_unbind_handle(dma_hdl); 3291 if (status != DDI_SUCCESS) { 3292 return (ibc_get_ci_failure(0)); 3293 } 3294 ddi_dma_free_handle(&dma_hdl); 3295 3296 return (DDI_SUCCESS); 3297 } 3298