1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * hermon_misc.c 29 * Hermon Miscellaneous routines - Address Handle, Multicast, Protection 30 * Domain, and port-related operations 31 * 32 * Implements all the routines necessary for allocating, freeing, querying 33 * and modifying Address Handles and Protection Domains. Also implements 34 * all the routines necessary for adding and removing Queue Pairs to/from 35 * Multicast Groups. Lastly, it implements the routines necessary for 36 * port-related query and modify operations. 
37 */ 38 39 #include <sys/types.h> 40 #include <sys/conf.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/modctl.h> 44 #include <sys/bitmap.h> 45 #include <sys/sysmacros.h> 46 47 #include <sys/ib/adapters/hermon/hermon.h> 48 49 extern uint32_t hermon_kernel_data_ro; 50 51 /* used for helping uniquify fmr pool taskq name */ 52 static uint_t hermon_debug_fmrpool_cnt = 0x00000000; 53 54 static int hermon_mcg_qplist_add(hermon_state_t *state, hermon_mcghdl_t mcg, 55 hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp, uint_t *qp_found); 56 static int hermon_mcg_qplist_remove(hermon_mcghdl_t mcg, 57 hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp); 58 static void hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp); 59 static void hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp); 60 static uint_t hermon_mcg_walk_mgid_hash(hermon_state_t *state, 61 uint64_t start_indx, ib_gid_t mgid, uint_t *prev_indx); 62 static void hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg, 63 hermon_hw_mcg_t *mcg_hdr, ib_gid_t mgid, hermon_rsrc_t *mcg_rsrc); 64 static int hermon_mcg_hash_list_remove(hermon_state_t *state, uint_t curr_indx, 65 uint_t prev_indx, hermon_hw_mcg_t *mcg_entry); 66 static int hermon_mcg_entry_invalidate(hermon_state_t *state, 67 hermon_hw_mcg_t *mcg_entry, uint_t indx); 68 static int hermon_mgid_is_valid(ib_gid_t gid); 69 static int hermon_mlid_is_valid(ib_lid_t lid); 70 static void hermon_fmr_processing(void *fmr_args); 71 static int hermon_fmr_cleanup(hermon_state_t *state, hermon_fmrhdl_t pool); 72 static void hermon_fmr_cache_init(hermon_fmrhdl_t fmr); 73 static void hermon_fmr_cache_fini(hermon_fmrhdl_t fmr); 74 static int hermon_fmr_avl_compare(const void *q, const void *e); 75 76 77 #define HERMON_MAX_DBR_PAGES_PER_USER 64 78 #define HERMON_DBR_KEY(index, page) \ 79 (((uint64_t)index) * HERMON_MAX_DBR_PAGES_PER_USER + (page)) 80 81 static hermon_udbr_page_t * 82 hermon_dbr_new_user_page(hermon_state_t *state, uint_t index, 83 uint_t page) 84 { 85 
hermon_udbr_page_t *pagep; 86 ddi_dma_attr_t dma_attr; 87 uint_t cookiecnt; 88 int i, status; 89 uint64_t *p; 90 hermon_umap_db_entry_t *umapdb; 91 92 pagep = kmem_alloc(sizeof (*pagep), KM_SLEEP); 93 pagep->upg_index = page; 94 pagep->upg_nfree = PAGESIZE / sizeof (hermon_dbr_t); 95 pagep->upg_firstfree = 0; 96 pagep->upg_kvaddr = ddi_umem_alloc(PAGESIZE, DDI_UMEM_SLEEP, 97 &pagep->upg_umemcookie); /* not HERMON_PAGESIZE here */ 98 99 /* link free entries */ 100 p = (uint64_t *)(void *)pagep->upg_kvaddr; 101 for (i = pagep->upg_firstfree; i < pagep->upg_nfree; i++) 102 p[i] = i + 1; 103 pagep->upg_buf = ddi_umem_iosetup(pagep->upg_umemcookie, 0, 104 PAGESIZE, B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP); 105 106 hermon_dma_attr_init(state, &dma_attr); 107 status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, 108 DDI_DMA_SLEEP, NULL, &pagep->upg_dmahdl); 109 if (status != DDI_SUCCESS) { 110 IBTF_DPRINTF_L2("hermon", "hermon_new_user_page: " 111 "ddi_dma_buf_bind_handle failed: %d", status); 112 return (NULL); 113 } 114 status = ddi_dma_buf_bind_handle(pagep->upg_dmahdl, 115 pagep->upg_buf, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 116 DDI_DMA_SLEEP, NULL, &pagep->upg_dmacookie, &cookiecnt); 117 if (status != DDI_SUCCESS) { 118 IBTF_DPRINTF_L2("hermon", "hermon_dbr_new_user_page: " 119 "ddi_dma_buf_bind_handle failed: %d", status); 120 ddi_dma_free_handle(&pagep->upg_dmahdl); 121 return (NULL); 122 } 123 ASSERT(cookiecnt == 1); 124 125 /* create db entry for mmap */ 126 umapdb = hermon_umap_db_alloc(state->hs_instance, 127 HERMON_DBR_KEY(index, page), MLNX_UMAP_DBRMEM_RSRC, 128 (uint64_t)(uintptr_t)pagep); 129 hermon_umap_db_add(umapdb); 130 return (pagep); 131 } 132 133 134 /*ARGSUSED*/ 135 static int 136 hermon_user_dbr_alloc(hermon_state_t *state, uint_t index, 137 ddi_acc_handle_t *acchdl, hermon_dbr_t **vdbr, uint64_t *pdbr, 138 uint64_t *mapoffset) 139 { 140 hermon_user_dbr_t *udbr; 141 hermon_udbr_page_t *pagep; 142 uint_t next_page; 143 int j; 144 145 
mutex_enter(&state->hs_dbr_lock); 146 for (udbr = state->hs_user_dbr; udbr != NULL; udbr = udbr->udbr_link) 147 if (udbr->udbr_index == index) 148 break; 149 if (udbr == NULL) { 150 udbr = kmem_alloc(sizeof (*udbr), KM_SLEEP); 151 udbr->udbr_link = state->hs_user_dbr; 152 state->hs_user_dbr = udbr; 153 udbr->udbr_index = index; 154 udbr->udbr_pagep = NULL; 155 } 156 pagep = udbr->udbr_pagep; 157 next_page = (pagep == NULL) ? 0 : (pagep->upg_index + 1); 158 while (pagep != NULL) 159 if (pagep->upg_nfree > 0) 160 break; 161 else 162 pagep = pagep->upg_link; 163 if (pagep == NULL) { 164 pagep = hermon_dbr_new_user_page(state, index, next_page); 165 if (pagep == NULL) { 166 mutex_exit(&state->hs_dbr_lock); 167 return (DDI_FAILURE); 168 } 169 pagep->upg_link = udbr->udbr_pagep; 170 udbr->udbr_pagep = pagep; 171 } 172 j = pagep->upg_firstfree; /* index within page */ 173 pagep->upg_firstfree = ((uint64_t *)(void *)pagep->upg_kvaddr)[j]; 174 pagep->upg_nfree--; 175 ((uint64_t *)(void *)pagep->upg_kvaddr)[j] = 0; /* clear dbr */ 176 *mapoffset = ((HERMON_DBR_KEY(index, pagep->upg_index) << 177 MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_DBRMEM_RSRC) << PAGESHIFT; 178 *vdbr = (hermon_dbr_t *)((uint64_t *)(void *)pagep->upg_kvaddr + j); 179 *pdbr = pagep->upg_dmacookie.dmac_laddress + j * sizeof (uint64_t); 180 181 mutex_exit(&state->hs_dbr_lock); 182 return (DDI_SUCCESS); 183 } 184 185 static void 186 hermon_user_dbr_free(hermon_state_t *state, uint_t index, hermon_dbr_t *record) 187 { 188 hermon_user_dbr_t *udbr; 189 hermon_udbr_page_t *pagep; 190 caddr_t kvaddr; 191 uint_t dbr_index; 192 uint_t max_free = PAGESIZE / sizeof (hermon_dbr_t); 193 194 dbr_index = (uintptr_t)record & PAGEOFFSET; /* offset (not yet index) */ 195 kvaddr = (caddr_t)record - dbr_index; 196 dbr_index /= sizeof (hermon_dbr_t); /* now it's the index */ 197 198 mutex_enter(&state->hs_dbr_lock); 199 for (udbr = state->hs_user_dbr; udbr != NULL; udbr = udbr->udbr_link) 200 if (udbr->udbr_index == index) 201 
break; 202 if (udbr == NULL) { 203 IBTF_DPRINTF_L2("hermon", "free user dbr: udbr struct not " 204 "found for index %x", index); 205 mutex_exit(&state->hs_dbr_lock); 206 return; 207 } 208 for (pagep = udbr->udbr_pagep; pagep != NULL; pagep = pagep->upg_link) 209 if (pagep->upg_kvaddr == kvaddr) 210 break; 211 if (pagep == NULL) { 212 IBTF_DPRINTF_L2("hermon", "free user dbr: pagep struct not" 213 " found for index %x, kvaddr %p, DBR index %x", 214 index, kvaddr, dbr_index); 215 mutex_exit(&state->hs_dbr_lock); 216 return; 217 } 218 if (pagep->upg_nfree >= max_free) { 219 IBTF_DPRINTF_L2("hermon", "free user dbr: overflow: " 220 "UCE index %x, DBR index %x", index, dbr_index); 221 mutex_exit(&state->hs_dbr_lock); 222 return; 223 } 224 ASSERT(dbr_index < max_free); 225 ((uint64_t *)(void *)kvaddr)[dbr_index] = pagep->upg_firstfree; 226 pagep->upg_firstfree = dbr_index; 227 pagep->upg_nfree++; 228 mutex_exit(&state->hs_dbr_lock); 229 230 /* XXX still need to unlink and free struct */ 231 /* XXX munmap needs to be managed */ 232 } 233 234 /* 235 * hermon_dbr_page_alloc() 236 * first page allocation - called from attach or open 237 * in this case, we want exactly one page per call, and aligned on a 238 * page - and may need to be mapped to the user for access 239 */ 240 241 int 242 hermon_dbr_page_alloc(hermon_state_t *state, hermon_dbr_info_t **dinfo) 243 { 244 int status; 245 ddi_dma_handle_t dma_hdl; 246 ddi_acc_handle_t acc_hdl; 247 ddi_dma_attr_t dma_attr; 248 ddi_dma_cookie_t cookie; 249 uint_t cookie_cnt; 250 hermon_dbr_header_t *pagehdr; 251 int i; 252 hermon_dbr_info_t *info; 253 uint64_t dmaaddr; 254 uint64_t dmalen; 255 256 info = kmem_zalloc(sizeof (hermon_dbr_info_t), KM_SLEEP); 257 258 /* 259 * Initialize many of the default DMA attributes. Then set additional 260 * alignment restrictions if necessary for the dbr memory, meaning 261 * page aligned. 
Also use the configured value for IOMMU bypass 262 */ 263 hermon_dma_attr_init(state, &dma_attr); 264 dma_attr.dma_attr_align = PAGESIZE; 265 dma_attr.dma_attr_sgllen = 1; /* make sure only one cookie */ 266 267 status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, 268 DDI_DMA_SLEEP, NULL, &dma_hdl); 269 if (status != DDI_SUCCESS) { 270 kmem_free((void *)info, sizeof (hermon_dbr_info_t)); 271 cmn_err(CE_NOTE, "dbr DMA handle alloc failed\n"); 272 return (DDI_FAILURE); 273 } 274 275 status = ddi_dma_mem_alloc(dma_hdl, PAGESIZE, 276 &state->hs_reg_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, 277 NULL, (caddr_t *)&dmaaddr, (size_t *)&dmalen, &acc_hdl); 278 if (status != DDI_SUCCESS) { 279 ddi_dma_free_handle(&dma_hdl); 280 cmn_err(CE_CONT, "dbr DMA mem alloc failed(status %d)", status); 281 kmem_free((void *)info, sizeof (hermon_dbr_info_t)); 282 return (DDI_FAILURE); 283 } 284 285 /* this memory won't be IB registered, so do the bind here */ 286 status = ddi_dma_addr_bind_handle(dma_hdl, NULL, 287 (caddr_t)(uintptr_t)dmaaddr, (size_t)dmalen, DDI_DMA_RDWR | 288 DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &cookie, &cookie_cnt); 289 if (status != DDI_SUCCESS) { 290 ddi_dma_mem_free(&acc_hdl); 291 ddi_dma_free_handle(&dma_hdl); 292 kmem_free((void *)info, sizeof (hermon_dbr_info_t)); 293 cmn_err(CE_CONT, "dbr DMA bind handle failed (status %d)", 294 status); 295 return (DDI_FAILURE); 296 } 297 *dinfo = info; /* Pass back the pointer */ 298 299 /* init the info structure with returned info */ 300 info->dbr_dmahdl = dma_hdl; 301 info->dbr_acchdl = acc_hdl; 302 info->dbr_page = (caddr_t)(uintptr_t)dmaaddr; 303 /* extract the phys addr from the cookie */ 304 info->dbr_paddr = cookie.dmac_laddress; 305 /* should have everything now, so do the init of the header */ 306 pagehdr = (hermon_dbr_header_t *)(void *)info->dbr_page; 307 pagehdr->next = 0; 308 pagehdr->firstfree = 0; 309 pagehdr->nfree = HERMON_NUM_DBR_PER_PAGE; 310 pagehdr->dbr_info = info; 311 /* link all DBrs onto the 
free list */ 312 for (i = 0; i < HERMON_NUM_DBR_PER_PAGE; i++) { 313 pagehdr->dbr[i] = i + 1; 314 } 315 316 return (DDI_SUCCESS); 317 } 318 319 320 /* 321 * hermon_dbr_alloc() 322 * DBr record allocation - called from alloc cq/qp/srq 323 * will check for available dbrs in current 324 * page - if needed it will allocate another and link them 325 */ 326 327 int 328 hermon_dbr_alloc(hermon_state_t *state, uint_t index, ddi_acc_handle_t *acchdl, 329 hermon_dbr_t **vdbr, uint64_t *pdbr, uint64_t *mapoffset) 330 { 331 hermon_dbr_header_t *pagehdr, *lastpage; 332 hermon_dbr_t *record = NULL; 333 hermon_dbr_info_t *dinfo = NULL; 334 int status; 335 336 if (index != state->hs_kernel_uar_index) 337 return (hermon_user_dbr_alloc(state, index, acchdl, vdbr, pdbr, 338 mapoffset)); 339 340 mutex_enter(&state->hs_dbr_lock); 341 /* 'pagehdr' holds pointer to first page */ 342 pagehdr = (hermon_dbr_header_t *)(void *)state->hs_kern_dbr; 343 do { 344 lastpage = pagehdr; /* save pagehdr for later linking */ 345 if (pagehdr->nfree == 0) { 346 pagehdr = (hermon_dbr_header_t *)(void *)pagehdr->next; 347 continue; /* page is full, go to next if there is one */ 348 } 349 dinfo = pagehdr->dbr_info; 350 break; /* found a page w/ one available */ 351 } while (pagehdr != 0); 352 353 if (dinfo == NULL) { /* did NOT find a page with one available */ 354 status = hermon_dbr_page_alloc(state, &dinfo); 355 if (status != DDI_SUCCESS) { 356 /* do error handling */ 357 mutex_exit(&state->hs_dbr_lock); 358 return (DDI_FAILURE); 359 } 360 /* got a new page, so link it in. 
*/ 361 pagehdr = (hermon_dbr_header_t *)(void *)dinfo->dbr_page; 362 lastpage->next = pagehdr; 363 } 364 record = pagehdr->dbr + pagehdr->firstfree; 365 pagehdr->firstfree = *record; 366 pagehdr->nfree--; 367 *record = 0; 368 369 *acchdl = dinfo->dbr_acchdl; 370 *vdbr = record; 371 *pdbr = ((uintptr_t)record - (uintptr_t)pagehdr + dinfo->dbr_paddr); 372 mutex_exit(&state->hs_dbr_lock); 373 return (DDI_SUCCESS); 374 } 375 376 /* 377 * hermon_dbr_free() 378 * DBr record deallocation - called from free cq/qp 379 * will update the counter in the header, and invalidate 380 * the dbr, but will NEVER free pages of dbrs - small 381 * price to pay, but userland access never will anyway 382 */ 383 384 void 385 hermon_dbr_free(hermon_state_t *state, uint_t indx, hermon_dbr_t *record) 386 { 387 hermon_dbr_header_t *pagehdr; 388 389 if (indx != state->hs_kernel_uar_index) { 390 hermon_user_dbr_free(state, indx, record); 391 return; 392 } 393 mutex_enter(&state->hs_dbr_lock); 394 pagehdr = (hermon_dbr_header_t *)((uintptr_t)record & 395 (uintptr_t)PAGEMASK); 396 *record = pagehdr->firstfree; 397 pagehdr->firstfree = record - pagehdr->dbr; 398 pagehdr->nfree++; /* decr the count for this one */ 399 mutex_exit(&state->hs_dbr_lock); 400 } 401 402 /* 403 * hermon_dbr_kern_free() 404 * Context: Can be called only from detach context. 405 * 406 * Free all kernel dbr pages. This includes the freeing of all the dma 407 * resources acquired during the allocation of the pages. 408 * 409 * Also, free all the user dbr pages. 
410 */ 411 void 412 hermon_dbr_kern_free(hermon_state_t *state) 413 { 414 hermon_dbr_header_t *pagehdr, *lastpage; 415 hermon_dbr_info_t *dinfo; 416 hermon_user_dbr_t *udbr, *next; 417 hermon_udbr_page_t *pagep, *nextp; 418 hermon_umap_db_entry_t *umapdb; 419 int instance, status; 420 uint64_t value; 421 extern hermon_umap_db_t hermon_userland_rsrc_db; 422 423 mutex_enter(&state->hs_dbr_lock); 424 pagehdr = (hermon_dbr_header_t *)(void *)state->hs_kern_dbr; 425 while (pagehdr != NULL) { 426 lastpage = (hermon_dbr_header_t *)(void *)pagehdr->next; 427 dinfo = pagehdr->dbr_info; 428 (void) ddi_dma_unbind_handle(dinfo->dbr_dmahdl); 429 ddi_dma_mem_free(&dinfo->dbr_acchdl); /* free page */ 430 ddi_dma_free_handle(&dinfo->dbr_dmahdl); 431 kmem_free(dinfo, sizeof (hermon_dbr_info_t)); 432 pagehdr = lastpage; 433 } 434 435 udbr = state->hs_user_dbr; 436 instance = state->hs_instance; 437 mutex_enter(&hermon_userland_rsrc_db.hdl_umapdb_lock); 438 while (udbr != NULL) { 439 pagep = udbr->udbr_pagep; 440 while (pagep != NULL) { 441 /* probably need to remove "db" */ 442 (void) ddi_dma_unbind_handle(pagep->upg_dmahdl); 443 ddi_dma_free_handle(&pagep->upg_dmahdl); 444 freerbuf(pagep->upg_buf); 445 ddi_umem_free(pagep->upg_umemcookie); 446 status = hermon_umap_db_find_nolock(instance, 447 HERMON_DBR_KEY(udbr->udbr_index, 448 pagep->upg_index), MLNX_UMAP_DBRMEM_RSRC, 449 &value, HERMON_UMAP_DB_REMOVE, &umapdb); 450 if (status == DDI_SUCCESS) 451 hermon_umap_db_free(umapdb); 452 nextp = pagep->upg_link; 453 kmem_free(pagep, sizeof (*pagep)); 454 pagep = nextp; 455 } 456 next = udbr->udbr_link; 457 kmem_free(udbr, sizeof (*udbr)); 458 udbr = next; 459 } 460 mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock); 461 mutex_exit(&state->hs_dbr_lock); 462 } 463 464 /* 465 * hermon_ah_alloc() 466 * Context: Can be called only from user or kernel context. 
467 */ 468 int 469 hermon_ah_alloc(hermon_state_t *state, hermon_pdhdl_t pd, 470 ibt_adds_vect_t *attr_p, hermon_ahhdl_t *ahhdl, uint_t sleepflag) 471 { 472 hermon_rsrc_t *rsrc; 473 hermon_hw_udav_t *udav; 474 hermon_ahhdl_t ah; 475 int status; 476 477 /* 478 * Someday maybe the "ibt_adds_vect_t *attr_p" will be NULL to 479 * indicate that we wish to allocate an "invalid" (i.e. empty) 480 * address handle XXX 481 */ 482 483 /* Validate that specified port number is legal */ 484 if (!hermon_portnum_is_valid(state, attr_p->av_port_num)) { 485 return (IBT_HCA_PORT_INVALID); 486 } 487 488 /* 489 * Allocate the software structure for tracking the address handle 490 * (i.e. the Hermon Address Handle struct). 491 */ 492 status = hermon_rsrc_alloc(state, HERMON_AHHDL, 1, sleepflag, &rsrc); 493 if (status != DDI_SUCCESS) { 494 return (IBT_INSUFF_RESOURCE); 495 } 496 ah = (hermon_ahhdl_t)rsrc->hr_addr; 497 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah)) 498 499 /* Increment the reference count on the protection domain (PD) */ 500 hermon_pd_refcnt_inc(pd); 501 502 udav = (hermon_hw_udav_t *)kmem_zalloc(sizeof (hermon_hw_udav_t), 503 KM_SLEEP); 504 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*udav)) 505 506 /* 507 * Fill in the UDAV data. We first zero out the UDAV, then populate 508 * it by then calling hermon_set_addr_path() to fill in the common 509 * portions that can be pulled from the "ibt_adds_vect_t" passed in 510 */ 511 status = hermon_set_addr_path(state, attr_p, 512 (hermon_hw_addr_path_t *)udav, HERMON_ADDRPATH_UDAV); 513 if (status != DDI_SUCCESS) { 514 hermon_pd_refcnt_dec(pd); 515 hermon_rsrc_free(state, &rsrc); 516 return (status); 517 } 518 udav->pd = pd->pd_pdnum; 519 udav->sl = attr_p->av_srvl; 520 521 /* 522 * Fill in the rest of the Hermon Address Handle struct. 523 * 524 * NOTE: We are saving away a copy of the "av_dgid.gid_guid" field 525 * here because we may need to return it later to the IBTF (as a 526 * result of a subsequent query operation). 
Unlike the other UDAV 527 * parameters, the value of "av_dgid.gid_guid" is not always preserved. 528 * The reason for this is described in hermon_set_addr_path(). 529 */ 530 ah->ah_rsrcp = rsrc; 531 ah->ah_pdhdl = pd; 532 ah->ah_udav = udav; 533 ah->ah_save_guid = attr_p->av_dgid.gid_guid; 534 *ahhdl = ah; 535 536 return (DDI_SUCCESS); 537 } 538 539 540 /* 541 * hermon_ah_free() 542 * Context: Can be called only from user or kernel context. 543 */ 544 /* ARGSUSED */ 545 int 546 hermon_ah_free(hermon_state_t *state, hermon_ahhdl_t *ahhdl, uint_t sleepflag) 547 { 548 hermon_rsrc_t *rsrc; 549 hermon_pdhdl_t pd; 550 hermon_ahhdl_t ah; 551 552 /* 553 * Pull all the necessary information from the Hermon Address Handle 554 * struct. This is necessary here because the resource for the 555 * AH is going to be freed up as part of this operation. 556 */ 557 ah = *ahhdl; 558 mutex_enter(&ah->ah_lock); 559 rsrc = ah->ah_rsrcp; 560 pd = ah->ah_pdhdl; 561 mutex_exit(&ah->ah_lock); 562 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah)) 563 564 /* Free the UDAV memory */ 565 kmem_free(ah->ah_udav, sizeof (hermon_hw_udav_t)); 566 567 /* Decrement the reference count on the protection domain (PD) */ 568 hermon_pd_refcnt_dec(pd); 569 570 /* Free the Hermon Address Handle structure */ 571 hermon_rsrc_free(state, &rsrc); 572 573 /* Set the ahhdl pointer to NULL and return success */ 574 *ahhdl = NULL; 575 576 return (DDI_SUCCESS); 577 } 578 579 580 /* 581 * hermon_ah_query() 582 * Context: Can be called from interrupt or base context. 583 */ 584 /* ARGSUSED */ 585 int 586 hermon_ah_query(hermon_state_t *state, hermon_ahhdl_t ah, hermon_pdhdl_t *pd, 587 ibt_adds_vect_t *attr_p) 588 { 589 mutex_enter(&ah->ah_lock); 590 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p)) 591 592 /* 593 * Pull the PD and UDAV from the Hermon Address Handle structure 594 */ 595 *pd = ah->ah_pdhdl; 596 597 /* 598 * Fill in "ibt_adds_vect_t". 
We call hermon_get_addr_path() to fill 599 * the common portions that can be pulled from the UDAV we pass in. 600 * 601 * NOTE: We will also fill the "av_dgid.gid_guid" field from the 602 * "ah_save_guid" field we have previously saved away. The reason 603 * for this is described in hermon_ah_alloc() and hermon_ah_modify(). 604 */ 605 hermon_get_addr_path(state, (hermon_hw_addr_path_t *)ah->ah_udav, 606 attr_p, HERMON_ADDRPATH_UDAV); 607 608 attr_p->av_dgid.gid_guid = ah->ah_save_guid; 609 610 mutex_exit(&ah->ah_lock); 611 return (DDI_SUCCESS); 612 } 613 614 615 /* 616 * hermon_ah_modify() 617 * Context: Can be called from interrupt or base context. 618 */ 619 /* ARGSUSED */ 620 int 621 hermon_ah_modify(hermon_state_t *state, hermon_ahhdl_t ah, 622 ibt_adds_vect_t *attr_p) 623 { 624 hermon_hw_udav_t old_udav; 625 uint64_t data_old; 626 int status, size, i; 627 628 /* Validate that specified port number is legal */ 629 if (!hermon_portnum_is_valid(state, attr_p->av_port_num)) { 630 return (IBT_HCA_PORT_INVALID); 631 } 632 633 mutex_enter(&ah->ah_lock); 634 635 /* Save a copy of the current UDAV data in old_udav. */ 636 bcopy(ah->ah_udav, &old_udav, sizeof (hermon_hw_udav_t)); 637 638 /* 639 * Fill in the new UDAV with the caller's data, passed in via the 640 * "ibt_adds_vect_t" structure. 641 * 642 * NOTE: We also need to save away a copy of the "av_dgid.gid_guid" 643 * field here (just as we did during hermon_ah_alloc()) because we 644 * may need to return it later to the IBTF (as a result of a 645 * subsequent query operation). As explained in hermon_ah_alloc(), 646 * unlike the other UDAV parameters, the value of "av_dgid.gid_guid" 647 * is not always preserved. The reason for this is described in 648 * hermon_set_addr_path(). 
649 */ 650 status = hermon_set_addr_path(state, attr_p, 651 (hermon_hw_addr_path_t *)ah->ah_udav, HERMON_ADDRPATH_UDAV); 652 if (status != DDI_SUCCESS) { 653 mutex_exit(&ah->ah_lock); 654 return (status); 655 } 656 ah->ah_save_guid = attr_p->av_dgid.gid_guid; 657 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(ah->ah_udav))) 658 ah->ah_udav->sl = attr_p->av_srvl; 659 660 /* 661 * Copy changes into the new UDAV. 662 * Note: We copy in 64-bit chunks. For the first two of these 663 * chunks it is necessary to read the current contents of the 664 * UDAV, mask off the modifiable portions (maintaining any 665 * of the "reserved" portions), and then mask on the new data. 666 */ 667 size = sizeof (hermon_hw_udav_t) >> 3; 668 for (i = 0; i < size; i++) { 669 data_old = ((uint64_t *)&old_udav)[i]; 670 671 /* 672 * Apply mask to change only the relevant values. 673 */ 674 if (i == 0) { 675 data_old = data_old & HERMON_UDAV_MODIFY_MASK0; 676 } else if (i == 1) { 677 data_old = data_old & HERMON_UDAV_MODIFY_MASK1; 678 } else { 679 data_old = 0; 680 } 681 682 /* Store the updated values to the UDAV */ 683 ((uint64_t *)ah->ah_udav)[i] |= data_old; 684 } 685 686 /* 687 * Put the valid PD number back into the UDAV entry, as it 688 * might have been clobbered above. 689 */ 690 ah->ah_udav->pd = old_udav.pd; 691 692 693 mutex_exit(&ah->ah_lock); 694 return (DDI_SUCCESS); 695 } 696 697 /* 698 * hermon_mcg_attach() 699 * Context: Can be called only from user or kernel context. 700 */ 701 int 702 hermon_mcg_attach(hermon_state_t *state, hermon_qphdl_t qp, ib_gid_t gid, 703 ib_lid_t lid) 704 { 705 hermon_rsrc_t *rsrc; 706 hermon_hw_mcg_t *mcg_entry; 707 hermon_hw_mcg_qp_list_t *mcg_entry_qplist; 708 hermon_mcghdl_t mcg, newmcg; 709 uint64_t mgid_hash; 710 uint32_t end_indx; 711 int status; 712 uint_t qp_found; 713 714 /* 715 * It is only allowed to attach MCG to UD queue pairs. 
Verify 716 * that the intended QP is of the appropriate transport type 717 */ 718 if (qp->qp_serv_type != HERMON_QP_UD) { 719 return (IBT_QP_SRV_TYPE_INVALID); 720 } 721 722 /* 723 * Check for invalid Multicast DLID. Specifically, all Multicast 724 * LIDs should be within a well defined range. If the specified LID 725 * is outside of that range, then return an error. 726 */ 727 if (hermon_mlid_is_valid(lid) == 0) { 728 return (IBT_MC_MLID_INVALID); 729 } 730 /* 731 * Check for invalid Multicast GID. All Multicast GIDs should have 732 * a well-defined pattern of bits and flags that are allowable. If 733 * the specified GID does not meet the criteria, then return an error. 734 */ 735 if (hermon_mgid_is_valid(gid) == 0) { 736 return (IBT_MC_MGID_INVALID); 737 } 738 739 /* 740 * Compute the MGID hash value. Since the MCG table is arranged as 741 * a number of separate hash chains, this operation converts the 742 * specified MGID into the starting index of an entry in the hash 743 * table (i.e. the index for the start of the appropriate hash chain). 744 * Subsequent operations below will walk the chain searching for the 745 * right place to add this new QP. 746 */ 747 status = hermon_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid, 748 &mgid_hash, HERMON_SLEEPFLAG_FOR_CONTEXT()); 749 if (status != HERMON_CMD_SUCCESS) { 750 cmn_err(CE_CONT, "Hermon: MGID_HASH command failed: %08x\n", 751 status); 752 if (status == HERMON_CMD_INVALID_STATUS) { 753 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 754 } 755 return (ibc_get_ci_failure(0)); 756 } 757 758 /* 759 * Grab the multicast group mutex. Then grab the pre-allocated 760 * temporary buffer used for holding and/or modifying MCG entries. 761 * Zero out the temporary MCG entry before we begin. 
762 */ 763 mutex_enter(&state->hs_mcglock); 764 mcg_entry = state->hs_mcgtmp; 765 mcg_entry_qplist = HERMON_MCG_GET_QPLIST_PTR(mcg_entry); 766 bzero(mcg_entry, HERMON_MCGMEM_SZ(state)); 767 768 /* 769 * Walk through the array of MCG entries starting at "mgid_hash". 770 * Try to find the appropriate place for this new QP to be added. 771 * This could happen when the first entry of the chain has MGID == 0 772 * (which means that the hash chain is empty), or because we find 773 * an entry with the same MGID (in which case we'll add the QP to 774 * that MCG), or because we come to the end of the chain (in which 775 * case this is the first QP being added to the multicast group that 776 * corresponds to the MGID. The hermon_mcg_walk_mgid_hash() routine 777 * walks the list and returns an index into the MCG table. The entry 778 * at this index is then checked to determine which case we have 779 * fallen into (see below). Note: We are using the "shadow" MCG 780 * list (of hermon_mcg_t structs) for this lookup because the real 781 * MCG entries are in hardware (and the lookup process would be much 782 * more time consuming). 783 */ 784 end_indx = hermon_mcg_walk_mgid_hash(state, mgid_hash, gid, NULL); 785 mcg = &state->hs_mcghdl[end_indx]; 786 787 /* 788 * If MGID == 0, then the hash chain is empty. Just fill in the 789 * current entry. Note: No need to allocate an MCG table entry 790 * as all the hash chain "heads" are already preallocated. 791 */ 792 if ((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) { 793 794 /* Fill in the current entry in the "shadow" MCG list */ 795 hermon_mcg_setup_new_hdr(mcg, mcg_entry, gid, NULL); 796 797 /* 798 * Try to add the new QP number to the list. This (and the 799 * above) routine fills in a temporary MCG. The "mcg_entry" 800 * and "mcg_entry_qplist" pointers simply point to different 801 * offsets within the same temporary copy of the MCG (for 802 * convenience). 
Note: If this fails, we need to invalidate 803 * the entries we've already put into the "shadow" list entry 804 * above. 805 */ 806 status = hermon_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp, 807 &qp_found); 808 if (status != DDI_SUCCESS) { 809 bzero(mcg, sizeof (struct hermon_sw_mcg_list_s)); 810 mutex_exit(&state->hs_mcglock); 811 return (status); 812 } 813 if (!qp_found) 814 mcg_entry->member_cnt = (mcg->mcg_num_qps + 1); 815 /* set the member count */ 816 817 /* 818 * Once the temporary MCG has been filled in, write the entry 819 * into the appropriate location in the Hermon MCG entry table. 820 * If it's successful, then drop the lock and return success. 821 * Note: In general, this operation shouldn't fail. If it 822 * does, then it is an indication that something (probably in 823 * HW, but maybe in SW) has gone seriously wrong. We still 824 * want to zero out the entries that we've filled in above 825 * (in the hermon_mcg_setup_new_hdr() routine). 826 */ 827 status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx, 828 HERMON_CMD_NOSLEEP_SPIN); 829 if (status != HERMON_CMD_SUCCESS) { 830 bzero(mcg, sizeof (struct hermon_sw_mcg_list_s)); 831 mutex_exit(&state->hs_mcglock); 832 HERMON_WARNING(state, "failed to write MCG entry"); 833 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: " 834 "%08x\n", status); 835 if (status == HERMON_CMD_INVALID_STATUS) { 836 hermon_fm_ereport(state, HCA_SYS_ERR, 837 HCA_ERR_SRV_LOST); 838 } 839 return (ibc_get_ci_failure(0)); 840 } 841 842 /* 843 * Now that we know all the Hermon firmware accesses have been 844 * successful, we update the "shadow" MCG entry by incrementing 845 * the "number of attached QPs" count. 846 * 847 * We increment only if the QP is not already part of the 848 * MCG by checking the 'qp_found' flag returned from the 849 * qplist_add above. 850 */ 851 if (!qp_found) { 852 mcg->mcg_num_qps++; 853 854 /* 855 * Increment the refcnt for this QP. 
Because the QP 856 * was added to this MCG, the refcnt must be 857 * incremented. 858 */ 859 hermon_qp_mcg_refcnt_inc(qp); 860 } 861 862 /* 863 * We drop the lock and return success. 864 */ 865 mutex_exit(&state->hs_mcglock); 866 return (DDI_SUCCESS); 867 } 868 869 /* 870 * If the specified MGID matches the MGID in the current entry, then 871 * we need to try to add the QP to the current MCG entry. In this 872 * case, it means that we need to read the existing MCG entry (into 873 * the temporary MCG), add the new QP number to the temporary entry 874 * (using the same method we used above), and write the entry back 875 * to the hardware (same as above). 876 */ 877 if ((mcg->mcg_mgid_h == gid.gid_prefix) && 878 (mcg->mcg_mgid_l == gid.gid_guid)) { 879 880 /* 881 * Read the current MCG entry into the temporary MCG. Note: 882 * In general, this operation shouldn't fail. If it does, 883 * then it is an indication that something (probably in HW, 884 * but maybe in SW) has gone seriously wrong. 885 */ 886 status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx, 887 HERMON_CMD_NOSLEEP_SPIN); 888 if (status != HERMON_CMD_SUCCESS) { 889 mutex_exit(&state->hs_mcglock); 890 HERMON_WARNING(state, "failed to read MCG entry"); 891 cmn_err(CE_CONT, "Hermon: READ_MGM command failed: " 892 "%08x\n", status); 893 if (status == HERMON_CMD_INVALID_STATUS) { 894 hermon_fm_ereport(state, HCA_SYS_ERR, 895 HCA_ERR_SRV_LOST); 896 } 897 return (ibc_get_ci_failure(0)); 898 } 899 900 /* 901 * Try to add the new QP number to the list. This routine 902 * fills in the necessary pieces of the temporary MCG. The 903 * "mcg_entry_qplist" pointer is used to point to the portion 904 * of the temporary MCG that holds the QP numbers. 905 * 906 * Note: hermon_mcg_qplist_add() returns SUCCESS if it 907 * already found the QP in the list. In this case, the QP is 908 * not added on to the list again. Check the flag 'qp_found' 909 * if this value is needed to be known. 
910 * 911 */ 912 status = hermon_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp, 913 &qp_found); 914 if (status != DDI_SUCCESS) { 915 mutex_exit(&state->hs_mcglock); 916 return (status); 917 } 918 if (!qp_found) 919 mcg_entry->member_cnt = (mcg->mcg_num_qps + 1); 920 /* set the member count */ 921 922 /* 923 * Once the temporary MCG has been updated, write the entry 924 * into the appropriate location in the Hermon MCG entry table. 925 * If it's successful, then drop the lock and return success. 926 * Note: In general, this operation shouldn't fail. If it 927 * does, then it is an indication that something (probably in 928 * HW, but maybe in SW) has gone seriously wrong. 929 */ 930 status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx, 931 HERMON_CMD_NOSLEEP_SPIN); 932 if (status != HERMON_CMD_SUCCESS) { 933 mutex_exit(&state->hs_mcglock); 934 HERMON_WARNING(state, "failed to write MCG entry"); 935 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: " 936 "%08x\n", status); 937 if (status == HERMON_CMD_INVALID_STATUS) { 938 hermon_fm_ereport(state, HCA_SYS_ERR, 939 HCA_ERR_SRV_LOST); 940 } 941 return (ibc_get_ci_failure(0)); 942 } 943 944 /* 945 * Now that we know all the Hermon firmware accesses have been 946 * successful, we update the current "shadow" MCG entry by 947 * incrementing the "number of attached QPs" count. 948 * 949 * We increment only if the QP is not already part of the 950 * MCG by checking the 'qp_found' flag returned 951 * hermon_mcg_walk_mgid_hashfrom the qplist_add above. 952 */ 953 if (!qp_found) { 954 mcg->mcg_num_qps++; 955 956 /* 957 * Increment the refcnt for this QP. Because the QP 958 * was added to this MCG, the refcnt must be 959 * incremented. 960 */ 961 hermon_qp_mcg_refcnt_inc(qp); 962 } 963 964 /* 965 * We drop the lock and return success. 966 */ 967 mutex_exit(&state->hs_mcglock); 968 return (DDI_SUCCESS); 969 } 970 971 /* 972 * If we've reached here, then we're at the end of the hash chain. 
973 * We need to allocate a new MCG entry, fill it in, write it to Hermon, 974 * and update the previous entry to link the new one to the end of the 975 * chain. 976 */ 977 978 /* 979 * Allocate an MCG table entry. This will be filled in with all 980 * the necessary parameters to define the multicast group. Then it 981 * will be written to the hardware in the next-to-last step below. 982 */ 983 status = hermon_rsrc_alloc(state, HERMON_MCG, 1, HERMON_NOSLEEP, &rsrc); 984 if (status != DDI_SUCCESS) { 985 mutex_exit(&state->hs_mcglock); 986 return (IBT_INSUFF_RESOURCE); 987 } 988 989 /* 990 * Fill in the new entry in the "shadow" MCG list. Note: Just as 991 * it does above, hermon_mcg_setup_new_hdr() also fills in a portion 992 * of the temporary MCG entry (the rest of which will be filled in by 993 * hermon_mcg_qplist_add() below) 994 */ 995 newmcg = &state->hs_mcghdl[rsrc->hr_indx]; 996 hermon_mcg_setup_new_hdr(newmcg, mcg_entry, gid, rsrc); 997 998 /* 999 * Try to add the new QP number to the list. This routine fills in 1000 * the final necessary pieces of the temporary MCG. The 1001 * "mcg_entry_qplist" pointer is used to point to the portion of the 1002 * temporary MCG that holds the QP numbers. If we fail here, we 1003 * must undo the previous resource allocation. 1004 * 1005 * Note: hermon_mcg_qplist_add() can we return SUCCESS if it already 1006 * found the QP in the list. In this case, the QP is not added on to 1007 * the list again. Check the flag 'qp_found' if this value is needed 1008 * to be known. 
1009 */ 1010 status = hermon_mcg_qplist_add(state, newmcg, mcg_entry_qplist, qp, 1011 &qp_found); 1012 if (status != DDI_SUCCESS) { 1013 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s)); 1014 hermon_rsrc_free(state, &rsrc); 1015 mutex_exit(&state->hs_mcglock); 1016 return (status); 1017 } 1018 mcg_entry->member_cnt = (newmcg->mcg_num_qps + 1); 1019 /* set the member count */ 1020 1021 /* 1022 * Once the temporary MCG has been updated, write the entry into the 1023 * appropriate location in the Hermon MCG entry table. If this is 1024 * successful, then we need to chain the previous entry to this one. 1025 * Note: In general, this operation shouldn't fail. If it does, then 1026 * it is an indication that something (probably in HW, but maybe in 1027 * SW) has gone seriously wrong. 1028 */ 1029 status = hermon_write_mgm_cmd_post(state, mcg_entry, rsrc->hr_indx, 1030 HERMON_CMD_NOSLEEP_SPIN); 1031 if (status != HERMON_CMD_SUCCESS) { 1032 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s)); 1033 hermon_rsrc_free(state, &rsrc); 1034 mutex_exit(&state->hs_mcglock); 1035 HERMON_WARNING(state, "failed to write MCG entry"); 1036 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n", 1037 status); 1038 if (status == HERMON_CMD_INVALID_STATUS) { 1039 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 1040 } 1041 return (ibc_get_ci_failure(0)); 1042 } 1043 1044 /* 1045 * Now read the current MCG entry (the one previously at the end of 1046 * hash chain) into the temporary MCG. We are going to update its 1047 * "next_gid_indx" now and write the entry back to the MCG table. 1048 * Note: In general, this operation shouldn't fail. If it does, then 1049 * it is an indication that something (probably in HW, but maybe in SW) 1050 * has gone seriously wrong. We will free up the MCG entry resource, 1051 * but we will not undo the previously written MCG entry in the HW. 1052 * This is OK, though, because the MCG entry is not currently attached 1053 * to any hash chain. 
1054 */ 1055 status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx, 1056 HERMON_CMD_NOSLEEP_SPIN); 1057 if (status != HERMON_CMD_SUCCESS) { 1058 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s)); 1059 hermon_rsrc_free(state, &rsrc); 1060 mutex_exit(&state->hs_mcglock); 1061 HERMON_WARNING(state, "failed to read MCG entry"); 1062 cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n", 1063 status); 1064 if (status == HERMON_CMD_INVALID_STATUS) { 1065 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 1066 } 1067 return (ibc_get_ci_failure(0)); 1068 } 1069 1070 /* 1071 * Finally, we update the "next_gid_indx" field in the temporary MCG 1072 * and attempt to write the entry back into the Hermon MCG table. If 1073 * this succeeds, then we update the "shadow" list to reflect the 1074 * change, drop the lock, and return success. Note: In general, this 1075 * operation shouldn't fail. If it does, then it is an indication 1076 * that something (probably in HW, but maybe in SW) has gone seriously 1077 * wrong. Just as we do above, we will free up the MCG entry resource, 1078 * but we will not try to undo the previously written MCG entry. This 1079 * is OK, though, because (since we failed here to update the end of 1080 * the chain) that other entry is not currently attached to any chain. 
 */
	mcg_entry->next_gid_indx = rsrc->hr_indx;
	status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
		hermon_rsrc_free(state, &rsrc);
		mutex_exit(&state->hs_mcglock);
		HERMON_WARNING(state, "failed to write MCG entry");
		cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}
	mcg = &state->hs_mcghdl[end_indx];
	mcg->mcg_next_indx = rsrc->hr_indx;

	/*
	 * Now that we know all the Hermon firmware accesses have been
	 * successful, we update the new "shadow" MCG entry by incrementing
	 * the "number of attached QPs" count.  Then we drop the lock and
	 * return success.
	 */
	newmcg->mcg_num_qps++;

	/*
	 * Increment the refcnt for this QP.  Because the QP
	 * was added to this MCG, the refcnt must be
	 * incremented.
	 */
	hermon_qp_mcg_refcnt_inc(qp);

	mutex_exit(&state->hs_mcglock);
	return (DDI_SUCCESS);
}


/*
 * hermon_mcg_detach()
 *    Detach the specified QP from the multicast group identified by
 *    "gid"/"lid".  Removes the QP number from the group's QP list and,
 *    when the QP was the group's last member, unlinks (and invalidates
 *    or frees) the MCG entry itself.
 *    Context: Can be called only from user or kernel context.
 */
int
hermon_mcg_detach(hermon_state_t *state, hermon_qphdl_t qp, ib_gid_t gid,
    ib_lid_t lid)
{
	hermon_hw_mcg_t		*mcg_entry;
	hermon_hw_mcg_qp_list_t	*mcg_entry_qplist;
	hermon_mcghdl_t		mcg;
	uint64_t		mgid_hash;
	uint32_t		end_indx, prev_indx;
	int			status;

	/*
	 * Check for invalid Multicast DLID.  Specifically, all Multicast
	 * LIDs should be within a well defined range.  If the specified LID
	 * is outside of that range, then return an error.
	 */
	if (hermon_mlid_is_valid(lid) == 0) {
		return (IBT_MC_MLID_INVALID);
	}

	/*
	 * Compute the MGID hash value.  As described above, the MCG table is
	 * arranged as a number of separate hash chains.  This operation
	 * converts the specified MGID into the starting index of an entry in
	 * the hash table (i.e. the index for the start of the appropriate
	 * hash chain).  Subsequent operations below will walk the chain
	 * searching for a matching entry from which to attempt to remove
	 * the specified QP.
	 */
	status = hermon_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
	    &mgid_hash, HERMON_SLEEPFLAG_FOR_CONTEXT());
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: MGID_HASH command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Grab the multicast group mutex.  Then grab the pre-allocated
	 * temporary buffer used for holding and/or modifying MCG entries.
	 */
	mutex_enter(&state->hs_mcglock);
	mcg_entry = state->hs_mcgtmp;
	mcg_entry_qplist = HERMON_MCG_GET_QPLIST_PTR(mcg_entry);

	/*
	 * Walk through the array of MCG entries starting at "mgid_hash".
	 * Try to find an MCG entry with a matching MGID.  The
	 * hermon_mcg_walk_mgid_hash() routine walks the list and returns an
	 * index into the MCG table.  The entry at this index is checked to
	 * determine whether it is a match or not.  If it is a match, then
	 * we continue on to attempt to remove the QP from the MCG.  If it
	 * is not a match (or not a valid MCG entry), then we return an error.
	 */
	end_indx = hermon_mcg_walk_mgid_hash(state, mgid_hash, gid, &prev_indx);
	mcg = &state->hs_mcghdl[end_indx];

	/*
	 * If MGID == 0 (the hash chain is empty) or if the specified MGID
	 * does not match the MGID in the current entry, then return
	 * IBT_MC_MGID_INVALID (to indicate that the specified MGID is not
	 * valid).
	 */
	if (((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) ||
	    ((mcg->mcg_mgid_h != gid.gid_prefix) ||
	    (mcg->mcg_mgid_l != gid.gid_guid))) {
		mutex_exit(&state->hs_mcglock);
		return (IBT_MC_MGID_INVALID);
	}

	/*
	 * Read the current MCG entry into the temporary MCG.  Note: In
	 * general, this operation shouldn't fail.  If it does, then it is
	 * an indication that something (probably in HW, but maybe in SW)
	 * has gone seriously wrong.
	 */
	status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		mutex_exit(&state->hs_mcglock);
		HERMON_WARNING(state, "failed to read MCG entry");
		cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Search the QP number list for a match.  If a match is found, then
	 * remove the entry from the QP list.  Otherwise, if no match is found,
	 * return an error.
	 */
	status = hermon_mcg_qplist_remove(mcg, mcg_entry_qplist, qp);
	if (status != DDI_SUCCESS) {
		mutex_exit(&state->hs_mcglock);
		return (status);
	}

	/*
	 * Decrement the MCG count for this QP.  When the QP's
	 * "qp_mcg_refcnt" field becomes 0, then this QP is no longer a
	 * member of any MCG.
	 */
	hermon_qp_mcg_refcnt_dec(qp);

	/*
	 * If the current MCG's QP number list is about to be made empty
	 * ("mcg_num_qps" == 1), then remove the entry itself from the hash
	 * chain.  Otherwise, just write the updated MCG entry back to the
	 * hardware.  In either case, once we successfully update the hardware
	 * chain, then we decrement the "shadow" list entry's "mcg_num_qps"
	 * count (or zero out the entire "shadow" list entry) before returning
	 * success.  Note:  Zeroing out the "shadow" list entry is done
	 * inside of hermon_mcg_hash_list_remove().
	 */
	if (mcg->mcg_num_qps == 1) {

		/* Remove an MCG entry from the hash chain */
		status = hermon_mcg_hash_list_remove(state, end_indx, prev_indx,
		    mcg_entry);
		if (status != DDI_SUCCESS) {
			mutex_exit(&state->hs_mcglock);
			return (status);
		}

	} else {
		/*
		 * Write the updated MCG entry back to the Hermon MCG table.
		 * If this succeeds, then we update the "shadow" list to
		 * reflect the change (i.e. decrement the "mcg_num_qps"),
		 * drop the lock, and return success.  Note:  In general,
		 * this operation shouldn't fail.  If it does, then it is an
		 * indication that something (probably in HW, but maybe in SW)
		 * has gone seriously wrong.
		 */
		mcg_entry->member_cnt = (mcg->mcg_num_qps - 1);
		status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
		    HERMON_CMD_NOSLEEP_SPIN);
		if (status != HERMON_CMD_SUCCESS) {
			mutex_exit(&state->hs_mcglock);
			HERMON_WARNING(state, "failed to write MCG entry");
			cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}
		mcg->mcg_num_qps--;
	}

	mutex_exit(&state->hs_mcglock);
	return (DDI_SUCCESS);
}

/*
 * hermon_qp_mcg_refcnt_inc()
 *    Bump the count of MCGs this QP currently belongs to (under qp_lock).
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp)
{
	/* Increment the QP's MCG reference count */
	mutex_enter(&qp->qp_lock);
	qp->qp_mcg_refcnt++;
	mutex_exit(&qp->qp_lock);
}


/*
 * hermon_qp_mcg_refcnt_dec()
 *    Drop the count of MCGs this QP currently belongs to (under qp_lock).
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp)
{
	/* Decrement the QP's MCG reference count */
	mutex_enter(&qp->qp_lock);
	qp->qp_mcg_refcnt--;
	mutex_exit(&qp->qp_lock);
}


/*
 * hermon_mcg_qplist_add()
 *    Append the QP's number to the (temporary copy of the) MCG's QP list,
 *    unless the QP is already present, in which case *qp_found is set and
 *    the list is left untouched.  Caller must hold hs_mcglock.
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_mcg_qplist_add(hermon_state_t *state, hermon_mcghdl_t mcg,
    hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp,
    uint_t *qp_found)
{
	uint_t		qplist_indx;

	ASSERT(MUTEX_HELD(&state->hs_mcglock));

	qplist_indx = mcg->mcg_num_qps;

	/*
	 * Determine if we have exceeded the maximum number of QP per
	 * multicast group.  If we have, then return an error
	 */
	if (qplist_indx >= state->hs_cfg_profile->cp_num_qp_per_mcg) {
		return (IBT_HCA_MCG_QP_EXCEEDED);
	}

	/*
	 * Determine if the QP is already attached to this MCG table.  If it
	 * is, then we break out and treat this operation as a NO-OP
	 */
	for (qplist_indx = 0; qplist_indx < mcg->mcg_num_qps;
	    qplist_indx++) {
		if (mcg_qplist[qplist_indx].qpn == qp->qp_qpnum) {
			break;
		}
	}

	/*
	 * If the QP was already on the list, set 'qp_found' to TRUE.  We still
	 * return SUCCESS in this case, but the qplist will not have been
	 * updated because the QP was already on the list.
	 */
	if (qplist_indx < mcg->mcg_num_qps) {
		*qp_found = 1;
	} else {
		/*
		 * Otherwise, append the new QP number to the end of the
		 * current QP list.  Note: We will increment the "mcg_num_qps"
		 * field on the "shadow" MCG list entry later (after we know
		 * that all necessary Hermon firmware accesses have been
		 * successful).
		 *
		 * Set 'qp_found' to 0 so we know the QP was added on to the
		 * list for sure.
		 */
		mcg_qplist[qplist_indx].qpn =
		    (qp->qp_qpnum | HERMON_MCG_QPN_BLOCK_LB);
		*qp_found = 0;
	}

	return (DDI_SUCCESS);
}



/*
 * hermon_mcg_qplist_remove()
 *    Remove the QP's number from the (temporary copy of the) MCG's QP
 *    list.  The caller is responsible for decrementing "mcg_num_qps"
 *    afterwards.
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_mcg_qplist_remove(hermon_mcghdl_t mcg,
    hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp)
{
	uint_t		i, qplist_indx;

	/*
	 * Search the MCG QP list for a matching QPN.  When
	 * it's found, we swap the last entry into the current
	 * slot, set the last entry to zero, and return.  (The
	 * caller decrements the "mcg_num_qps" count.)  If it's
	 * not found, then it's an error.
	 */
	qplist_indx = mcg->mcg_num_qps;
	for (i = 0; i < qplist_indx; i++) {
		if (mcg_qplist[i].qpn == qp->qp_qpnum) {
			mcg_qplist[i] = mcg_qplist[qplist_indx - 1];
			mcg_qplist[qplist_indx - 1].qpn = 0;

			return (DDI_SUCCESS);
		}
	}

	return (IBT_QP_HDL_INVALID);
}


/*
 * hermon_mcg_walk_mgid_hash()
 *    Walk the hash chain that starts at "start_indx" looking for an entry
 *    whose MGID matches "mgid".  Returns the index of the matching entry
 *    (or of the last entry examined when no match exists).
 *    Context: Can be called from interrupt or base context.
 */
static uint_t
hermon_mcg_walk_mgid_hash(hermon_state_t *state, uint64_t start_indx,
    ib_gid_t mgid, uint_t *p_indx)
{
	hermon_mcghdl_t	curr_mcghdl;
	uint_t		curr_indx, prev_indx;

	ASSERT(MUTEX_HELD(&state->hs_mcglock));

	/* Start at the head of the hash chain */
	curr_indx   = (uint_t)start_indx;
	prev_indx   = curr_indx;
	curr_mcghdl = &state->hs_mcghdl[curr_indx];

	/* If the first entry in the chain has MGID == 0, then stop */
	if ((curr_mcghdl->mcg_mgid_h == 0) &&
	    (curr_mcghdl->mcg_mgid_l == 0)) {
		goto end_mgid_hash_walk;
	}

	/* If the first entry in the chain matches the MGID, then stop */
	if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
	    (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
		goto end_mgid_hash_walk;
	}

	/* Otherwise, walk the hash chain looking for a match */
	while (curr_mcghdl->mcg_next_indx != 0) {
		prev_indx = curr_indx;
		curr_indx = curr_mcghdl->mcg_next_indx;
		curr_mcghdl = &state->hs_mcghdl[curr_indx];

		if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
		    (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
			break;
		}
	}

end_mgid_hash_walk:
	/*
	 * If necessary, return the index of the previous entry too.  This
	 * is primarily used for detaching a QP from a multicast group.  It
	 * may be necessary, in that case, to delete an MCG entry from the
	 * hash chain and having the index of the previous entry is helpful.
	 */
	if (p_indx != NULL) {
		*p_indx = prev_indx;
	}
	return (curr_indx);
}


/*
 * hermon_mcg_setup_new_hdr()
 *    Initialize both the "shadow" software MCG entry and the header
 *    fields of the temporary hardware MCG entry for a newly allocated
 *    multicast group.
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg, hermon_hw_mcg_t *mcg_hdr,
    ib_gid_t mgid, hermon_rsrc_t *mcg_rsrc)
{
	/*
	 * Fill in the fields of the "shadow" entry used by software
	 * to track MCG hardware entry
	 */
	mcg->mcg_mgid_h	   = mgid.gid_prefix;
	mcg->mcg_mgid_l	   = mgid.gid_guid;
	mcg->mcg_rsrcp	   = mcg_rsrc;
	mcg->mcg_next_indx = 0;
	mcg->mcg_num_qps   = 0;

	/*
	 * Fill the header fields of the MCG entry (in the temporary copy)
	 */
	mcg_hdr->mgid_h		= mgid.gid_prefix;
	mcg_hdr->mgid_l		= mgid.gid_guid;
	mcg_hdr->next_gid_indx	= 0;
}


/*
 * hermon_mcg_hash_list_remove()
 *    Unlink the MCG entry at "curr_indx" from its hash chain ("prev_indx"
 *    is the preceding entry, or equal to curr_indx when the entry is the
 *    chain head), updating both the hardware MCG table and the "shadow"
 *    software list.  Caller must hold hs_mcglock.
 *    Context: Can be called only from user or kernel context.
 */
static int
hermon_mcg_hash_list_remove(hermon_state_t *state, uint_t curr_indx,
    uint_t prev_indx, hermon_hw_mcg_t *mcg_entry)
{
	hermon_mcghdl_t		curr_mcg, prev_mcg, next_mcg;
	uint_t			next_indx;
	int			status;

	/* Get the pointer to "shadow" list for current entry */
	curr_mcg = &state->hs_mcghdl[curr_indx];

	/*
	 * If this is the first entry on a hash chain, then attempt to replace
	 * the entry with the next entry on the chain.  If there are no
	 * subsequent entries on the chain, then this is the only entry and
	 * should be invalidated.
	 */
	if (curr_indx == prev_indx) {

		/*
		 * If this is the only entry on the chain, then invalidate it.
		 * Note:  Invalidating an MCG entry means writing all zeros
		 * to the entry.  This is only necessary for those MCG
		 * entries that are the "head" entries of the individual hash
		 * chains.  Regardless of whether this operation returns
		 * success or failure, return that result to the caller.
		 */
		next_indx = curr_mcg->mcg_next_indx;
		if (next_indx == 0) {
			status = hermon_mcg_entry_invalidate(state, mcg_entry,
			    curr_indx);
			bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
			return (status);
		}

		/*
		 * Otherwise, this is just the first entry on the chain, so
		 * grab the next one
		 */
		next_mcg = &state->hs_mcghdl[next_indx];

		/*
		 * Read the next MCG entry into the temporary MCG.  Note:
		 * In general, this operation shouldn't fail.  If it does,
		 * then it is an indication that something (probably in HW,
		 * but maybe in SW) has gone seriously wrong.
		 */
		status = hermon_read_mgm_cmd_post(state, mcg_entry, next_indx,
		    HERMON_CMD_NOSLEEP_SPIN);
		if (status != HERMON_CMD_SUCCESS) {
			HERMON_WARNING(state, "failed to read MCG entry");
			cmn_err(CE_CONT, "Hermon: READ_MGM command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}

		/*
		 * Copy/Write the temporary MCG back to the hardware MCG list
		 * using the current index.  This essentially removes the
		 * current MCG entry from the list by writing over it with
		 * the next one.  If this is successful, then we can do the
		 * same operation for the "shadow" list.  And we can also
		 * free up the Hermon MCG entry resource that was associated
		 * with the (old) next entry.  Note:  In general, this
		 * operation shouldn't fail.  If it does, then it is an
		 * indication that something (probably in HW, but maybe in SW)
		 * has gone seriously wrong.
		 */
		status = hermon_write_mgm_cmd_post(state, mcg_entry, curr_indx,
		    HERMON_CMD_NOSLEEP_SPIN);
		if (status != HERMON_CMD_SUCCESS) {
			HERMON_WARNING(state, "failed to write MCG entry");
			cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}

		/*
		 * Copy all the software tracking information from the next
		 * entry on the "shadow" MCG list into the current entry on
		 * the list.  Then invalidate (zero out) the other "shadow"
		 * list entry.
		 */
		bcopy(next_mcg, curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
		bzero(next_mcg, sizeof (struct hermon_sw_mcg_list_s));

		/*
		 * Free up the Hermon MCG entry resource used by the "next"
		 * MCG entry.  That resource is no longer needed by any
		 * MCG entry which is first on a hash chain (like the "next"
		 * entry has just become).
		 */
		hermon_rsrc_free(state, &curr_mcg->mcg_rsrcp);

		return (DDI_SUCCESS);
	}

	/*
	 * Else if this is the last entry on the hash chain (or a middle
	 * entry), then we update the previous entry's "next_gid_index" field
	 * to make it point instead to the next entry on the chain.  By
	 * skipping over the removed entry in this way, we can then free up
	 * any resources associated with the current entry.  Note:  We don't
	 * need to invalidate the "skipped over" hardware entry because it
	 * will no longer be connected to any hash chains, and if/when it is
	 * finally re-used, it will be written with entirely new values.
	 */

	/*
	 * Read the next MCG entry into the temporary MCG.  Note:  In general,
	 * this operation shouldn't fail.  If it does, then it is an
	 * indication that something (probably in HW, but maybe in SW) has
	 * gone seriously wrong.
	 */
	status = hermon_read_mgm_cmd_post(state, mcg_entry, prev_indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		HERMON_WARNING(state, "failed to read MCG entry");
		cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Finally, we update the "next_gid_indx" field in the temporary MCG
	 * and attempt to write the entry back into the Hermon MCG table.  If
	 * this succeeds, then we update the "shadow" list to reflect the
	 * change, free up the Hermon MCG entry resource that was associated
	 * with the current entry, and return success.  Note:  In general,
	 * this operation shouldn't fail.  If it does, then it is an indication
	 * that something (probably in HW, but maybe in SW) has gone seriously
	 * wrong.
	 */
	mcg_entry->next_gid_indx = curr_mcg->mcg_next_indx;
	status = hermon_write_mgm_cmd_post(state, mcg_entry, prev_indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		HERMON_WARNING(state, "failed to write MCG entry");
		cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR,
			    HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Get the pointer to the "shadow" MCG list entry for the previous
	 * MCG.  Update its "mcg_next_indx" to point to the entry after the
	 * current one.  Note:  This next index may be zero, indicating the
	 * end of the list.
	 */
	prev_mcg = &state->hs_mcghdl[prev_indx];
	prev_mcg->mcg_next_indx = curr_mcg->mcg_next_indx;

	/*
	 * Free up the Hermon MCG entry resource used by the current entry.
	 * This resource is no longer needed because the chain now skips over
	 * the current entry.  Then invalidate (zero out) the current "shadow"
	 * list entry.
	 */
	hermon_rsrc_free(state, &curr_mcg->mcg_rsrcp);
	bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s));

	return (DDI_SUCCESS);
}


/*
 * hermon_mcg_entry_invalidate()
 *    Zero out the hardware MCG entry at "indx" (using the supplied
 *    temporary buffer).
 *    Context: Can be called only from user or kernel context.
 */
static int
hermon_mcg_entry_invalidate(hermon_state_t *state, hermon_hw_mcg_t *mcg_entry,
    uint_t indx)
{
	int		status;

	/*
	 * Invalidate the hardware MCG entry by zeroing out this temporary
	 * MCG and writing it to the hardware.  Note: In general, this
	 * operation shouldn't fail.  If it does, then it is an indication
	 * that something (probably in HW, but maybe in SW) has gone seriously
	 * wrong.
	 */
	bzero(mcg_entry, HERMON_MCGMEM_SZ(state));
	status = hermon_write_mgm_cmd_post(state, mcg_entry, indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		HERMON_WARNING(state, "failed to write MCG entry");
		cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_mgid_is_valid()
 *    Returns 1 when "gid" is a valid multicast GID per IBA 1.1 section
 *    4.1.1 (top bits, flags, and scope all in range), 0 otherwise.
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_mgid_is_valid(ib_gid_t gid)
{
	uint_t		topbits, flags, scope;

	/*
	 * According to IBA 1.1 specification (section 4.1.1) a valid
	 * "multicast GID" must have its top eight bits set to all ones
	 */
	topbits = (gid.gid_prefix >> HERMON_MCG_TOPBITS_SHIFT) &
	    HERMON_MCG_TOPBITS_MASK;
	if (topbits != HERMON_MCG_TOPBITS) {
		return (0);
	}

	/*
	 * The next 4 bits are the "flag" bits.  These are valid only
	 * if they are "0" (which correspond to permanently assigned/
	 * "well-known" multicast GIDs) or "1" (for so-called "transient"
	 * multicast GIDs).  All other values are reserved.
	 */
	flags = (gid.gid_prefix >> HERMON_MCG_FLAGS_SHIFT) &
	    HERMON_MCG_FLAGS_MASK;
	if (!((flags == HERMON_MCG_FLAGS_PERM) ||
	    (flags == HERMON_MCG_FLAGS_NONPERM))) {
		return (0);
	}

	/*
	 * The next 4 bits are the "scope" bits.  These are valid only
	 * if they are "2" (Link-local), "5" (Site-local), "8"
	 * (Organization-local) or "E" (Global).  All other values
	 * are reserved (or currently unassigned).
	 */
	scope = (gid.gid_prefix >> HERMON_MCG_SCOPE_SHIFT) &
	    HERMON_MCG_SCOPE_MASK;
	if (!((scope == HERMON_MCG_SCOPE_LINKLOC) ||
	    (scope == HERMON_MCG_SCOPE_SITELOC) ||
	    (scope == HERMON_MCG_SCOPE_ORGLOC) ||
	    (scope == HERMON_MCG_SCOPE_GLOBAL))) {
		return (0);
	}

	/*
	 * If it passes all of the above checks, then we will consider it
	 * a valid multicast GID.
	 */
	return (1);
}


/*
 * hermon_mlid_is_valid()
 *    Returns 1 when "lid" falls in the IBA-defined multicast DLID range,
 *    0 otherwise.
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_mlid_is_valid(ib_lid_t lid)
{
	/*
	 * According to IBA 1.1 specification (section 4.1.1) a valid
	 * "multicast DLID" must be between 0xC000 and 0xFFFE.
	 */
	if ((lid < IB_LID_MC_FIRST) || (lid > IB_LID_MC_LAST)) {
		return (0);
	}

	return (1);
}


/*
 * hermon_pd_alloc()
 *    Allocate a Protection Domain handle (with a unique PD number) and
 *    return it through "pdhdl".
 *    Context: Can be called only from user or kernel context.
 */
int
hermon_pd_alloc(hermon_state_t *state, hermon_pdhdl_t *pdhdl, uint_t sleepflag)
{
	hermon_rsrc_t	*rsrc;
	hermon_pdhdl_t	pd;
	int		status;

	/*
	 * Allocate the software structure for tracking the protection domain
	 * (i.e. the Hermon Protection Domain handle).  By default each PD
	 * structure will have a unique PD number assigned to it.  All that
	 * is necessary is for software to initialize the PD reference count
	 * (to zero) and return success.
	 */
	status = hermon_rsrc_alloc(state, HERMON_PDHDL, 1, sleepflag, &rsrc);
	if (status != DDI_SUCCESS) {
		return (IBT_INSUFF_RESOURCE);
	}
	pd = (hermon_pdhdl_t)rsrc->hr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd))

	pd->pd_refcnt = 0;
	*pdhdl = pd;

	return (DDI_SUCCESS);
}


/*
 * hermon_pd_free()
 *    Free the Protection Domain handle (fails with IBT_PD_IN_USE when
 *    other IB objects still reference it); NULLs the caller's pointer on
 *    success.
 *    Context: Can be called only from user or kernel context.
 */
int
hermon_pd_free(hermon_state_t *state, hermon_pdhdl_t *pdhdl)
{
	hermon_rsrc_t	*rsrc;
	hermon_pdhdl_t	pd;

	/*
	 * Pull all the necessary information from the Hermon Protection Domain
	 * handle.  This is necessary here because the resource for the
	 * PD is going to be freed up as part of this operation.
	 */
	pd = *pdhdl;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd))
	rsrc = pd->pd_rsrcp;

	/*
	 * Check the PD reference count.  If the reference count is non-zero,
	 * then it means that this protection domain is still referenced by
	 * some memory region, queue pair, address handle, or other IB object
	 * If it is non-zero, then return an error.  Otherwise, free the
	 * Hermon resource and return success.
	 */
	if (pd->pd_refcnt != 0) {
		return (IBT_PD_IN_USE);
	}

	/* Free the Hermon Protection Domain handle */
	hermon_rsrc_free(state, &rsrc);

	/* Set the pdhdl pointer to NULL and return success */
	*pdhdl = (hermon_pdhdl_t)NULL;

	return (DDI_SUCCESS);
}


/*
 * hermon_pd_refcnt_inc()
 *    Atomically bump the PD's reference count.
 *    Context: Can be called from interrupt or base context.
 */
void
hermon_pd_refcnt_inc(hermon_pdhdl_t pd)
{
	/* Increment the protection domain's reference count */
	atomic_inc_32(&pd->pd_refcnt);
}


/*
 * hermon_pd_refcnt_dec()
 *    Atomically drop the PD's reference count.
 *    Context: Can be called from interrupt or base context.
 */
void
hermon_pd_refcnt_dec(hermon_pdhdl_t pd)
{
	/* Decrement the protection domain's reference count */
	atomic_dec_32(&pd->pd_refcnt);
}


/*
 * hermon_port_query()
 *    Query the specified HCA port (via firmware MAD_IFC commands) and
 *    fill in the caller's "ibt_hca_portinfo_t".
 *    Context: Can be called only from user or kernel context.
 */
int
hermon_port_query(hermon_state_t *state, uint_t port, ibt_hca_portinfo_t *pi)
{
	sm_portinfo_t		portinfo;
	sm_guidinfo_t		guidinfo;
	sm_pkey_table_t		pkeytable;
	ib_gid_t		*sgid;
	uint_t			sgid_max, pkey_max, tbl_size;
	int			i, j, indx, status;
	ib_pkey_t		*pkeyp;
	ib_guid_t		*guidp;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pi))
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*state))

	/* Validate that specified port number is legal */
	if (!hermon_portnum_is_valid(state, port)) {
		return (IBT_HCA_PORT_INVALID);
	}
	pkeyp = state->hs_pkey[port - 1];
	guidp = state->hs_guid[port - 1];

	/*
	 * We use the Hermon MAD_IFC command to post a GetPortInfo MAD
	 * to the firmware (for the specified port number).  This returns
	 * a full PortInfo MAD (in "portinfo") which we subsequently
	 * parse to fill in the "ibt_hca_portinfo_t" structure returned
	 * to the IBTF.
1909 */ 1910 status = hermon_getportinfo_cmd_post(state, port, 1911 HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo); 1912 if (status != HERMON_CMD_SUCCESS) { 1913 cmn_err(CE_CONT, "Hermon: GetPortInfo (port %02d) command " 1914 "failed: %08x\n", port, status); 1915 if (status == HERMON_CMD_INVALID_STATUS) { 1916 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 1917 } 1918 return (ibc_get_ci_failure(0)); 1919 } 1920 1921 /* 1922 * Parse the PortInfo MAD and fill in the IBTF structure 1923 */ 1924 pi->p_base_lid = portinfo.LID; 1925 pi->p_qkey_violations = portinfo.Q_KeyViolations; 1926 pi->p_pkey_violations = portinfo.P_KeyViolations; 1927 pi->p_sm_sl = portinfo.MasterSMSL; 1928 pi->p_sm_lid = portinfo.MasterSMLID; 1929 pi->p_linkstate = portinfo.PortState; 1930 pi->p_port_num = portinfo.LocalPortNum; 1931 pi->p_mtu = portinfo.MTUCap; 1932 pi->p_lmc = portinfo.LMC; 1933 pi->p_max_vl = portinfo.VLCap; 1934 pi->p_subnet_timeout = portinfo.SubnetTimeOut; 1935 pi->p_msg_sz = ((uint32_t)1 << HERMON_QP_LOG_MAX_MSGSZ); 1936 tbl_size = state->hs_cfg_profile->cp_log_max_gidtbl; 1937 pi->p_sgid_tbl_sz = (1 << tbl_size); 1938 tbl_size = state->hs_cfg_profile->cp_log_max_pkeytbl; 1939 pi->p_pkey_tbl_sz = (1 << tbl_size); 1940 state->hs_sn_prefix[port - 1] = portinfo.GidPrefix; 1941 1942 /* 1943 * Convert InfiniBand-defined port capability flags to the format 1944 * specified by the IBTF 1945 */ 1946 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM) 1947 pi->p_capabilities |= IBT_PORT_CAP_SM; 1948 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM_DISABLED) 1949 pi->p_capabilities |= IBT_PORT_CAP_SM_DISABLED; 1950 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SNMP_SUPPD) 1951 pi->p_capabilities |= IBT_PORT_CAP_SNMP_TUNNEL; 1952 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_DM_SUPPD) 1953 pi->p_capabilities |= IBT_PORT_CAP_DM; 1954 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_VM_SUPPD) 1955 pi->p_capabilities |= IBT_PORT_CAP_VENDOR; 1956 1957 /* 1958 * Fill in the SGID table. 
Since the only access to the Hermon 1959 * GID tables is through the firmware's MAD_IFC interface, we 1960 * post as many GetGUIDInfo MADs as necessary to read in the entire 1961 * contents of the SGID table (for the specified port). Note: The 1962 * GetGUIDInfo command only gets eight GUIDs per operation. These 1963 * GUIDs are then appended to the GID prefix for the port (from the 1964 * GetPortInfo above) to form the entire SGID table. 1965 */ 1966 for (i = 0; i < pi->p_sgid_tbl_sz; i += 8) { 1967 status = hermon_getguidinfo_cmd_post(state, port, i >> 3, 1968 HERMON_SLEEPFLAG_FOR_CONTEXT(), &guidinfo); 1969 if (status != HERMON_CMD_SUCCESS) { 1970 cmn_err(CE_CONT, "Hermon: GetGUIDInfo (port %02d) " 1971 "command failed: %08x\n", port, status); 1972 if (status == HERMON_CMD_INVALID_STATUS) { 1973 hermon_fm_ereport(state, HCA_SYS_ERR, 1974 HCA_ERR_SRV_LOST); 1975 } 1976 return (ibc_get_ci_failure(0)); 1977 } 1978 1979 /* Figure out how many of the entries are valid */ 1980 sgid_max = min((pi->p_sgid_tbl_sz - i), 8); 1981 for (j = 0; j < sgid_max; j++) { 1982 indx = (i + j); 1983 sgid = &pi->p_sgid_tbl[indx]; 1984 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sgid)) 1985 sgid->gid_prefix = portinfo.GidPrefix; 1986 guidp[indx] = sgid->gid_guid = 1987 guidinfo.GUIDBlocks[j]; 1988 } 1989 } 1990 1991 /* 1992 * Fill in the PKey table. Just as for the GID tables above, the 1993 * only access to the Hermon PKey tables is through the firmware's 1994 * MAD_IFC interface. We post as many GetPKeyTable MADs as necessary 1995 * to read in the entire contents of the PKey table (for the specified 1996 * port). Note: The GetPKeyTable command only gets 32 PKeys per 1997 * operation. 
1998 */ 1999 for (i = 0; i < pi->p_pkey_tbl_sz; i += 32) { 2000 status = hermon_getpkeytable_cmd_post(state, port, i, 2001 HERMON_SLEEPFLAG_FOR_CONTEXT(), &pkeytable); 2002 if (status != HERMON_CMD_SUCCESS) { 2003 cmn_err(CE_CONT, "Hermon: GetPKeyTable (port %02d) " 2004 "command failed: %08x\n", port, status); 2005 if (status == HERMON_CMD_INVALID_STATUS) { 2006 hermon_fm_ereport(state, HCA_SYS_ERR, 2007 HCA_ERR_SRV_LOST); 2008 } 2009 return (ibc_get_ci_failure(0)); 2010 } 2011 2012 /* Figure out how many of the entries are valid */ 2013 pkey_max = min((pi->p_pkey_tbl_sz - i), 32); 2014 for (j = 0; j < pkey_max; j++) { 2015 indx = (i + j); 2016 pkeyp[indx] = pi->p_pkey_tbl[indx] = 2017 pkeytable.P_KeyTableBlocks[j]; 2018 } 2019 } 2020 2021 return (DDI_SUCCESS); 2022 } 2023 2024 2025 /* 2026 * hermon_port_modify() 2027 * Context: Can be called only from user or kernel context. 2028 */ 2029 /* ARGSUSED */ 2030 int 2031 hermon_port_modify(hermon_state_t *state, uint8_t port, 2032 ibt_port_modify_flags_t flags, uint8_t init_type) 2033 { 2034 sm_portinfo_t portinfo; 2035 uint32_t capmask; 2036 int status; 2037 hermon_hw_set_port_t set_port; 2038 2039 /* 2040 * Return an error if either of the unsupported flags are set 2041 */ 2042 if ((flags & IBT_PORT_SHUTDOWN) || 2043 (flags & IBT_PORT_SET_INIT_TYPE)) { 2044 return (IBT_NOT_SUPPORTED); 2045 } 2046 2047 bzero(&set_port, sizeof (set_port)); 2048 2049 /* 2050 * Determine whether we are trying to reset the QKey counter 2051 */ 2052 if (flags & IBT_PORT_RESET_QKEY) 2053 set_port.rqk = 1; 2054 2055 /* Validate that specified port number is legal */ 2056 if (!hermon_portnum_is_valid(state, port)) { 2057 return (IBT_HCA_PORT_INVALID); 2058 } 2059 2060 /* 2061 * Use the Hermon MAD_IFC command to post a GetPortInfo MAD to the 2062 * firmware (for the specified port number). This returns a full 2063 * PortInfo MAD (in "portinfo") from which we pull the current 2064 * capability mask. 
	 * We then modify the capability mask as directed
	 * by the "pmod_flags" field, and write the updated capability mask
	 * using the Hermon SET_IB command (below).
	 */
	status = hermon_getportinfo_cmd_post(state, port,
	    HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
	if (status != HERMON_CMD_SUCCESS) {
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Convert InfiniBand-defined port capability flags to the format
	 * specified by the IBTF.  Specifically, we modify the capability
	 * mask based on the specified values.
	 */
	capmask = portinfo.CapabilityMask;

	if (flags & IBT_PORT_RESET_SM)
		capmask &= ~SM_CAP_MASK_IS_SM;
	else if (flags & IBT_PORT_SET_SM)
		capmask |= SM_CAP_MASK_IS_SM;

	if (flags & IBT_PORT_RESET_SNMP)
		capmask &= ~SM_CAP_MASK_IS_SNMP_SUPPD;
	else if (flags & IBT_PORT_SET_SNMP)
		capmask |= SM_CAP_MASK_IS_SNMP_SUPPD;

	if (flags & IBT_PORT_RESET_DEVMGT)
		capmask &= ~SM_CAP_MASK_IS_DM_SUPPD;
	else if (flags & IBT_PORT_SET_DEVMGT)
		capmask |= SM_CAP_MASK_IS_DM_SUPPD;

	if (flags & IBT_PORT_RESET_VENDOR)
		capmask &= ~SM_CAP_MASK_IS_VM_SUPPD;
	else if (flags & IBT_PORT_SET_VENDOR)
		capmask |= SM_CAP_MASK_IS_VM_SUPPD;

	set_port.cap_mask = capmask;

	/*
	 * Use the Hermon SET_PORT command to update the capability mask and
	 * (possibly) reset the QKey violation counter for the specified port.
	 * Note: In general, this operation shouldn't fail.  If it does, then
	 * it is an indication that something (probably in HW, but maybe in
	 * SW) has gone seriously wrong.
	 *
	 * NOTE(review): the error message below says "SET_IB" although the
	 * command posted is SET_PORT - presumably a holdover from the older
	 * Tavor/Arbel command name; confirm before changing the string.
	 */
	status = hermon_set_port_cmd_post(state, &set_port, port,
	    HERMON_SLEEPFLAG_FOR_CONTEXT());
	if (status != HERMON_CMD_SUCCESS) {
		HERMON_WARNING(state, "failed to modify port capabilities");
		cmn_err(CE_CONT, "Hermon: SET_IB (port %02d) command failed: "
		    "%08x\n", port, status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_set_addr_path()
 *    Context: Can be called from interrupt or base context.
 *
 * Note: This routine is used for two purposes.  It is used to fill in the
 * Hermon UDAV fields, and it is used to fill in the address path information
 * for QPs.  Because the two Hermon structures are similar, common fields can
 * be filled in here.  Because they are different, however, we pass
 * an additional flag to indicate which type is being filled and do each one
 * uniquely
 */

int hermon_srate_override = -1;	/* allows ease of testing */

int
hermon_set_addr_path(hermon_state_t *state, ibt_adds_vect_t *av,
    hermon_hw_addr_path_t *path, uint_t type)
{
	uint_t		gidtbl_sz;
	hermon_hw_udav_t *udav;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av))
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path))

	/* A UDAV is an address path with some extra fields appended */
	udav = (hermon_hw_udav_t *)(void *)path;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*udav))
	path->mlid = av->av_src_path;
	path->rlid = av->av_dlid;

	/* Translate IBTF static rate to the HW encoding */
	if (av->av_srate == IBT_SRATE_10) {
		path->max_stat_rate = 8; /* 4xSDR-10.0Gb/s injection rate */
	} else if (av->av_srate == IBT_SRATE_20) {
		path->max_stat_rate = 11; /* 4xDDR-20Gb/s injection rate */
	} else if (av->av_srate == IBT_SRATE_2) {
		path->max_stat_rate = 7; /* 1xSDR-2.5Gb/s injection rate */
	} else if (av->av_srate == IBT_SRATE_5) {
		path->max_stat_rate = 10; /* 1xDDR-5Gb/s injection rate */
	} else if (av->av_srate == IBT_SRATE_NOT_SPECIFIED) {
		path->max_stat_rate = 0; /* Max */
	} else {
		return (IBT_STATIC_RATE_INVALID);
	}
	if (hermon_srate_override != -1) /* for evaluating HCA firmware */
		path->max_stat_rate = hermon_srate_override;

	/*
	 * If "grh" flag is set, then check for valid SGID index too.
	 * NOTE(review): the check uses ">" rather than ">=", so the
	 * one-past-the-end index gidtbl_sz is accepted - looks like an
	 * off-by-one; confirm against the IBTF contract before changing.
	 */
	gidtbl_sz = (1 << state->hs_queryport.log_max_gid);
	if ((av->av_send_grh) && (av->av_sgid_ix > gidtbl_sz)) {
		return (IBT_SGID_INVALID);
	}

	/*
	 * Fill in all "global" values regardless of the value in the GRH
	 * flag.  Because "grh" is not set unless "av_send_grh" is set, the
	 * hardware will ignore the other "global" values as necessary.  Note:
	 * SW does this here to enable later query operations to return
	 * exactly the same params that were passed when the addr path was
	 * last written.
	 */
	path->grh = av->av_send_grh;
	if (type == HERMON_ADDRPATH_QP) {
		path->mgid_index = av->av_sgid_ix;
	} else {
		/*
		 * For Hermon UDAV, the "mgid_index" field is the index into
		 * a combined table (not a per-port table), but having sections
		 * for each port.  So some extra calculations are necessary.
		 */

		path->mgid_index = ((av->av_port_num - 1) * gidtbl_sz) +
		    av->av_sgid_ix;

		udav->portnum = av->av_port_num;
	}

	/*
	 * According to Hermon PRM, the (31:0) part of rgid_l must be set to
	 * "0x2" if the 'grh' or 'g' bit is cleared.  It also says that we
	 * only need to do it for UDAV's.  So we enforce that here.
	 *
	 * NOTE: The entire 64 bits worth of GUID info is actually being
	 * preserved (for UDAVs) by the callers of this function
	 * (hermon_ah_alloc() and hermon_ah_modify()) and as long as the
	 * 'grh' bit is not set, the upper 32 bits (63:32) of rgid_l are
	 * "don't care".
	 */
	if ((path->grh) || (type == HERMON_ADDRPATH_QP)) {
		path->flow_label = av->av_flow;
		path->tclass = av->av_tclass;
		path->hop_limit = av->av_hop;
		bcopy(&(av->av_dgid.gid_prefix), &(path->rgid_h),
		    sizeof (uint64_t));
		bcopy(&(av->av_dgid.gid_guid), &(path->rgid_l),
		    sizeof (uint64_t));
	} else {
		path->rgid_l = 0x2;
		path->flow_label = 0;
		path->tclass = 0;
		path->hop_limit = 0;
		path->rgid_h = 0;
	}
	/* extract the default service level */
	udav->sl = (HERMON_DEF_SCHED_SELECTION & 0x3C) >> 2;

	return (DDI_SUCCESS);
}


/*
 * hermon_get_addr_path()
 *    Context: Can be called from interrupt or base context.
 *
 * Note: Just like hermon_set_addr_path() above, this routine is used for two
 * purposes.  It is used to read in the Hermon UDAV fields, and it is used to
 * read in the address path information for QPs.  Because the two Hermon
 * structures are similar, common fields can be read in here.  But because
 * they are slightly different, we pass an additional flag to indicate which
 * type is being read.
 */
void
hermon_get_addr_path(hermon_state_t *state, hermon_hw_addr_path_t *path,
    ibt_adds_vect_t *av, uint_t type)
{
	uint_t		gidtbl_sz;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path))
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av))

	av->av_src_path = path->mlid;
	av->av_dlid = path->rlid;

	/* Set "av_ipd" value from max_stat_rate */
	/* need to revisit for add'l rates - IBTF upgrade */
	if (path->max_stat_rate == 8) {
		av->av_srate = IBT_SRATE_10; /* 4xSDR-10.0Gb/s injection rate */
	} else if (path->max_stat_rate == 11) {
		av->av_srate = IBT_SRATE_20; /* 4xDDR-20Gb/s injection rate */
	} else if (path->max_stat_rate == 7) {
		av->av_srate = IBT_SRATE_2; /* 1xSDR-2.5Gb/s injection rate */
	} else if (path->max_stat_rate == 10) {
		av->av_srate = IBT_SRATE_5; /* 1xDDR-5Gb/s injection rate */
	} else if (path->max_stat_rate == 0) {
		av->av_srate = IBT_SRATE_NOT_SPECIFIED; /* Max */
	} else {
		av->av_srate = IBT_SRATE_1X; /* 1x injection rate */
	}


	/*
	 * Extract all "global" values regardless of the value in the GRH
	 * flag.  Because "av_send_grh" is set only if "grh" is set, software
	 * knows to ignore the other "global" values as necessary.  Note: SW
	 * does it this way to enable these query operations to return exactly
	 * the same params that were passed when the addr path was last written.
	 */
	av->av_send_grh = path->grh;
	if (type == HERMON_ADDRPATH_QP) {
		av->av_sgid_ix = path->mgid_index;
	} else {
		/*
		 * For Hermon UDAV, the "mgid_index" field is the index into
		 * a combined table (not a per-port table).  Reverse the
		 * per-port offset applied by hermon_set_addr_path().
		 */
		gidtbl_sz = (1 << state->hs_queryport.log_max_gid);
		av->av_sgid_ix = path->mgid_index - ((av->av_port_num - 1) *
		    gidtbl_sz);

		av->av_port_num = ((hermon_hw_udav_t *)(void *)path)->portnum;
	}
	av->av_flow = path->flow_label;
	av->av_tclass = path->tclass;
	av->av_hop = path->hop_limit;
	/* this is for alignment issue w/ the addr path struct in Hermon */
	bcopy(&(path->rgid_h), &(av->av_dgid.gid_prefix), sizeof (uint64_t));
	bcopy(&(path->rgid_l), &(av->av_dgid.gid_guid), sizeof (uint64_t));
}


/*
 * hermon_portnum_is_valid()
 *    Returns 1 if "portnum" is a legal (1-based) port number, else 0.
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_portnum_is_valid(hermon_state_t *state, uint_t portnum)
{
	uint_t	max_port;

	max_port = state->hs_cfg_profile->cp_num_ports;
	if ((portnum <= max_port) && (portnum != 0)) {
		return (1);
	} else {
		return (0);
	}
}


/*
 * hermon_pkeyindex_is_valid()
 *    Returns 1 if "pkeyindx" is within the configured PKey table, else 0.
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_pkeyindex_is_valid(hermon_state_t *state, uint_t pkeyindx)
{
	uint_t	max_pkeyindx;

	max_pkeyindx = 1 << state->hs_cfg_profile->cp_log_max_pkeytbl;
	if (pkeyindx < max_pkeyindx) {
		return (1);
	} else {
		return (0);
	}
}


/*
 * hermon_queue_alloc()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_queue_alloc(hermon_state_t *state, hermon_qalloc_info_t *qa_info,
    uint_t sleepflag)
{
	ddi_dma_attr_t		dma_attr;
	int			(*callback)(caddr_t);
	uint64_t		realsize, alloc_mask;
	uint_t			type;
	int			flag, status;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info))

	/* Set the callback flag appropriately */
	callback = (sleepflag == HERMON_SLEEP) ? DDI_DMA_SLEEP :
	    DDI_DMA_DONTWAIT;

	/*
	 * Initialize many of the default DMA attributes.  Then set additional
	 * alignment restrictions as necessary for the queue memory.  Also
	 * respect the configured value for IOMMU bypass
	 */
	hermon_dma_attr_init(state, &dma_attr);
	dma_attr.dma_attr_align = qa_info->qa_bind_align;
	type = state->hs_cfg_profile->cp_iommu_bypass;
	if (type == HERMON_BINDMEM_BYPASS) {
		dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
	}

	/* Allocate a DMA handle */
	status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, callback, NULL,
	    &qa_info->qa_dmahdl);
	if (status != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}

	/*
	 * Determine the amount of memory to allocate, depending on the values
	 * in "qa_bind_align" and "qa_alloc_align".  The problem we are trying
	 * to solve here is that allocating a DMA handle with IOMMU bypass
	 * (DDI_DMA_FORCE_PHYSICAL) constrains us to only requesting alignments
	 * that are less restrictive than the page size.  Since we may need
	 * stricter alignments on the memory allocated by ddi_dma_mem_alloc()
	 * (e.g. in Hermon QP work queue memory allocation), we use the
	 * following method to calculate how much additional memory to request,
	 * and we enforce our own alignment on the allocated result.
	 */
	alloc_mask = qa_info->qa_alloc_align - 1;
	if (qa_info->qa_bind_align == qa_info->qa_alloc_align) {
		realsize = qa_info->qa_size;
	} else {
		/* Over-allocate so we can round up to "qa_alloc_align" */
		realsize = qa_info->qa_size + alloc_mask;
	}

	/*
	 * If we are to allocate the queue from system memory, then use
	 * ddi_dma_mem_alloc() to find the space.  Otherwise, this is a
	 * host memory allocation, use ddi_umem_alloc(). In either case,
	 * return a pointer to the memory range allocated (including any
	 * necessary alignment adjustments), the "real" memory pointer,
	 * the "real" size, and a ddi_acc_handle_t to use when reading
	 * from/writing to the memory.
	 */
	if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) {
		/* Allocate system memory for the queue */
		status = ddi_dma_mem_alloc(qa_info->qa_dmahdl, realsize,
		    &state->hs_reg_accattr, DDI_DMA_CONSISTENT, callback, NULL,
		    (caddr_t *)&qa_info->qa_buf_real,
		    (size_t *)&qa_info->qa_buf_realsz, &qa_info->qa_acchdl);
		if (status != DDI_SUCCESS) {
			ddi_dma_free_handle(&qa_info->qa_dmahdl);
			return (DDI_FAILURE);
		}

		/*
		 * Save temporary copy of the real pointer.  (This may be
		 * modified in the last step below).
		 */
		qa_info->qa_buf_aligned = qa_info->qa_buf_real;

		bzero(qa_info->qa_buf_real, qa_info->qa_buf_realsz);

	} else { /* HERMON_QUEUE_LOCATION_USERLAND */

		/* Allocate userland mappable memory for the queue */
		flag = (sleepflag == HERMON_SLEEP) ? DDI_UMEM_SLEEP :
		    DDI_UMEM_NOSLEEP;
		qa_info->qa_buf_real = ddi_umem_alloc(realsize, flag,
		    &qa_info->qa_umemcookie);
		if (qa_info->qa_buf_real == NULL) {
			ddi_dma_free_handle(&qa_info->qa_dmahdl);
			return (DDI_FAILURE);
		}

		/*
		 * Save temporary copy of the real pointer.  (This may be
		 * modified in the last step below).
		 */
		qa_info->qa_buf_aligned = qa_info->qa_buf_real;

	}

	/*
	 * The next to last step is to ensure that the final address
	 * ("qa_buf_aligned") has the appropriate "alloc" alignment
	 * restriction applied to it (if necessary).
	 */
	if (qa_info->qa_bind_align != qa_info->qa_alloc_align) {
		qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t)
		    qa_info->qa_buf_aligned + alloc_mask) & ~alloc_mask);
	}
	/*
	 * The last step is to figure out the offset of the start relative
	 * to the first page of the region - will be used in the eqc/cqc
	 * passed to the HW
	 */
	qa_info->qa_pgoffs = (uint_t)((uintptr_t)
	    qa_info->qa_buf_aligned & HERMON_PAGEMASK);

	return (DDI_SUCCESS);
}


/*
 * hermon_queue_free()
 *    Context: Can be called from interrupt or base context.
 */
void
hermon_queue_free(hermon_qalloc_info_t *qa_info)
{
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info))

	/*
	 * Depending on how (i.e. from where) we allocated the memory for
	 * this queue, we choose the appropriate method for releasing the
	 * resources.
	 */
	if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) {

		ddi_dma_mem_free(&qa_info->qa_acchdl);

	} else if (qa_info->qa_location == HERMON_QUEUE_LOCATION_USERLAND) {

		ddi_umem_free(qa_info->qa_umemcookie);

	}

	/* Always free the dma handle */
	ddi_dma_free_handle(&qa_info->qa_dmahdl);
}

/*
 * hermon_create_fmr_pool()
 *    Create a pool of FMRs.
 *    Context: Can be called from kernel context only.
 */
int
hermon_create_fmr_pool(hermon_state_t *state, hermon_pdhdl_t pd,
    ibt_fmr_pool_attr_t *fmr_attr, hermon_fmrhdl_t *fmrpoolp)
{
	hermon_fmrhdl_t	fmrpool;
	hermon_fmr_list_t *fmr, *fmr_next;
	hermon_mrhdl_t   mr;
	char		taskqname[48];
	int		status;
	int		sleep;
	int		i;

	sleep = (fmr_attr->fmr_flags & IBT_MR_SLEEP) ?
HERMON_SLEEP : 2515 HERMON_NOSLEEP; 2516 if ((sleep == HERMON_SLEEP) && 2517 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) { 2518 return (IBT_INVALID_PARAM); 2519 } 2520 2521 fmrpool = (hermon_fmrhdl_t)kmem_zalloc(sizeof (*fmrpool), sleep); 2522 if (fmrpool == NULL) { 2523 status = IBT_INSUFF_RESOURCE; 2524 goto fail; 2525 } 2526 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmrpool)) 2527 2528 mutex_init(&fmrpool->fmr_lock, NULL, MUTEX_DRIVER, 2529 DDI_INTR_PRI(state->hs_intrmsi_pri)); 2530 2531 fmrpool->fmr_state = state; 2532 fmrpool->fmr_flush_function = fmr_attr->fmr_func_hdlr; 2533 fmrpool->fmr_flush_arg = fmr_attr->fmr_func_arg; 2534 fmrpool->fmr_pool_size = 0; 2535 fmrpool->fmr_cache = 0; 2536 fmrpool->fmr_max_pages = fmr_attr->fmr_max_pages_per_fmr; 2537 fmrpool->fmr_page_sz = fmr_attr->fmr_page_sz; 2538 fmrpool->fmr_dirty_watermark = fmr_attr->fmr_dirty_watermark; 2539 fmrpool->fmr_dirty_len = 0; 2540 fmrpool->fmr_flags = fmr_attr->fmr_flags; 2541 2542 /* Create taskq to handle cleanup and flush processing */ 2543 (void) snprintf(taskqname, 50, "fmrpool/%d/%d @ 0x%" PRIx64, 2544 fmr_attr->fmr_pool_size, hermon_debug_fmrpool_cnt, 2545 (uint64_t)(uintptr_t)fmrpool); 2546 fmrpool->fmr_taskq = ddi_taskq_create(state->hs_dip, taskqname, 2547 HERMON_TASKQ_NTHREADS, TASKQ_DEFAULTPRI, 0); 2548 if (fmrpool->fmr_taskq == NULL) { 2549 status = IBT_INSUFF_RESOURCE; 2550 goto fail1; 2551 } 2552 2553 fmrpool->fmr_free_list = NULL; 2554 fmrpool->fmr_dirty_list = NULL; 2555 2556 if (fmr_attr->fmr_cache) { 2557 hermon_fmr_cache_init(fmrpool); 2558 } 2559 2560 for (i = 0; i < fmr_attr->fmr_pool_size; i++) { 2561 status = hermon_mr_alloc_fmr(state, pd, fmrpool, &mr); 2562 if (status != DDI_SUCCESS) { 2563 goto fail2; 2564 } 2565 2566 fmr = (hermon_fmr_list_t *)kmem_zalloc( 2567 sizeof (hermon_fmr_list_t), sleep); 2568 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr)) 2569 2570 fmr->fmr = mr; 2571 fmr->fmr_refcnt = 0; 2572 fmr->fmr_remaps = 0; 2573 fmr->fmr_pool = fmrpool; 2574 
fmr->fmr_in_cache = 0; 2575 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) 2576 mr->mr_fmr = fmr; 2577 2578 fmr->fmr_next = fmrpool->fmr_free_list; 2579 fmrpool->fmr_free_list = fmr; 2580 fmrpool->fmr_pool_size++; 2581 } 2582 2583 /* Set to return pool */ 2584 *fmrpoolp = fmrpool; 2585 2586 return (IBT_SUCCESS); 2587 fail2: 2588 hermon_fmr_cache_fini(fmrpool); 2589 for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) { 2590 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr)) 2591 fmr_next = fmr->fmr_next; 2592 (void) hermon_mr_dealloc_fmr(state, &fmr->fmr); 2593 kmem_free(fmr, sizeof (hermon_fmr_list_t)); 2594 } 2595 ddi_taskq_destroy(fmrpool->fmr_taskq); 2596 fail1: 2597 kmem_free(fmrpool, sizeof (*fmrpool)); 2598 fail: 2599 if (status == DDI_FAILURE) { 2600 return (ibc_get_ci_failure(0)); 2601 } else { 2602 return (status); 2603 } 2604 } 2605 2606 /* 2607 * hermon_destroy_fmr_pool() 2608 * Destroy an FMR pool and free all associated resources. 2609 * Context: Can be called from kernel context only. 2610 */ 2611 int 2612 hermon_destroy_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool) 2613 { 2614 hermon_fmr_list_t *fmr, *fmr_next; 2615 int status; 2616 2617 mutex_enter(&fmrpool->fmr_lock); 2618 status = hermon_fmr_cleanup(state, fmrpool); 2619 if (status != DDI_SUCCESS) { 2620 mutex_exit(&fmrpool->fmr_lock); 2621 return (status); 2622 } 2623 2624 if (fmrpool->fmr_cache) { 2625 hermon_fmr_cache_fini(fmrpool); 2626 } 2627 2628 for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) { 2629 fmr_next = fmr->fmr_next; 2630 2631 (void) hermon_mr_dealloc_fmr(state, &fmr->fmr); 2632 kmem_free(fmr, sizeof (hermon_fmr_list_t)); 2633 } 2634 mutex_exit(&fmrpool->fmr_lock); 2635 2636 ddi_taskq_destroy(fmrpool->fmr_taskq); 2637 mutex_destroy(&fmrpool->fmr_lock); 2638 2639 kmem_free(fmrpool, sizeof (*fmrpool)); 2640 return (DDI_SUCCESS); 2641 } 2642 2643 /* 2644 * hermon_flush_fmr_pool() 2645 * Ensure that all unmapped FMRs are fully invalidated. 
2646 * Context: Can be called from kernel context only. 2647 */ 2648 int 2649 hermon_flush_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool) 2650 { 2651 int status; 2652 2653 /* 2654 * Force the unmapping of all entries on the dirty list, regardless of 2655 * whether the watermark has been hit yet. 2656 */ 2657 /* grab the pool lock */ 2658 mutex_enter(&fmrpool->fmr_lock); 2659 status = hermon_fmr_cleanup(state, fmrpool); 2660 mutex_exit(&fmrpool->fmr_lock); 2661 return (status); 2662 } 2663 2664 /* 2665 * hermon_deregister_fmr() 2666 * Map memory into FMR 2667 * Context: Can be called from interrupt or base context. 2668 */ 2669 int 2670 hermon_register_physical_fmr(hermon_state_t *state, hermon_fmrhdl_t fmrpool, 2671 ibt_pmr_attr_t *mem_pattr, hermon_mrhdl_t *mr, 2672 ibt_pmr_desc_t *mem_desc_p) 2673 { 2674 hermon_fmr_list_t *fmr; 2675 hermon_fmr_list_t query; 2676 avl_index_t where; 2677 int status; 2678 2679 /* Check length */ 2680 mutex_enter(&fmrpool->fmr_lock); 2681 if (mem_pattr->pmr_len < 1 || (mem_pattr->pmr_num_buf > 2682 fmrpool->fmr_max_pages)) { 2683 mutex_exit(&fmrpool->fmr_lock); 2684 return (IBT_MR_LEN_INVALID); 2685 } 2686 2687 mutex_enter(&fmrpool->fmr_cachelock); 2688 /* lookup in fmr cache */ 2689 /* if exists, grab it, and return it */ 2690 if (fmrpool->fmr_cache) { 2691 query.fmr_desc.pmd_iova = mem_pattr->pmr_iova; 2692 query.fmr_desc.pmd_phys_buf_list_sz = mem_pattr->pmr_len; 2693 fmr = (hermon_fmr_list_t *)avl_find(&fmrpool->fmr_cache_avl, 2694 &query, &where); 2695 2696 /* 2697 * If valid FMR was found in cache, return that fmr info 2698 */ 2699 if (fmr != NULL) { 2700 fmr->fmr_refcnt++; 2701 /* Store pmr desc for use in cache */ 2702 (void) memcpy(mem_desc_p, &fmr->fmr_desc, 2703 sizeof (ibt_pmr_desc_t)); 2704 *mr = (hermon_mrhdl_t)fmr->fmr; 2705 mutex_exit(&fmrpool->fmr_cachelock); 2706 mutex_exit(&fmrpool->fmr_lock); 2707 return (DDI_SUCCESS); 2708 } 2709 } 2710 2711 /* FMR does not exist in cache, proceed with registration */ 
2712 2713 /* grab next free entry */ 2714 fmr = fmrpool->fmr_free_list; 2715 if (fmr == NULL) { 2716 mutex_exit(&fmrpool->fmr_cachelock); 2717 mutex_exit(&fmrpool->fmr_lock); 2718 return (IBT_INSUFF_RESOURCE); 2719 } 2720 2721 fmrpool->fmr_free_list = fmrpool->fmr_free_list->fmr_next; 2722 fmr->fmr_next = NULL; 2723 2724 status = hermon_mr_register_physical_fmr(state, mem_pattr, fmr->fmr, 2725 mem_desc_p); 2726 if (status != DDI_SUCCESS) { 2727 mutex_exit(&fmrpool->fmr_cachelock); 2728 mutex_exit(&fmrpool->fmr_lock); 2729 return (status); 2730 } 2731 2732 fmr->fmr_refcnt = 1; 2733 fmr->fmr_remaps++; 2734 2735 /* Store pmr desc for use in cache */ 2736 (void) memcpy(&fmr->fmr_desc, mem_desc_p, sizeof (ibt_pmr_desc_t)); 2737 *mr = (hermon_mrhdl_t)fmr->fmr; 2738 2739 /* Store in cache */ 2740 if (fmrpool->fmr_cache) { 2741 if (!fmr->fmr_in_cache) { 2742 avl_insert(&fmrpool->fmr_cache_avl, fmr, where); 2743 fmr->fmr_in_cache = 1; 2744 } 2745 } 2746 2747 mutex_exit(&fmrpool->fmr_cachelock); 2748 mutex_exit(&fmrpool->fmr_lock); 2749 return (DDI_SUCCESS); 2750 } 2751 2752 /* 2753 * hermon_deregister_fmr() 2754 * Unmap FMR 2755 * Context: Can be called from kernel context only. 2756 */ 2757 int 2758 hermon_deregister_fmr(hermon_state_t *state, hermon_mrhdl_t mr) 2759 { 2760 hermon_fmr_list_t *fmr; 2761 hermon_fmrhdl_t fmrpool; 2762 int status; 2763 2764 fmr = mr->mr_fmr; 2765 fmrpool = fmr->fmr_pool; 2766 2767 /* Grab pool lock */ 2768 mutex_enter(&fmrpool->fmr_lock); 2769 fmr->fmr_refcnt--; 2770 2771 if (fmr->fmr_refcnt == 0) { 2772 /* 2773 * First, do some bit of invalidation, reducing our exposure to 2774 * having this region still registered in hardware. 2775 */ 2776 (void) hermon_mr_invalidate_fmr(state, mr); 2777 2778 /* 2779 * If we've exhausted our remaps then add the FMR to the dirty 2780 * list, not allowing it to be re-used until we have done a 2781 * flush. Otherwise, simply add it back to the free list for 2782 * re-mapping. 
2783 */ 2784 if (fmr->fmr_remaps < 2785 state->hs_cfg_profile->cp_fmr_max_remaps) { 2786 /* add to free list */ 2787 fmr->fmr_next = fmrpool->fmr_free_list; 2788 fmrpool->fmr_free_list = fmr; 2789 } else { 2790 /* add to dirty list */ 2791 fmr->fmr_next = fmrpool->fmr_dirty_list; 2792 fmrpool->fmr_dirty_list = fmr; 2793 fmrpool->fmr_dirty_len++; 2794 2795 status = ddi_taskq_dispatch(fmrpool->fmr_taskq, 2796 hermon_fmr_processing, fmrpool, DDI_NOSLEEP); 2797 if (status == DDI_FAILURE) { 2798 mutex_exit(&fmrpool->fmr_lock); 2799 return (IBT_INSUFF_RESOURCE); 2800 } 2801 } 2802 } 2803 /* Release pool lock */ 2804 mutex_exit(&fmrpool->fmr_lock); 2805 2806 return (DDI_SUCCESS); 2807 } 2808 2809 2810 /* 2811 * hermon_fmr_processing() 2812 * If required, perform cleanup. 2813 * Context: Called from taskq context only. 2814 */ 2815 static void 2816 hermon_fmr_processing(void *fmr_args) 2817 { 2818 hermon_fmrhdl_t fmrpool; 2819 int status; 2820 2821 ASSERT(fmr_args != NULL); 2822 2823 fmrpool = (hermon_fmrhdl_t)fmr_args; 2824 2825 /* grab pool lock */ 2826 mutex_enter(&fmrpool->fmr_lock); 2827 if (fmrpool->fmr_dirty_len >= fmrpool->fmr_dirty_watermark) { 2828 status = hermon_fmr_cleanup(fmrpool->fmr_state, fmrpool); 2829 if (status != DDI_SUCCESS) { 2830 mutex_exit(&fmrpool->fmr_lock); 2831 return; 2832 } 2833 2834 if (fmrpool->fmr_flush_function != NULL) { 2835 (void) fmrpool->fmr_flush_function( 2836 (ibc_fmr_pool_hdl_t)fmrpool, 2837 fmrpool->fmr_flush_arg); 2838 } 2839 } 2840 2841 /* let pool lock go */ 2842 mutex_exit(&fmrpool->fmr_lock); 2843 } 2844 2845 /* 2846 * hermon_fmr_cleanup() 2847 * Perform cleaning processing, walking the list and performing the MTT sync 2848 * operation if required. 2849 * Context: can be called from taskq or base context. 
2850 */ 2851 static int 2852 hermon_fmr_cleanup(hermon_state_t *state, hermon_fmrhdl_t fmrpool) 2853 { 2854 hermon_fmr_list_t *fmr; 2855 hermon_fmr_list_t *fmr_next; 2856 int sync_needed; 2857 int status; 2858 2859 ASSERT(MUTEX_HELD(&fmrpool->fmr_lock)); 2860 2861 sync_needed = 0; 2862 for (fmr = fmrpool->fmr_dirty_list; fmr; fmr = fmr_next) { 2863 fmr_next = fmr->fmr_next; 2864 fmr->fmr_remaps = 0; 2865 2866 (void) hermon_mr_deregister_fmr(state, fmr->fmr); 2867 2868 /* 2869 * Update lists. 2870 * - add fmr back to free list 2871 * - remove fmr from dirty list 2872 */ 2873 fmr->fmr_next = fmrpool->fmr_free_list; 2874 fmrpool->fmr_free_list = fmr; 2875 2876 2877 /* 2878 * Because we have updated the dirty list, and deregistered the 2879 * FMR entry, we do need to sync the TPT, so we set the 2880 * 'sync_needed' flag here so we sync once we finish dirty_list 2881 * processing. 2882 */ 2883 sync_needed = 1; 2884 } 2885 2886 fmrpool->fmr_dirty_list = NULL; 2887 fmrpool->fmr_dirty_len = 0; 2888 2889 if (sync_needed) { 2890 status = hermon_sync_tpt_cmd_post(state, 2891 HERMON_CMD_NOSLEEP_SPIN); 2892 if (status != HERMON_CMD_SUCCESS) { 2893 return (status); 2894 } 2895 } 2896 2897 return (DDI_SUCCESS); 2898 } 2899 2900 /* 2901 * hermon_fmr_avl_compare() 2902 * Context: Can be called from user or kernel context. 2903 */ 2904 static int 2905 hermon_fmr_avl_compare(const void *q, const void *e) 2906 { 2907 hermon_fmr_list_t *entry, *query; 2908 2909 entry = (hermon_fmr_list_t *)e; 2910 query = (hermon_fmr_list_t *)q; 2911 2912 if (query->fmr_desc.pmd_iova < entry->fmr_desc.pmd_iova) { 2913 return (-1); 2914 } else if (query->fmr_desc.pmd_iova > entry->fmr_desc.pmd_iova) { 2915 return (+1); 2916 } else { 2917 return (0); 2918 } 2919 } 2920 2921 2922 /* 2923 * hermon_fmr_cache_init() 2924 * Context: Can be called from user or kernel context. 
 */
static void
hermon_fmr_cache_init(hermon_fmrhdl_t fmr)
{
	/* Initialize the lock used for FMR cache AVL tree access */
	mutex_init(&fmr->fmr_cachelock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(fmr->fmr_state->hs_intrmsi_pri));

	/* Initialize the AVL tree for the FMR cache */
	avl_create(&fmr->fmr_cache_avl, hermon_fmr_avl_compare,
	    sizeof (hermon_fmr_list_t),
	    offsetof(hermon_fmr_list_t, fmr_avlnode));

	/* mark caching as enabled on this pool */
	fmr->fmr_cache = 1;
}


/*
 * hermon_fmr_cache_fini()
 *    Tear down the FMR cache AVL tree and its lock.
 *    Context: Can be called from user or kernel context.
 */
static void
hermon_fmr_cache_fini(hermon_fmrhdl_t fmr)
{
	void			*cookie;

	/*
	 * Empty all entries (if necessary) and destroy the AVL tree.
	 * The FMRs themselves are freed as part of destroy_pool()
	 */
	cookie = NULL;
	while (((void *)(hermon_fmr_list_t *)avl_destroy_nodes(
	    &fmr->fmr_cache_avl, &cookie)) != NULL) {
		/* loop through */
	}
	avl_destroy(&fmr->fmr_cache_avl);

	/* Destroy the lock used for FMR cache */
	mutex_destroy(&fmr->fmr_cachelock);
}

/*
 * hermon_get_dma_cookies()
 *    Return DMA cookies in the pre-allocated paddr_list_p based on the length
 *    needed.  Allocates and binds a DMA handle (returned to the caller via
 *    ibc_ma_hdl_p, to be released later with hermon_free_dma_cookies()) and
 *    copies each resulting cookie's address/size into the caller's physical
 *    buffer list.
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_get_dma_cookies(hermon_state_t *state, ibt_phys_buf_t *paddr_list_p,
    ibt_va_attr_t *va_attrs, uint_t list_len, uint_t *cookiecnt,
    ibc_ma_hdl_t *ibc_ma_hdl_p)
{
	ddi_dma_handle_t	dma_hdl;
	ddi_dma_attr_t		dma_attr;
	ddi_dma_cookie_t	dmacookie;
	int			(*callback)(caddr_t);
	int			status;
	int			i;

	/* Set the callback flag appropriately */
	callback = (va_attrs->va_flags & IBT_VA_NOSLEEP) ? DDI_DMA_DONTWAIT :
	    DDI_DMA_SLEEP;
	/* Sleeping allocation is only legal in a sleepable context */
	if ((callback == DDI_DMA_SLEEP) &&
	    (HERMON_SLEEP != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		return (IBT_INVALID_PARAM);
	}

	/*
	 * Initialize many of the default DMA attributes and allocate the DMA
	 * handle.  Then, if we're bypassing the IOMMU, set the
	 * DDI_DMA_FORCE_PHYSICAL flag.
	 */
	hermon_dma_attr_init(state, &dma_attr);

#ifdef	__x86
	/*
	 * On x86 we can specify a maximum segment length for our returned
	 * cookies.
	 */
	if (va_attrs->va_flags & IBT_VA_FMR) {
		dma_attr.dma_attr_seg = PAGESIZE - 1;
	}
#endif

	/*
	 * Check to see if the RO flag is set, and if so,
	 * set that bit in the attr structure as well.
	 *
	 * NOTE 1: This function is ONLY called by consumers, and only for
	 *	   data buffers
	 */
	if (hermon_kernel_data_ro == HERMON_RO_ENABLED) {
		dma_attr.dma_attr_flags |= DDI_DMA_RELAXED_ORDERING;
	}

	status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
	    callback, NULL, &dma_hdl);
	if (status != DDI_SUCCESS) {
		switch (status) {
		case DDI_DMA_NORESOURCES:
			return (IBT_INSUFF_RESOURCE);
		case DDI_DMA_BADATTR:
		default:
			return (ibc_get_ci_failure(0));
		}
	}

	/*
	 * Now bind the handle with the correct DMA attributes.  The source
	 * may be described either by a buf(9S) (IBT_VA_BUF) or by a raw
	 * virtual address and length.
	 */
	if (va_attrs->va_flags & IBT_VA_BUF) {
		status = ddi_dma_buf_bind_handle(dma_hdl, va_attrs->va_buf,
		    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_DONTWAIT,
		    NULL, &dmacookie, cookiecnt);
	} else {
		status = ddi_dma_addr_bind_handle(dma_hdl, NULL,
		    (caddr_t)(uintptr_t)va_attrs->va_vaddr, va_attrs->va_len,
		    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_DONTWAIT,
		    NULL, &dmacookie, cookiecnt);
	}
	if (status != DDI_SUCCESS) {
		/* bind failed: release the handle before returning */
		ddi_dma_free_handle(&dma_hdl);

		switch (status) {
		case DDI_DMA_NORESOURCES:
			return (IBT_INSUFF_RESOURCE);
		case DDI_DMA_TOOBIG:
			return (IBT_INVALID_PARAM);
		case DDI_DMA_PARTIAL_MAP:
		case DDI_DMA_INUSE:
		case DDI_DMA_NOMAPPING:
		default:
			return (ibc_get_ci_failure(0));
		}
	}

	/*
	 * Verify our physical buffer list (PBL) is large enough to handle the
	 * number of cookies that were returned.
	 */
	if (*cookiecnt > list_len) {
		(void) ddi_dma_unbind_handle(dma_hdl);
		ddi_dma_free_handle(&dma_hdl);
		return (IBT_PBL_TOO_SMALL);
	}

	/*
	 * We store the cookies returned by the DDI into our own PBL.  This
	 * sets the cookies up for later processing (for example, if we want to
	 * split up the cookies into smaller chunks).  We use the laddr and
	 * size fields in each cookie to create each individual entry (PBE).
	 */

	/*
	 * Store first cookie info first
	 */
	paddr_list_p[0].p_laddr = dmacookie.dmac_laddress;
	paddr_list_p[0].p_size = dmacookie.dmac_size;

	/*
	 * Loop through each cookie, storing each cookie into our physical
	 * buffer list.
	 */
	for (i = 1; i < *cookiecnt; i++) {
		ddi_dma_nextcookie(dma_hdl, &dmacookie);

		paddr_list_p[i].p_laddr = dmacookie.dmac_laddress;
		paddr_list_p[i].p_size = dmacookie.dmac_size;
	}

	/* return handle */
	*ibc_ma_hdl_p = (ibc_ma_hdl_t)dma_hdl;
	return (DDI_SUCCESS);
}

/*
 * hermon_split_dma_cookies()
 *    Split up cookies passed in from paddr_list_p, returning the new list in
 *    the same buffers, based on the pagesize to split the cookies into.
 *    On return, *paddr_offset holds the sub-page offset of the first cookie
 *    and *cookiecnt is updated to the new (larger) cookie count.
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_split_dma_cookies(hermon_state_t *state, ibt_phys_buf_t *paddr_list,
    ib_memlen_t *paddr_offset, uint_t list_len, uint_t *cookiecnt,
    uint_t pagesize)
{
	uint64_t	pageoffset;
	uint64_t	pagemask;
	uint_t		pageshift;
	uint_t		current_cookiecnt;
	uint_t		cookies_needed;
	uint64_t	last_size, extra_cookie;
	int		i_increment;
	int		i, k;
	int		status;

	/* Setup pagesize calculations (pagesize is assumed a power of two) */
	pageoffset = pagesize - 1;
	pagemask = (~pageoffset);
	pageshift = highbit(pagesize) - 1;

	/*
	 * Setup first cookie offset based on pagesize requested.
	 */
	*paddr_offset = paddr_list[0].p_laddr & pageoffset;
	paddr_list[0].p_laddr &= pagemask;

	/* Save away the current number of cookies that are passed in */
	current_cookiecnt = *cookiecnt;

	/* Perform splitting up of current cookies into pagesize blocks */
	for (i = 0; i < current_cookiecnt; i += i_increment) {
		/*
		 * If the cookie is smaller than pagesize, or already is
		 * pagesize, then we are already within our limits, so we skip
		 * it.
		 */
		if (paddr_list[i].p_size <= pagesize) {
			i_increment = 1;
			continue;
		}

		/*
		 * If this is our first cookie, then we have to deal with the
		 * offset that may be present in the first address.  So add
		 * that to our size, to calculate potential change to the last
		 * cookie's size.
		 *
		 * Also, calculate the number of cookies that we'll need to
		 * split up this block into.
		 */
		if (i == 0) {
			last_size = (paddr_list[i].p_size + *paddr_offset) &
			    pageoffset;
			cookies_needed = (paddr_list[i].p_size +
			    *paddr_offset) >> pageshift;
		} else {
			last_size = 0;
			cookies_needed = paddr_list[i].p_size >> pageshift;
		}

		/*
		 * If our size is not a multiple of pagesize, we need one more
		 * cookie.
		 */
		if (last_size) {
			extra_cookie = 1;
		} else {
			extra_cookie = 0;
		}

		/*
		 * Split cookie into pagesize chunks, shifting list of cookies
		 * down, using more cookie slots in the PBL if necessary.
		 */
		status = hermon_dma_cookie_shift(paddr_list, i, list_len,
		    current_cookiecnt - i, cookies_needed + extra_cookie);
		if (status != 0) {
			/* PBL too small to hold the split cookies */
			return (status);
		}

		/*
		 * If the very first cookie, we must take possible offset into
		 * account.
		 */
		if (i == 0) {
			paddr_list[i].p_size = pagesize - *paddr_offset;
		} else {
			paddr_list[i].p_size = pagesize;
		}

		/*
		 * We have shifted the existing cookies down the PBL, now fill
		 * in the blank entries by splitting up our current block.
		 */
		for (k = 1; k < cookies_needed; k++) {
			paddr_list[i + k].p_laddr =
			    paddr_list[i + k - 1].p_laddr + pagesize;
			paddr_list[i + k].p_size = pagesize;
		}

		/* If we have one extra cookie (of less than pagesize...) */
		if (extra_cookie) {
			paddr_list[i + k].p_laddr =
			    paddr_list[i + k - 1].p_laddr + pagesize;
			paddr_list[i + k].p_size = (size_t)last_size;
		}

		/* Increment cookiecnt appropriately based on cookies used */
		i_increment = cookies_needed + extra_cookie;
		current_cookiecnt += i_increment - 1;
	}

	/* Update to new cookie count */
	*cookiecnt = current_cookiecnt;
	return (DDI_SUCCESS);
}

/*
 * hermon_dma_cookie_shift()
 *    Shift the 'cookiecnt' PBL entries beginning at index 'start' down the
 *    list by ('num_shift' - 1) slots, opening a gap for split cookies.
 *    Returns IBT_PBL_TOO_SMALL if the shifted entries would run past 'end'.
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_dma_cookie_shift(ibt_phys_buf_t *paddr_list, int start, int end,
    int cookiecnt, int num_shift)
{
	int		shift_start;
	int		i;

	/* Calculating starting point in the PBL list */
	shift_start = start + cookiecnt - 1;

	/* Check if we're at the end of our PBL list */
	if ((shift_start + num_shift - 1) >= end) {
		return (IBT_PBL_TOO_SMALL);
	}

	/* Copy from the tail downward so entries are not overwritten */
	for (i = shift_start; i > start; i--) {
		paddr_list[i + num_shift - 1] = paddr_list[i];
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_free_dma_cookies()
 *    Unbind and free the DMA handle previously returned by
 *    hermon_get_dma_cookies().
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_free_dma_cookies(ibc_ma_hdl_t ma_hdl)
{
	ddi_dma_handle_t	dma_hdl;
	int			status;

	dma_hdl = (ddi_dma_handle_t)ma_hdl;

	status = ddi_dma_unbind_handle(dma_hdl);
	if (status != DDI_SUCCESS) {
		return (ibc_get_ci_failure(0));
	}
	ddi_dma_free_handle(&dma_hdl);

	return (DDI_SUCCESS);
}