1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * hermon_misc.c 29 * Hermon Miscellaneous routines - Address Handle, Multicast, Protection 30 * Domain, and port-related operations 31 * 32 * Implements all the routines necessary for allocating, freeing, querying 33 * and modifying Address Handles and Protection Domains. Also implements 34 * all the routines necessary for adding and removing Queue Pairs to/from 35 * Multicast Groups. Lastly, it implements the routines necessary for 36 * port-related query and modify operations. 
37 */ 38 39 #include <sys/types.h> 40 #include <sys/conf.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/modctl.h> 44 #include <sys/bitmap.h> 45 #include <sys/sysmacros.h> 46 47 #include <sys/ib/adapters/hermon/hermon.h> 48 49 extern uint32_t hermon_kernel_data_ro; 50 51 /* used for helping uniquify fmr pool taskq name */ 52 static uint_t hermon_debug_fmrpool_cnt = 0x00000000; 53 54 static int hermon_mcg_qplist_add(hermon_state_t *state, hermon_mcghdl_t mcg, 55 hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp, uint_t *qp_found); 56 static int hermon_mcg_qplist_remove(hermon_mcghdl_t mcg, 57 hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp); 58 static void hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp); 59 static void hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp); 60 static uint_t hermon_mcg_walk_mgid_hash(hermon_state_t *state, 61 uint64_t start_indx, ib_gid_t mgid, uint_t *prev_indx); 62 static void hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg, 63 hermon_hw_mcg_t *mcg_hdr, ib_gid_t mgid, hermon_rsrc_t *mcg_rsrc); 64 static int hermon_mcg_hash_list_remove(hermon_state_t *state, uint_t curr_indx, 65 uint_t prev_indx, hermon_hw_mcg_t *mcg_entry); 66 static int hermon_mcg_entry_invalidate(hermon_state_t *state, 67 hermon_hw_mcg_t *mcg_entry, uint_t indx); 68 static int hermon_mgid_is_valid(ib_gid_t gid); 69 static int hermon_mlid_is_valid(ib_lid_t lid); 70 static void hermon_fmr_processing(void *fmr_args); 71 static int hermon_fmr_cleanup(hermon_state_t *state, hermon_fmrhdl_t pool); 72 static void hermon_fmr_cache_init(hermon_fmrhdl_t fmr); 73 static void hermon_fmr_cache_fini(hermon_fmrhdl_t fmr); 74 static int hermon_fmr_avl_compare(const void *q, const void *e); 75 76 77 #define HERMON_MAX_DBR_PAGES_PER_USER 64 78 #define HERMON_DBR_KEY(index, page) \ 79 (((uint64_t)index) * HERMON_MAX_DBR_PAGES_PER_USER + (page)) 80 81 static hermon_udbr_page_t * 82 hermon_dbr_new_user_page(hermon_state_t *state, uint_t index, 83 uint_t page) 84 { 85 
hermon_udbr_page_t *pagep; 86 ddi_dma_attr_t dma_attr; 87 uint_t cookiecnt; 88 int i, status; 89 uint64_t *p; 90 hermon_umap_db_entry_t *umapdb; 91 92 pagep = kmem_alloc(sizeof (*pagep), KM_SLEEP); 93 pagep->upg_index = page; 94 pagep->upg_nfree = PAGESIZE / sizeof (hermon_dbr_t); 95 pagep->upg_firstfree = 0; 96 pagep->upg_kvaddr = ddi_umem_alloc(PAGESIZE, DDI_UMEM_SLEEP, 97 &pagep->upg_umemcookie); /* not HERMON_PAGESIZE here */ 98 99 /* link free entries */ 100 p = (uint64_t *)(void *)pagep->upg_kvaddr; 101 for (i = pagep->upg_firstfree; i < pagep->upg_nfree; i++) 102 p[i] = i + 1; 103 pagep->upg_buf = ddi_umem_iosetup(pagep->upg_umemcookie, 0, 104 PAGESIZE, B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP); 105 106 hermon_dma_attr_init(state, &dma_attr); 107 status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, 108 DDI_DMA_SLEEP, NULL, &pagep->upg_dmahdl); 109 if (status != DDI_SUCCESS) { 110 IBTF_DPRINTF_L2("hermon", "hermon_new_user_page: " 111 "ddi_dma_buf_bind_handle failed: %d", status); 112 return (NULL); 113 } 114 status = ddi_dma_buf_bind_handle(pagep->upg_dmahdl, 115 pagep->upg_buf, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 116 DDI_DMA_SLEEP, NULL, &pagep->upg_dmacookie, &cookiecnt); 117 if (status != DDI_SUCCESS) { 118 IBTF_DPRINTF_L2("hermon", "hermon_dbr_new_user_page: " 119 "ddi_dma_buf_bind_handle failed: %d", status); 120 ddi_dma_free_handle(&pagep->upg_dmahdl); 121 return (NULL); 122 } 123 ASSERT(cookiecnt == 1); 124 125 /* create db entry for mmap */ 126 umapdb = hermon_umap_db_alloc(state->hs_instance, 127 HERMON_DBR_KEY(index, page), MLNX_UMAP_DBRMEM_RSRC, 128 (uint64_t)(uintptr_t)pagep); 129 hermon_umap_db_add(umapdb); 130 return (pagep); 131 } 132 133 134 /*ARGSUSED*/ 135 static int 136 hermon_user_dbr_alloc(hermon_state_t *state, uint_t index, 137 ddi_acc_handle_t *acchdl, hermon_dbr_t **vdbr, uint64_t *pdbr, 138 uint64_t *mapoffset) 139 { 140 hermon_user_dbr_t *udbr; 141 hermon_udbr_page_t *pagep; 142 uint_t next_page; 143 int j; 144 145 
mutex_enter(&state->hs_dbr_lock); 146 for (udbr = state->hs_user_dbr; udbr != NULL; udbr = udbr->udbr_link) 147 if (udbr->udbr_index == index) 148 break; 149 if (udbr == NULL) { 150 udbr = kmem_alloc(sizeof (*udbr), KM_SLEEP); 151 udbr->udbr_link = state->hs_user_dbr; 152 state->hs_user_dbr = udbr; 153 udbr->udbr_index = index; 154 udbr->udbr_pagep = NULL; 155 } 156 pagep = udbr->udbr_pagep; 157 next_page = (pagep == NULL) ? 0 : (pagep->upg_index + 1); 158 while (pagep != NULL) 159 if (pagep->upg_nfree > 0) 160 break; 161 else 162 pagep = pagep->upg_link; 163 if (pagep == NULL) { 164 pagep = hermon_dbr_new_user_page(state, index, next_page); 165 if (pagep == NULL) { 166 mutex_exit(&state->hs_dbr_lock); 167 return (DDI_FAILURE); 168 } 169 pagep->upg_link = udbr->udbr_pagep; 170 udbr->udbr_pagep = pagep; 171 } 172 j = pagep->upg_firstfree; /* index within page */ 173 pagep->upg_firstfree = ((uint64_t *)(void *)pagep->upg_kvaddr)[j]; 174 pagep->upg_nfree--; 175 ((uint64_t *)(void *)pagep->upg_kvaddr)[j] = 0; /* clear dbr */ 176 *mapoffset = ((HERMON_DBR_KEY(index, pagep->upg_index) << 177 MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_DBRMEM_RSRC) << PAGESHIFT; 178 *vdbr = (hermon_dbr_t *)((uint64_t *)(void *)pagep->upg_kvaddr + j); 179 *pdbr = pagep->upg_dmacookie.dmac_laddress + j * sizeof (uint64_t); 180 181 mutex_exit(&state->hs_dbr_lock); 182 return (DDI_SUCCESS); 183 } 184 185 static void 186 hermon_user_dbr_free(hermon_state_t *state, uint_t index, hermon_dbr_t *record) 187 { 188 hermon_user_dbr_t *udbr; 189 hermon_udbr_page_t *pagep; 190 caddr_t kvaddr; 191 uint_t dbr_index; 192 uint_t max_free = PAGESIZE / sizeof (hermon_dbr_t); 193 194 dbr_index = (uintptr_t)record & PAGEOFFSET; /* offset (not yet index) */ 195 kvaddr = (caddr_t)record - dbr_index; 196 dbr_index /= sizeof (hermon_dbr_t); /* now it's the index */ 197 198 mutex_enter(&state->hs_dbr_lock); 199 for (udbr = state->hs_user_dbr; udbr != NULL; udbr = udbr->udbr_link) 200 if (udbr->udbr_index == index) 201 
break; 202 if (udbr == NULL) { 203 IBTF_DPRINTF_L2("hermon", "free user dbr: udbr struct not " 204 "found for index %x", index); 205 mutex_exit(&state->hs_dbr_lock); 206 return; 207 } 208 for (pagep = udbr->udbr_pagep; pagep != NULL; pagep = pagep->upg_link) 209 if (pagep->upg_kvaddr == kvaddr) 210 break; 211 if (pagep == NULL) { 212 IBTF_DPRINTF_L2("hermon", "free user dbr: pagep struct not" 213 " found for index %x, kvaddr %p, DBR index %x", 214 index, kvaddr, dbr_index); 215 mutex_exit(&state->hs_dbr_lock); 216 return; 217 } 218 if (pagep->upg_nfree >= max_free) { 219 IBTF_DPRINTF_L2("hermon", "free user dbr: overflow: " 220 "UCE index %x, DBR index %x", index, dbr_index); 221 mutex_exit(&state->hs_dbr_lock); 222 return; 223 } 224 ASSERT(dbr_index < max_free); 225 ((uint64_t *)(void *)kvaddr)[dbr_index] = pagep->upg_firstfree; 226 pagep->upg_firstfree = dbr_index; 227 pagep->upg_nfree++; 228 mutex_exit(&state->hs_dbr_lock); 229 230 /* XXX still need to unlink and free struct */ 231 /* XXX munmap needs to be managed */ 232 } 233 234 /* 235 * hermon_dbr_page_alloc() 236 * first page allocation - called from attach or open 237 * in this case, we want exactly one page per call, and aligned on a 238 * page - and may need to be mapped to the user for access 239 */ 240 241 int 242 hermon_dbr_page_alloc(hermon_state_t *state, hermon_dbr_info_t **dinfo) 243 { 244 int status; 245 ddi_dma_handle_t dma_hdl; 246 ddi_acc_handle_t acc_hdl; 247 ddi_dma_attr_t dma_attr; 248 ddi_dma_cookie_t cookie; 249 uint_t cookie_cnt; 250 hermon_dbr_header_t *pagehdr; 251 int i; 252 hermon_dbr_info_t *info; 253 uint64_t dmaaddr; 254 uint64_t dmalen; 255 256 info = kmem_zalloc(sizeof (hermon_dbr_info_t), KM_SLEEP); 257 258 /* 259 * Initialize many of the default DMA attributes. Then set additional 260 * alignment restrictions if necessary for the dbr memory, meaning 261 * page aligned. 
Also use the configured value for IOMMU bypass 262 */ 263 hermon_dma_attr_init(state, &dma_attr); 264 dma_attr.dma_attr_align = PAGESIZE; 265 dma_attr.dma_attr_sgllen = 1; /* make sure only one cookie */ 266 267 status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, 268 DDI_DMA_SLEEP, NULL, &dma_hdl); 269 if (status != DDI_SUCCESS) { 270 kmem_free((void *)info, sizeof (hermon_dbr_info_t)); 271 cmn_err(CE_NOTE, "dbr DMA handle alloc failed\n"); 272 return (DDI_FAILURE); 273 } 274 275 status = ddi_dma_mem_alloc(dma_hdl, PAGESIZE, 276 &state->hs_reg_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, 277 NULL, (caddr_t *)&dmaaddr, (size_t *)&dmalen, &acc_hdl); 278 if (status != DDI_SUCCESS) { 279 ddi_dma_free_handle(&dma_hdl); 280 cmn_err(CE_CONT, "dbr DMA mem alloc failed(status %d)", status); 281 kmem_free((void *)info, sizeof (hermon_dbr_info_t)); 282 return (DDI_FAILURE); 283 } 284 285 /* this memory won't be IB registered, so do the bind here */ 286 status = ddi_dma_addr_bind_handle(dma_hdl, NULL, 287 (caddr_t)(uintptr_t)dmaaddr, (size_t)dmalen, DDI_DMA_RDWR | 288 DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &cookie, &cookie_cnt); 289 if (status != DDI_SUCCESS) { 290 ddi_dma_mem_free(&acc_hdl); 291 ddi_dma_free_handle(&dma_hdl); 292 kmem_free((void *)info, sizeof (hermon_dbr_info_t)); 293 cmn_err(CE_CONT, "dbr DMA bind handle failed (status %d)", 294 status); 295 return (DDI_FAILURE); 296 } 297 *dinfo = info; /* Pass back the pointer */ 298 299 /* init the info structure with returned info */ 300 info->dbr_dmahdl = dma_hdl; 301 info->dbr_acchdl = acc_hdl; 302 info->dbr_page = (caddr_t)(uintptr_t)dmaaddr; 303 /* extract the phys addr from the cookie */ 304 info->dbr_paddr = cookie.dmac_laddress; 305 /* should have everything now, so do the init of the header */ 306 pagehdr = (hermon_dbr_header_t *)(void *)info->dbr_page; 307 pagehdr->next = 0; 308 pagehdr->firstfree = 0; 309 pagehdr->nfree = HERMON_NUM_DBR_PER_PAGE; 310 pagehdr->dbr_info = info; 311 /* link all DBrs onto the 
free list */ 312 for (i = 0; i < HERMON_NUM_DBR_PER_PAGE; i++) { 313 pagehdr->dbr[i] = i + 1; 314 } 315 316 return (DDI_SUCCESS); 317 } 318 319 320 /* 321 * hermon_dbr_alloc() 322 * DBr record allocation - called from alloc cq/qp/srq 323 * will check for available dbrs in current 324 * page - if needed it will allocate another and link them 325 */ 326 327 int 328 hermon_dbr_alloc(hermon_state_t *state, uint_t index, ddi_acc_handle_t *acchdl, 329 hermon_dbr_t **vdbr, uint64_t *pdbr, uint64_t *mapoffset) 330 { 331 hermon_dbr_header_t *pagehdr, *lastpage; 332 hermon_dbr_t *record = NULL; 333 hermon_dbr_info_t *dinfo = NULL; 334 int status; 335 336 if (index != state->hs_kernel_uar_index) 337 return (hermon_user_dbr_alloc(state, index, acchdl, vdbr, pdbr, 338 mapoffset)); 339 340 mutex_enter(&state->hs_dbr_lock); 341 /* 'pagehdr' holds pointer to first page */ 342 pagehdr = (hermon_dbr_header_t *)(void *)state->hs_kern_dbr; 343 do { 344 lastpage = pagehdr; /* save pagehdr for later linking */ 345 if (pagehdr->nfree == 0) { 346 pagehdr = (hermon_dbr_header_t *)(void *)pagehdr->next; 347 continue; /* page is full, go to next if there is one */ 348 } 349 dinfo = pagehdr->dbr_info; 350 break; /* found a page w/ one available */ 351 } while (pagehdr != 0); 352 353 if (dinfo == NULL) { /* did NOT find a page with one available */ 354 status = hermon_dbr_page_alloc(state, &dinfo); 355 if (status != DDI_SUCCESS) { 356 /* do error handling */ 357 mutex_exit(&state->hs_dbr_lock); 358 return (DDI_FAILURE); 359 } 360 /* got a new page, so link it in. 
*/ 361 pagehdr = (hermon_dbr_header_t *)(void *)dinfo->dbr_page; 362 lastpage->next = pagehdr; 363 } 364 record = pagehdr->dbr + pagehdr->firstfree; 365 pagehdr->firstfree = *record; 366 pagehdr->nfree--; 367 *record = 0; 368 369 *acchdl = dinfo->dbr_acchdl; 370 *vdbr = record; 371 *pdbr = ((uintptr_t)record - (uintptr_t)pagehdr + dinfo->dbr_paddr); 372 mutex_exit(&state->hs_dbr_lock); 373 return (DDI_SUCCESS); 374 } 375 376 /* 377 * hermon_dbr_free() 378 * DBr record deallocation - called from free cq/qp 379 * will update the counter in the header, and invalidate 380 * the dbr, but will NEVER free pages of dbrs - small 381 * price to pay, but userland access never will anyway 382 */ 383 384 void 385 hermon_dbr_free(hermon_state_t *state, uint_t indx, hermon_dbr_t *record) 386 { 387 hermon_dbr_header_t *pagehdr; 388 389 if (indx != state->hs_kernel_uar_index) { 390 hermon_user_dbr_free(state, indx, record); 391 return; 392 } 393 mutex_enter(&state->hs_dbr_lock); 394 pagehdr = (hermon_dbr_header_t *)((uintptr_t)record & 395 (uintptr_t)PAGEMASK); 396 *record = pagehdr->firstfree; 397 pagehdr->firstfree = record - pagehdr->dbr; 398 pagehdr->nfree++; /* decr the count for this one */ 399 mutex_exit(&state->hs_dbr_lock); 400 } 401 402 /* 403 * hermon_dbr_kern_free() 404 * Context: Can be called only from detach context. 405 * 406 * Free all kernel dbr pages. This includes the freeing of all the dma 407 * resources acquired during the allocation of the pages. 408 * 409 * Also, free all the user dbr pages. 
410 */ 411 void 412 hermon_dbr_kern_free(hermon_state_t *state) 413 { 414 hermon_dbr_header_t *pagehdr, *lastpage; 415 hermon_dbr_info_t *dinfo; 416 hermon_user_dbr_t *udbr, *next; 417 hermon_udbr_page_t *pagep, *nextp; 418 hermon_umap_db_entry_t *umapdb; 419 int instance, status; 420 uint64_t value; 421 extern hermon_umap_db_t hermon_userland_rsrc_db; 422 423 mutex_enter(&state->hs_dbr_lock); 424 pagehdr = (hermon_dbr_header_t *)(void *)state->hs_kern_dbr; 425 while (pagehdr != NULL) { 426 lastpage = (hermon_dbr_header_t *)(void *)pagehdr->next; 427 dinfo = pagehdr->dbr_info; 428 (void) ddi_dma_unbind_handle(dinfo->dbr_dmahdl); 429 ddi_dma_mem_free(&dinfo->dbr_acchdl); /* free page */ 430 ddi_dma_free_handle(&dinfo->dbr_dmahdl); 431 kmem_free(dinfo, sizeof (hermon_dbr_info_t)); 432 pagehdr = lastpage; 433 } 434 435 udbr = state->hs_user_dbr; 436 instance = state->hs_instance; 437 mutex_enter(&hermon_userland_rsrc_db.hdl_umapdb_lock); 438 while (udbr != NULL) { 439 pagep = udbr->udbr_pagep; 440 while (pagep != NULL) { 441 /* probably need to remove "db" */ 442 (void) ddi_dma_unbind_handle(pagep->upg_dmahdl); 443 ddi_dma_free_handle(&pagep->upg_dmahdl); 444 freerbuf(pagep->upg_buf); 445 ddi_umem_free(pagep->upg_umemcookie); 446 status = hermon_umap_db_find_nolock(instance, 447 HERMON_DBR_KEY(udbr->udbr_index, 448 pagep->upg_index), MLNX_UMAP_DBRMEM_RSRC, 449 &value, HERMON_UMAP_DB_REMOVE, &umapdb); 450 if (status == DDI_SUCCESS) 451 hermon_umap_db_free(umapdb); 452 nextp = pagep->upg_link; 453 kmem_free(pagep, sizeof (*pagep)); 454 pagep = nextp; 455 } 456 next = udbr->udbr_link; 457 kmem_free(udbr, sizeof (*udbr)); 458 udbr = next; 459 } 460 mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock); 461 mutex_exit(&state->hs_dbr_lock); 462 } 463 464 /* 465 * hermon_ah_alloc() 466 * Context: Can be called only from user or kernel context. 
467 */ 468 int 469 hermon_ah_alloc(hermon_state_t *state, hermon_pdhdl_t pd, 470 ibt_adds_vect_t *attr_p, hermon_ahhdl_t *ahhdl, uint_t sleepflag) 471 { 472 hermon_rsrc_t *rsrc; 473 hermon_hw_udav_t *udav; 474 hermon_ahhdl_t ah; 475 int status; 476 477 /* 478 * Someday maybe the "ibt_adds_vect_t *attr_p" will be NULL to 479 * indicate that we wish to allocate an "invalid" (i.e. empty) 480 * address handle XXX 481 */ 482 483 /* Validate that specified port number is legal */ 484 if (!hermon_portnum_is_valid(state, attr_p->av_port_num)) { 485 return (IBT_HCA_PORT_INVALID); 486 } 487 488 /* 489 * Allocate the software structure for tracking the address handle 490 * (i.e. the Hermon Address Handle struct). 491 */ 492 status = hermon_rsrc_alloc(state, HERMON_AHHDL, 1, sleepflag, &rsrc); 493 if (status != DDI_SUCCESS) { 494 return (IBT_INSUFF_RESOURCE); 495 } 496 ah = (hermon_ahhdl_t)rsrc->hr_addr; 497 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah)) 498 499 /* Increment the reference count on the protection domain (PD) */ 500 hermon_pd_refcnt_inc(pd); 501 502 udav = (hermon_hw_udav_t *)kmem_zalloc(sizeof (hermon_hw_udav_t), 503 KM_SLEEP); 504 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*udav)) 505 506 /* 507 * Fill in the UDAV data. We first zero out the UDAV, then populate 508 * it by then calling hermon_set_addr_path() to fill in the common 509 * portions that can be pulled from the "ibt_adds_vect_t" passed in 510 */ 511 status = hermon_set_addr_path(state, attr_p, 512 (hermon_hw_addr_path_t *)udav, HERMON_ADDRPATH_UDAV); 513 if (status != DDI_SUCCESS) { 514 hermon_pd_refcnt_dec(pd); 515 hermon_rsrc_free(state, &rsrc); 516 return (status); 517 } 518 udav->pd = pd->pd_pdnum; 519 udav->sl = attr_p->av_srvl; 520 521 /* 522 * Fill in the rest of the Hermon Address Handle struct. 523 * 524 * NOTE: We are saving away a copy of the "av_dgid.gid_guid" field 525 * here because we may need to return it later to the IBTF (as a 526 * result of a subsequent query operation). 
Unlike the other UDAV 527 * parameters, the value of "av_dgid.gid_guid" is not always preserved. 528 * The reason for this is described in hermon_set_addr_path(). 529 */ 530 ah->ah_rsrcp = rsrc; 531 ah->ah_pdhdl = pd; 532 ah->ah_udav = udav; 533 ah->ah_save_guid = attr_p->av_dgid.gid_guid; 534 *ahhdl = ah; 535 536 return (DDI_SUCCESS); 537 } 538 539 540 /* 541 * hermon_ah_free() 542 * Context: Can be called only from user or kernel context. 543 */ 544 /* ARGSUSED */ 545 int 546 hermon_ah_free(hermon_state_t *state, hermon_ahhdl_t *ahhdl, uint_t sleepflag) 547 { 548 hermon_rsrc_t *rsrc; 549 hermon_pdhdl_t pd; 550 hermon_ahhdl_t ah; 551 552 /* 553 * Pull all the necessary information from the Hermon Address Handle 554 * struct. This is necessary here because the resource for the 555 * AH is going to be freed up as part of this operation. 556 */ 557 ah = *ahhdl; 558 mutex_enter(&ah->ah_lock); 559 rsrc = ah->ah_rsrcp; 560 pd = ah->ah_pdhdl; 561 mutex_exit(&ah->ah_lock); 562 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah)) 563 564 /* Free the UDAV memory */ 565 kmem_free(ah->ah_udav, sizeof (hermon_hw_udav_t)); 566 567 /* Decrement the reference count on the protection domain (PD) */ 568 hermon_pd_refcnt_dec(pd); 569 570 /* Free the Hermon Address Handle structure */ 571 hermon_rsrc_free(state, &rsrc); 572 573 /* Set the ahhdl pointer to NULL and return success */ 574 *ahhdl = NULL; 575 576 return (DDI_SUCCESS); 577 } 578 579 580 /* 581 * hermon_ah_query() 582 * Context: Can be called from interrupt or base context. 583 */ 584 /* ARGSUSED */ 585 int 586 hermon_ah_query(hermon_state_t *state, hermon_ahhdl_t ah, hermon_pdhdl_t *pd, 587 ibt_adds_vect_t *attr_p) 588 { 589 mutex_enter(&ah->ah_lock); 590 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p)) 591 592 /* 593 * Pull the PD and UDAV from the Hermon Address Handle structure 594 */ 595 *pd = ah->ah_pdhdl; 596 597 /* 598 * Fill in "ibt_adds_vect_t". 
We call hermon_get_addr_path() to fill 599 * the common portions that can be pulled from the UDAV we pass in. 600 * 601 * NOTE: We will also fill the "av_dgid.gid_guid" field from the 602 * "ah_save_guid" field we have previously saved away. The reason 603 * for this is described in hermon_ah_alloc() and hermon_ah_modify(). 604 */ 605 hermon_get_addr_path(state, (hermon_hw_addr_path_t *)ah->ah_udav, 606 attr_p, HERMON_ADDRPATH_UDAV); 607 608 attr_p->av_dgid.gid_guid = ah->ah_save_guid; 609 610 mutex_exit(&ah->ah_lock); 611 return (DDI_SUCCESS); 612 } 613 614 615 /* 616 * hermon_ah_modify() 617 * Context: Can be called from interrupt or base context. 618 */ 619 /* ARGSUSED */ 620 int 621 hermon_ah_modify(hermon_state_t *state, hermon_ahhdl_t ah, 622 ibt_adds_vect_t *attr_p) 623 { 624 hermon_hw_udav_t old_udav; 625 uint64_t data_old; 626 int status, size, i; 627 628 /* Validate that specified port number is legal */ 629 if (!hermon_portnum_is_valid(state, attr_p->av_port_num)) { 630 return (IBT_HCA_PORT_INVALID); 631 } 632 633 mutex_enter(&ah->ah_lock); 634 635 /* Save a copy of the current UDAV data in old_udav. */ 636 bcopy(ah->ah_udav, &old_udav, sizeof (hermon_hw_udav_t)); 637 638 /* 639 * Fill in the new UDAV with the caller's data, passed in via the 640 * "ibt_adds_vect_t" structure. 641 * 642 * NOTE: We also need to save away a copy of the "av_dgid.gid_guid" 643 * field here (just as we did during hermon_ah_alloc()) because we 644 * may need to return it later to the IBTF (as a result of a 645 * subsequent query operation). As explained in hermon_ah_alloc(), 646 * unlike the other UDAV parameters, the value of "av_dgid.gid_guid" 647 * is not always preserved. The reason for this is described in 648 * hermon_set_addr_path(). 
649 */ 650 status = hermon_set_addr_path(state, attr_p, 651 (hermon_hw_addr_path_t *)ah->ah_udav, HERMON_ADDRPATH_UDAV); 652 if (status != DDI_SUCCESS) { 653 mutex_exit(&ah->ah_lock); 654 return (status); 655 } 656 ah->ah_save_guid = attr_p->av_dgid.gid_guid; 657 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(ah->ah_udav))) 658 ah->ah_udav->sl = attr_p->av_srvl; 659 660 /* 661 * Copy changes into the new UDAV. 662 * Note: We copy in 64-bit chunks. For the first two of these 663 * chunks it is necessary to read the current contents of the 664 * UDAV, mask off the modifiable portions (maintaining any 665 * of the "reserved" portions), and then mask on the new data. 666 */ 667 size = sizeof (hermon_hw_udav_t) >> 3; 668 for (i = 0; i < size; i++) { 669 data_old = ((uint64_t *)&old_udav)[i]; 670 671 /* 672 * Apply mask to change only the relevant values. 673 */ 674 if (i == 0) { 675 data_old = data_old & HERMON_UDAV_MODIFY_MASK0; 676 } else if (i == 1) { 677 data_old = data_old & HERMON_UDAV_MODIFY_MASK1; 678 } else { 679 data_old = 0; 680 } 681 682 /* Store the updated values to the UDAV */ 683 ((uint64_t *)ah->ah_udav)[i] |= data_old; 684 } 685 686 /* 687 * Put the valid PD number back into the UDAV entry, as it 688 * might have been clobbered above. 689 */ 690 ah->ah_udav->pd = old_udav.pd; 691 692 693 mutex_exit(&ah->ah_lock); 694 return (DDI_SUCCESS); 695 } 696 697 /* 698 * hermon_mcg_attach() 699 * Context: Can be called only from user or kernel context. 700 */ 701 int 702 hermon_mcg_attach(hermon_state_t *state, hermon_qphdl_t qp, ib_gid_t gid, 703 ib_lid_t lid) 704 { 705 hermon_rsrc_t *rsrc; 706 hermon_hw_mcg_t *mcg_entry; 707 hermon_hw_mcg_qp_list_t *mcg_entry_qplist; 708 hermon_mcghdl_t mcg, newmcg; 709 uint64_t mgid_hash; 710 uint32_t end_indx; 711 int status; 712 uint_t qp_found; 713 714 /* 715 * It is only allowed to attach MCG to UD queue pairs. 
Verify 716 * that the intended QP is of the appropriate transport type 717 */ 718 if (qp->qp_serv_type != HERMON_QP_UD) { 719 return (IBT_QP_SRV_TYPE_INVALID); 720 } 721 722 /* 723 * Check for invalid Multicast DLID. Specifically, all Multicast 724 * LIDs should be within a well defined range. If the specified LID 725 * is outside of that range, then return an error. 726 */ 727 if (hermon_mlid_is_valid(lid) == 0) { 728 return (IBT_MC_MLID_INVALID); 729 } 730 /* 731 * Check for invalid Multicast GID. All Multicast GIDs should have 732 * a well-defined pattern of bits and flags that are allowable. If 733 * the specified GID does not meet the criteria, then return an error. 734 */ 735 if (hermon_mgid_is_valid(gid) == 0) { 736 return (IBT_MC_MGID_INVALID); 737 } 738 739 /* 740 * Compute the MGID hash value. Since the MCG table is arranged as 741 * a number of separate hash chains, this operation converts the 742 * specified MGID into the starting index of an entry in the hash 743 * table (i.e. the index for the start of the appropriate hash chain). 744 * Subsequent operations below will walk the chain searching for the 745 * right place to add this new QP. 746 */ 747 status = hermon_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid, 748 &mgid_hash, HERMON_SLEEPFLAG_FOR_CONTEXT()); 749 if (status != HERMON_CMD_SUCCESS) { 750 cmn_err(CE_CONT, "Hermon: MGID_HASH command failed: %08x\n", 751 status); 752 if (status == HERMON_CMD_INVALID_STATUS) { 753 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 754 } 755 return (ibc_get_ci_failure(0)); 756 } 757 758 /* 759 * Grab the multicast group mutex. Then grab the pre-allocated 760 * temporary buffer used for holding and/or modifying MCG entries. 761 * Zero out the temporary MCG entry before we begin. 
762 */ 763 mutex_enter(&state->hs_mcglock); 764 mcg_entry = state->hs_mcgtmp; 765 mcg_entry_qplist = HERMON_MCG_GET_QPLIST_PTR(mcg_entry); 766 bzero(mcg_entry, HERMON_MCGMEM_SZ(state)); 767 768 /* 769 * Walk through the array of MCG entries starting at "mgid_hash". 770 * Try to find the appropriate place for this new QP to be added. 771 * This could happen when the first entry of the chain has MGID == 0 772 * (which means that the hash chain is empty), or because we find 773 * an entry with the same MGID (in which case we'll add the QP to 774 * that MCG), or because we come to the end of the chain (in which 775 * case this is the first QP being added to the multicast group that 776 * corresponds to the MGID. The hermon_mcg_walk_mgid_hash() routine 777 * walks the list and returns an index into the MCG table. The entry 778 * at this index is then checked to determine which case we have 779 * fallen into (see below). Note: We are using the "shadow" MCG 780 * list (of hermon_mcg_t structs) for this lookup because the real 781 * MCG entries are in hardware (and the lookup process would be much 782 * more time consuming). 783 */ 784 end_indx = hermon_mcg_walk_mgid_hash(state, mgid_hash, gid, NULL); 785 mcg = &state->hs_mcghdl[end_indx]; 786 787 /* 788 * If MGID == 0, then the hash chain is empty. Just fill in the 789 * current entry. Note: No need to allocate an MCG table entry 790 * as all the hash chain "heads" are already preallocated. 791 */ 792 if ((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) { 793 794 /* Fill in the current entry in the "shadow" MCG list */ 795 hermon_mcg_setup_new_hdr(mcg, mcg_entry, gid, NULL); 796 797 /* 798 * Try to add the new QP number to the list. This (and the 799 * above) routine fills in a temporary MCG. The "mcg_entry" 800 * and "mcg_entry_qplist" pointers simply point to different 801 * offsets within the same temporary copy of the MCG (for 802 * convenience). 
Note: If this fails, we need to invalidate 803 * the entries we've already put into the "shadow" list entry 804 * above. 805 */ 806 status = hermon_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp, 807 &qp_found); 808 if (status != DDI_SUCCESS) { 809 bzero(mcg, sizeof (struct hermon_sw_mcg_list_s)); 810 mutex_exit(&state->hs_mcglock); 811 return (status); 812 } 813 if (!qp_found) 814 mcg_entry->member_cnt = (mcg->mcg_num_qps + 1); 815 /* set the member count */ 816 817 /* 818 * Once the temporary MCG has been filled in, write the entry 819 * into the appropriate location in the Hermon MCG entry table. 820 * If it's successful, then drop the lock and return success. 821 * Note: In general, this operation shouldn't fail. If it 822 * does, then it is an indication that something (probably in 823 * HW, but maybe in SW) has gone seriously wrong. We still 824 * want to zero out the entries that we've filled in above 825 * (in the hermon_mcg_setup_new_hdr() routine). 826 */ 827 status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx, 828 HERMON_CMD_NOSLEEP_SPIN); 829 if (status != HERMON_CMD_SUCCESS) { 830 bzero(mcg, sizeof (struct hermon_sw_mcg_list_s)); 831 mutex_exit(&state->hs_mcglock); 832 HERMON_WARNING(state, "failed to write MCG entry"); 833 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: " 834 "%08x\n", status); 835 if (status == HERMON_CMD_INVALID_STATUS) { 836 hermon_fm_ereport(state, HCA_SYS_ERR, 837 HCA_ERR_SRV_LOST); 838 } 839 return (ibc_get_ci_failure(0)); 840 } 841 842 /* 843 * Now that we know all the Hermon firmware accesses have been 844 * successful, we update the "shadow" MCG entry by incrementing 845 * the "number of attached QPs" count. 846 * 847 * We increment only if the QP is not already part of the 848 * MCG by checking the 'qp_found' flag returned from the 849 * qplist_add above. 850 */ 851 if (!qp_found) { 852 mcg->mcg_num_qps++; 853 854 /* 855 * Increment the refcnt for this QP. 
			 * Because the QP
			 * was added to this MCG, the refcnt must be
			 * incremented.
			 */
			hermon_qp_mcg_refcnt_inc(qp);
		}

		/*
		 * We drop the lock and return success.
		 */
		mutex_exit(&state->hs_mcglock);
		return (DDI_SUCCESS);
	}

	/*
	 * If the specified MGID matches the MGID in the current entry, then
	 * we need to try to add the QP to the current MCG entry.  In this
	 * case, it means that we need to read the existing MCG entry (into
	 * the temporary MCG), add the new QP number to the temporary entry
	 * (using the same method we used above), and write the entry back
	 * to the hardware (same as above).
	 */
	if ((mcg->mcg_mgid_h == gid.gid_prefix) &&
	    (mcg->mcg_mgid_l == gid.gid_guid)) {

		/*
		 * Read the current MCG entry into the temporary MCG.  Note:
		 * In general, this operation shouldn't fail.  If it does,
		 * then it is an indication that something (probably in HW,
		 * but maybe in SW) has gone seriously wrong.
		 */
		status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
		    HERMON_CMD_NOSLEEP_SPIN);
		if (status != HERMON_CMD_SUCCESS) {
			mutex_exit(&state->hs_mcglock);
			HERMON_WARNING(state, "failed to read MCG entry");
			cmn_err(CE_CONT, "Hermon: READ_MGM command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}

		/*
		 * Try to add the new QP number to the list.  This routine
		 * fills in the necessary pieces of the temporary MCG.  The
		 * "mcg_entry_qplist" pointer is used to point to the portion
		 * of the temporary MCG that holds the QP numbers.
		 *
		 * Note: hermon_mcg_qplist_add() returns SUCCESS if it
		 * already found the QP in the list.  In this case, the QP is
		 * not added on to the list again.  Check the flag 'qp_found'
		 * if this value is needed to be known.
		 */
		status = hermon_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp,
		    &qp_found);
		if (status != DDI_SUCCESS) {
			mutex_exit(&state->hs_mcglock);
			return (status);
		}
		if (!qp_found)
			mcg_entry->member_cnt = (mcg->mcg_num_qps + 1);
			/* set the member count */

		/*
		 * Once the temporary MCG has been updated, write the entry
		 * into the appropriate location in the Hermon MCG entry
		 * table.  If it's successful, then drop the lock and return
		 * success.  Note: In general, this operation shouldn't fail.
		 * If it does, then it is an indication that something
		 * (probably in HW, but maybe in SW) has gone seriously wrong.
		 */
		status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
		    HERMON_CMD_NOSLEEP_SPIN);
		if (status != HERMON_CMD_SUCCESS) {
			mutex_exit(&state->hs_mcglock);
			HERMON_WARNING(state, "failed to write MCG entry");
			cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}

		/*
		 * Now that we know all the Hermon firmware accesses have been
		 * successful, we update the current "shadow" MCG entry by
		 * incrementing the "number of attached QPs" count.
		 *
		 * We increment only if the QP is not already part of the
		 * MCG, by checking the 'qp_found' flag returned from the
		 * hermon_mcg_qplist_add() call above.
		 */
		if (!qp_found) {
			mcg->mcg_num_qps++;

			/*
			 * Increment the refcnt for this QP.  Because the QP
			 * was added to this MCG, the refcnt must be
			 * incremented.
			 */
			hermon_qp_mcg_refcnt_inc(qp);
		}

		/*
		 * We drop the lock and return success.
		 */
		mutex_exit(&state->hs_mcglock);
		return (DDI_SUCCESS);
	}

	/*
	 * If we've reached here, then we're at the end of the hash chain.
	 * We need to allocate a new MCG entry, fill it in, write it to Hermon,
	 * and update the previous entry to link the new one to the end of the
	 * chain.
	 */

	/*
	 * Allocate an MCG table entry.  This will be filled in with all
	 * the necessary parameters to define the multicast group.  Then it
	 * will be written to the hardware in the next-to-last step below.
	 */
	status = hermon_rsrc_alloc(state, HERMON_MCG, 1, HERMON_NOSLEEP, &rsrc);
	if (status != DDI_SUCCESS) {
		mutex_exit(&state->hs_mcglock);
		return (IBT_INSUFF_RESOURCE);
	}

	/*
	 * Fill in the new entry in the "shadow" MCG list.  Note: Just as
	 * it does above, hermon_mcg_setup_new_hdr() also fills in a portion
	 * of the temporary MCG entry (the rest of which will be filled in by
	 * hermon_mcg_qplist_add() below)
	 */
	newmcg = &state->hs_mcghdl[rsrc->hr_indx];
	hermon_mcg_setup_new_hdr(newmcg, mcg_entry, gid, rsrc);

	/*
	 * Try to add the new QP number to the list.  This routine fills in
	 * the final necessary pieces of the temporary MCG.  The
	 * "mcg_entry_qplist" pointer is used to point to the portion of the
	 * temporary MCG that holds the QP numbers.  If we fail here, we
	 * must undo the previous resource allocation.
	 *
	 * Note: hermon_mcg_qplist_add() can return SUCCESS if it already
	 * found the QP in the list.  In this case, the QP is not added on to
	 * the list again.  Check the flag 'qp_found' if this value is needed
	 * to be known.
	 */
	status = hermon_mcg_qplist_add(state, newmcg, mcg_entry_qplist, qp,
	    &qp_found);
	if (status != DDI_SUCCESS) {
		bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
		hermon_rsrc_free(state, &rsrc);
		mutex_exit(&state->hs_mcglock);
		return (status);
	}
	mcg_entry->member_cnt = (newmcg->mcg_num_qps + 1);
	    /* set the member count */

	/*
	 * Once the temporary MCG has been updated, write the entry into the
	 * appropriate location in the Hermon MCG entry table.  If this is
	 * successful, then we need to chain the previous entry to this one.
	 * Note: In general, this operation shouldn't fail.  If it does, then
	 * it is an indication that something (probably in HW, but maybe in
	 * SW) has gone seriously wrong.
	 */
	status = hermon_write_mgm_cmd_post(state, mcg_entry, rsrc->hr_indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
		hermon_rsrc_free(state, &rsrc);
		mutex_exit(&state->hs_mcglock);
		HERMON_WARNING(state, "failed to write MCG entry");
		cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Now read the current MCG entry (the one previously at the end of
	 * hash chain) into the temporary MCG.  We are going to update its
	 * "next_gid_indx" now and write the entry back to the MCG table.
	 * Note: In general, this operation shouldn't fail.  If it does, then
	 * it is an indication that something (probably in HW, but maybe in
	 * SW) has gone seriously wrong.  We will free up the MCG entry
	 * resource, but we will not undo the previously written MCG entry in
	 * the HW.  This is OK, though, because the MCG entry is not currently
	 * attached to any hash chain.
	 */
	status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
		hermon_rsrc_free(state, &rsrc);
		mutex_exit(&state->hs_mcglock);
		HERMON_WARNING(state, "failed to read MCG entry");
		cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Finally, we update the "next_gid_indx" field in the temporary MCG
	 * and attempt to write the entry back into the Hermon MCG table.  If
	 * this succeeds, then we update the "shadow" list to reflect the
	 * change, drop the lock, and return success.  Note: In general, this
	 * operation shouldn't fail.  If it does, then it is an indication
	 * that something (probably in HW, but maybe in SW) has gone seriously
	 * wrong.  Just as we do above, we will free up the MCG entry
	 * resource, but we will not try to undo the previously written MCG
	 * entry.  This is OK, though, because (since we failed here to update
	 * the end of the chain) that other entry is not currently attached to
	 * any chain.
	 */
	mcg_entry->next_gid_indx = rsrc->hr_indx;
	status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
		hermon_rsrc_free(state, &rsrc);
		mutex_exit(&state->hs_mcglock);
		HERMON_WARNING(state, "failed to write MCG entry");
		cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}
	mcg = &state->hs_mcghdl[end_indx];
	mcg->mcg_next_indx = rsrc->hr_indx;

	/*
	 * Now that we know all the Hermon firmware accesses have been
	 * successful, we update the new "shadow" MCG entry by incrementing
	 * the "number of attached QPs" count.  Then we drop the lock and
	 * return success.
	 */
	newmcg->mcg_num_qps++;

	/*
	 * Increment the refcnt for this QP.  Because the QP
	 * was added to this MCG, the refcnt must be
	 * incremented.
	 */
	hermon_qp_mcg_refcnt_inc(qp);

	mutex_exit(&state->hs_mcglock);
	return (DDI_SUCCESS);
}


/*
 * hermon_mcg_detach()
 *    Context: Can be called only from user or kernel context.
 *
 * Removes the specified QP from the multicast group identified by "gid"
 * (and validates "lid" as a legal multicast DLID first).  If the QP was
 * the last member of the group, the MCG entry itself is unlinked from
 * its hash chain.
 */
int
hermon_mcg_detach(hermon_state_t *state, hermon_qphdl_t qp, ib_gid_t gid,
    ib_lid_t lid)
{
	hermon_hw_mcg_t		*mcg_entry;
	hermon_hw_mcg_qp_list_t	*mcg_entry_qplist;
	hermon_mcghdl_t		mcg;
	uint64_t		mgid_hash;
	uint32_t		end_indx, prev_indx;
	int			status;

	/*
	 * Check for invalid Multicast DLID.  Specifically, all Multicast
	 * LIDs should be within a well defined range.  If the specified LID
	 * is outside of that range, then return an error.
	 */
	if (hermon_mlid_is_valid(lid) == 0) {
		return (IBT_MC_MLID_INVALID);
	}

	/*
	 * Compute the MGID hash value.  As described above, the MCG table is
	 * arranged as a number of separate hash chains.  This operation
	 * converts the specified MGID into the starting index of an entry in
	 * the hash table (i.e. the index for the start of the appropriate
	 * hash chain).  Subsequent operations below will walk the chain
	 * searching for a matching entry from which to attempt to remove
	 * the specified QP.
	 */
	status = hermon_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
	    &mgid_hash, HERMON_SLEEPFLAG_FOR_CONTEXT());
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: MGID_HASH command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Grab the multicast group mutex.  Then grab the pre-allocated
	 * temporary buffer used for holding and/or modifying MCG entries.
	 */
	mutex_enter(&state->hs_mcglock);
	mcg_entry = state->hs_mcgtmp;
	mcg_entry_qplist = HERMON_MCG_GET_QPLIST_PTR(mcg_entry);

	/*
	 * Walk through the array of MCG entries starting at "mgid_hash".
	 * Try to find an MCG entry with a matching MGID.  The
	 * hermon_mcg_walk_mgid_hash() routine walks the list and returns an
	 * index into the MCG table.  The entry at this index is checked to
	 * determine whether it is a match or not.  If it is a match, then
	 * we continue on to attempt to remove the QP from the MCG.  If it
	 * is not a match (or not a valid MCG entry), then we return an error.
	 */
	end_indx = hermon_mcg_walk_mgid_hash(state, mgid_hash, gid, &prev_indx);
	mcg = &state->hs_mcghdl[end_indx];

	/*
	 * If MGID == 0 (the hash chain is empty) or if the specified MGID
	 * does not match the MGID in the current entry, then return
	 * IBT_MC_MGID_INVALID (to indicate that the specified MGID is not
	 * valid).
	 */
	if (((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) ||
	    ((mcg->mcg_mgid_h != gid.gid_prefix) ||
	    (mcg->mcg_mgid_l != gid.gid_guid))) {
		mutex_exit(&state->hs_mcglock);
		return (IBT_MC_MGID_INVALID);
	}

	/*
	 * Read the current MCG entry into the temporary MCG.  Note: In
	 * general, this operation shouldn't fail.  If it does, then it is
	 * an indication that something (probably in HW, but maybe in SW)
	 * has gone seriously wrong.
	 */
	status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		mutex_exit(&state->hs_mcglock);
		HERMON_WARNING(state, "failed to read MCG entry");
		cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Search the QP number list for a match.  If a match is found, then
	 * remove the entry from the QP list.  Otherwise, if no match is
	 * found, return an error.
	 */
	status = hermon_mcg_qplist_remove(mcg, mcg_entry_qplist, qp);
	if (status != DDI_SUCCESS) {
		mutex_exit(&state->hs_mcglock);
		return (status);
	}

	/*
	 * Decrement the MCG count for this QP.  When the 'qp_mcg_refcnt'
	 * field becomes 0, then this QP is no longer a member of any
	 * MCG.
	 */
	hermon_qp_mcg_refcnt_dec(qp);

	/*
	 * If the current MCG's QP number list is about to be made empty
	 * ("mcg_num_qps" == 1), then remove the entry itself from the hash
	 * chain.  Otherwise, just write the updated MCG entry back to the
	 * hardware.  In either case, once we successfully update the hardware
	 * chain, then we decrement the "shadow" list entry's "mcg_num_qps"
	 * count (or zero out the entire "shadow" list entry) before returning
	 * success.  Note: Zeroing out the "shadow" list entry is done
	 * inside of hermon_mcg_hash_list_remove().
	 */
	if (mcg->mcg_num_qps == 1) {

		/* Remove an MCG entry from the hash chain */
		status = hermon_mcg_hash_list_remove(state, end_indx, prev_indx,
		    mcg_entry);
		if (status != DDI_SUCCESS) {
			mutex_exit(&state->hs_mcglock);
			return (status);
		}

	} else {
		/*
		 * Write the updated MCG entry back to the Hermon MCG table.
		 * If this succeeds, then we update the "shadow" list to
		 * reflect the change (i.e. decrement the "mcg_num_qps"),
		 * drop the lock, and return success.  Note: In general,
		 * this operation shouldn't fail.  If it does, then it is an
		 * indication that something (probably in HW, but maybe in SW)
		 * has gone seriously wrong.
		 */
		mcg_entry->member_cnt = (mcg->mcg_num_qps - 1);
		status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
		    HERMON_CMD_NOSLEEP_SPIN);
		if (status != HERMON_CMD_SUCCESS) {
			mutex_exit(&state->hs_mcglock);
			HERMON_WARNING(state, "failed to write MCG entry");
			cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}
		mcg->mcg_num_qps--;
	}

	mutex_exit(&state->hs_mcglock);
	return (DDI_SUCCESS);
}


/*
 * hermon_qp_mcg_refcnt_inc()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp)
{
	/* Increment the QP's MCG reference count (under the QP lock) */
	mutex_enter(&qp->qp_lock);
	qp->qp_mcg_refcnt++;
	mutex_exit(&qp->qp_lock);
}


/*
 * hermon_qp_mcg_refcnt_dec()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp)
{
	/* Decrement the QP's MCG reference count (under the QP lock) */
	mutex_enter(&qp->qp_lock);
	qp->qp_mcg_refcnt--;
	mutex_exit(&qp->qp_lock);
}


/*
 * hermon_mcg_qplist_add()
 *    Context: Can be called from interrupt or base context.
 *
 * Appends the QP's number to the temporary MCG QP list (unless the QP
 * is already present, in which case *qp_found is set and the list is
 * left untouched).  Caller must hold hs_mcglock.
 */
static int
hermon_mcg_qplist_add(hermon_state_t *state, hermon_mcghdl_t mcg,
    hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp,
    uint_t *qp_found)
{
	uint_t		qplist_indx;

	ASSERT(MUTEX_HELD(&state->hs_mcglock));

	qplist_indx = mcg->mcg_num_qps;

	/*
	 * Determine if we have exceeded the maximum number of QP per
	 * multicast group.
	 *    If we have, then return an error
	 */
	if (qplist_indx >= state->hs_cfg_profile->cp_num_qp_per_mcg) {
		return (IBT_HCA_MCG_QP_EXCEEDED);
	}

	/*
	 * Determine if the QP is already attached to this MCG table.  If it
	 * is, then we break out and treat this operation as a NO-OP
	 */
	for (qplist_indx = 0; qplist_indx < mcg->mcg_num_qps;
	    qplist_indx++) {
		if (mcg_qplist[qplist_indx].qpn == qp->qp_qpnum) {
			break;
		}
	}

	/*
	 * If the QP was already on the list, set 'qp_found' to TRUE.  We
	 * still return SUCCESS in this case, but the qplist will not have
	 * been updated because the QP was already on the list.
	 */
	if (qplist_indx < mcg->mcg_num_qps) {
		*qp_found = 1;
	} else {
		/*
		 * Otherwise, append the new QP number to the end of the
		 * current QP list.  Note: We will increment the "mcg_num_qps"
		 * field on the "shadow" MCG list entry later (after we know
		 * that all necessary Hermon firmware accesses have been
		 * successful).
		 *
		 * Set 'qp_found' to 0 so we know the QP was added on to the
		 * list for sure.
		 */
		mcg_qplist[qplist_indx].qpn =
		    (qp->qp_qpnum | HERMON_MCG_QPN_BLOCK_LB);
		*qp_found = 0;
	}

	return (DDI_SUCCESS);
}



/*
 * hermon_mcg_qplist_remove()
 *    Context: Can be called from interrupt or base context.
 *
 * Removes the QP's number from the temporary MCG QP list by overwriting
 * it with the last entry.  The "shadow" count (mcg_num_qps) is
 * decremented by the caller, not here.
 */
static int
hermon_mcg_qplist_remove(hermon_mcghdl_t mcg,
    hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp)
{
	uint_t		i, qplist_indx;

	/*
	 * Search the MCG QP list for a matching QPN.  When
	 * it's found, we swap the last entry with the current
	 * one, set the last entry to zero, and return.  If
	 * it's not found, then it's an error.
	 */
	qplist_indx = mcg->mcg_num_qps;
	for (i = 0; i < qplist_indx; i++) {
		if (mcg_qplist[i].qpn == qp->qp_qpnum) {
			mcg_qplist[i] = mcg_qplist[qplist_indx - 1];
			mcg_qplist[qplist_indx - 1].qpn = 0;

			return (DDI_SUCCESS);
		}
	}

	return (IBT_QP_HDL_INVALID);
}


/*
 * hermon_mcg_walk_mgid_hash()
 *    Context: Can be called from interrupt or base context.
 *
 * Walks the MCG hash chain that starts at "start_indx" looking for an
 * entry whose MGID matches "mgid".  Returns the index of the matching
 * entry, or of the last entry examined if no match is found.  Caller
 * must hold hs_mcglock.
 */
static uint_t
hermon_mcg_walk_mgid_hash(hermon_state_t *state, uint64_t start_indx,
    ib_gid_t mgid, uint_t *p_indx)
{
	hermon_mcghdl_t	curr_mcghdl;
	uint_t		curr_indx, prev_indx;

	ASSERT(MUTEX_HELD(&state->hs_mcglock));

	/* Start at the head of the hash chain */
	curr_indx = (uint_t)start_indx;
	prev_indx = curr_indx;
	curr_mcghdl = &state->hs_mcghdl[curr_indx];

	/* If the first entry in the chain has MGID == 0, then stop */
	if ((curr_mcghdl->mcg_mgid_h == 0) &&
	    (curr_mcghdl->mcg_mgid_l == 0)) {
		goto end_mgid_hash_walk;
	}

	/* If the first entry in the chain matches the MGID, then stop */
	if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
	    (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
		goto end_mgid_hash_walk;
	}

	/* Otherwise, walk the hash chain looking for a match */
	while (curr_mcghdl->mcg_next_indx != 0) {
		prev_indx = curr_indx;
		curr_indx = curr_mcghdl->mcg_next_indx;
		curr_mcghdl = &state->hs_mcghdl[curr_indx];

		if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
		    (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
			break;
		}
	}

end_mgid_hash_walk:
	/*
	 * If necessary, return the index of the previous entry too.  This
	 * is primarily used for detaching a QP from a multicast group.  It
	 * may be necessary, in that case, to delete an MCG entry from the
	 * hash chain and having the index of the previous entry is helpful.
	 */
	if (p_indx != NULL) {
		*p_indx = prev_indx;
	}
	return (curr_indx);
}


/*
 * hermon_mcg_setup_new_hdr()
 *    Context: Can be called from interrupt or base context.
 *
 * Initializes both the "shadow" (software) MCG entry and the header
 * fields of the temporary hardware MCG entry for a newly created
 * multicast group.
 */
static void
hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg, hermon_hw_mcg_t *mcg_hdr,
    ib_gid_t mgid, hermon_rsrc_t *mcg_rsrc)
{
	/*
	 * Fill in the fields of the "shadow" entry used by software
	 * to track MCG hardware entry
	 */
	mcg->mcg_mgid_h = mgid.gid_prefix;
	mcg->mcg_mgid_l = mgid.gid_guid;
	mcg->mcg_rsrcp = mcg_rsrc;
	mcg->mcg_next_indx = 0;
	mcg->mcg_num_qps = 0;

	/*
	 * Fill the header fields of the MCG entry (in the temporary copy)
	 */
	mcg_hdr->mgid_h = mgid.gid_prefix;
	mcg_hdr->mgid_l = mgid.gid_guid;
	mcg_hdr->next_gid_indx = 0;
}


/*
 * hermon_mcg_hash_list_remove()
 *    Context: Can be called only from user or kernel context.
 *
 * Unlinks the MCG entry at "curr_indx" from its hash chain.  Three
 * cases: the entry is the only one on the chain (invalidate it); the
 * entry is the head of the chain (copy the next entry over it); or the
 * entry is in the middle/end (point the previous entry past it).
 */
static int
hermon_mcg_hash_list_remove(hermon_state_t *state, uint_t curr_indx,
    uint_t prev_indx, hermon_hw_mcg_t *mcg_entry)
{
	hermon_mcghdl_t		curr_mcg, prev_mcg, next_mcg;
	uint_t			next_indx;
	int			status;

	/* Get the pointer to "shadow" list for current entry */
	curr_mcg = &state->hs_mcghdl[curr_indx];

	/*
	 * If this is the first entry on a hash chain, then attempt to replace
	 * the entry with the next entry on the chain.  If there are no
	 * subsequent entries on the chain, then this is the only entry and
	 * should be invalidated.
	 */
	if (curr_indx == prev_indx) {

		/*
		 * If this is the only entry on the chain, then invalidate it.
		 * Note: Invalidating an MCG entry means writing all zeros
		 * to the entry.  This is only necessary for those MCG
		 * entries that are the "head" entries of the individual hash
		 * chains.  Regardless of whether this operation returns
		 * success or failure, return that result to the caller.
		 */
		next_indx = curr_mcg->mcg_next_indx;
		if (next_indx == 0) {
			status = hermon_mcg_entry_invalidate(state, mcg_entry,
			    curr_indx);
			bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
			return (status);
		}

		/*
		 * Otherwise, this is just the first entry on the chain, so
		 * grab the next one
		 */
		next_mcg = &state->hs_mcghdl[next_indx];

		/*
		 * Read the next MCG entry into the temporary MCG.  Note:
		 * In general, this operation shouldn't fail.  If it does,
		 * then it is an indication that something (probably in HW,
		 * but maybe in SW) has gone seriously wrong.
		 */
		status = hermon_read_mgm_cmd_post(state, mcg_entry, next_indx,
		    HERMON_CMD_NOSLEEP_SPIN);
		if (status != HERMON_CMD_SUCCESS) {
			HERMON_WARNING(state, "failed to read MCG entry");
			cmn_err(CE_CONT, "Hermon: READ_MGM command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}

		/*
		 * Copy/Write the temporary MCG back to the hardware MCG list
		 * using the current index.  This essentially removes the
		 * current MCG entry from the list by writing over it with
		 * the next one.  If this is successful, then we can do the
		 * same operation for the "shadow" list.  And we can also
		 * free up the Hermon MCG entry resource that was associated
		 * with the (old) next entry.  Note: In general, this
		 * operation shouldn't fail.  If it does, then it is an
		 * indication that something (probably in HW, but maybe in SW)
		 * has gone seriously wrong.
		 */
		status = hermon_write_mgm_cmd_post(state, mcg_entry, curr_indx,
		    HERMON_CMD_NOSLEEP_SPIN);
		if (status != HERMON_CMD_SUCCESS) {
			HERMON_WARNING(state, "failed to write MCG entry");
			cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}

		/*
		 * Copy all the software tracking information from the next
		 * entry on the "shadow" MCG list into the current entry on
		 * the list.  Then invalidate (zero out) the other "shadow"
		 * list entry.
		 */
		bcopy(next_mcg, curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
		bzero(next_mcg, sizeof (struct hermon_sw_mcg_list_s));

		/*
		 * Free up the Hermon MCG entry resource used by the "next"
		 * MCG entry.  That resource is no longer needed by any
		 * MCG entry which is first on a hash chain (like the "next"
		 * entry has just become).  Note: after the bcopy above,
		 * curr_mcg->mcg_rsrcp holds the "next" entry's resource.
		 */
		hermon_rsrc_free(state, &curr_mcg->mcg_rsrcp);

		return (DDI_SUCCESS);
	}

	/*
	 * Else if this is the last entry on the hash chain (or a middle
	 * entry), then we update the previous entry's "next_gid_index" field
	 * to make it point instead to the next entry on the chain.  By
	 * skipping over the removed entry in this way, we can then free up
	 * any resources associated with the current entry.  Note: We don't
	 * need to invalidate the "skipped over" hardware entry because it
	 * will no longer be connected to any hash chains, and if/when it is
	 * finally re-used, it will be written with entirely new values.
	 */

	/*
	 * Read the next MCG entry into the temporary MCG.  Note: In general,
	 * this operation shouldn't fail.  If it does, then it is an
	 * indication that something (probably in HW, but maybe in SW) has
	 * gone seriously wrong.
	 */
	status = hermon_read_mgm_cmd_post(state, mcg_entry, prev_indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		HERMON_WARNING(state, "failed to read MCG entry");
		cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Finally, we update the "next_gid_indx" field in the temporary MCG
	 * and attempt to write the entry back into the Hermon MCG table.  If
	 * this succeeds, then we update the "shadow" list to reflect the
	 * change, free up the Hermon MCG entry resource that was associated
	 * with the current entry, and return success.  Note: In general,
	 * this operation shouldn't fail.  If it does, then it is an
	 * indication that something (probably in HW, but maybe in SW) has
	 * gone seriously wrong.
	 */
	mcg_entry->next_gid_indx = curr_mcg->mcg_next_indx;
	status = hermon_write_mgm_cmd_post(state, mcg_entry, prev_indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		HERMON_WARNING(state, "failed to write MCG entry");
		cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR,
			    HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Get the pointer to the "shadow" MCG list entry for the previous
	 * MCG.  Update its "mcg_next_indx" to point to the next entry
	 * the one after the current entry.  Note: This next index may be
	 * zero, indicating the end of the list.
	 */
	prev_mcg = &state->hs_mcghdl[prev_indx];
	prev_mcg->mcg_next_indx = curr_mcg->mcg_next_indx;

	/*
	 * Free up the Hermon MCG entry resource used by the current entry.
	 * This resource is no longer needed because the chain now skips over
	 * the current entry.  Then invalidate (zero out) the current "shadow"
	 * list entry.
	 */
	hermon_rsrc_free(state, &curr_mcg->mcg_rsrcp);
	bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s));

	return (DDI_SUCCESS);
}


/*
 * hermon_mcg_entry_invalidate()
 *    Context: Can be called only from user or kernel context.
 *
 * Zeroes the hardware MCG entry at "indx" (using the caller's temporary
 * MCG buffer as scratch space).
 */
static int
hermon_mcg_entry_invalidate(hermon_state_t *state, hermon_hw_mcg_t *mcg_entry,
    uint_t indx)
{
	int		status;

	/*
	 * Invalidate the hardware MCG entry by zeroing out this temporary
	 * MCG and writing it to the hardware.  Note: In general, this
	 * operation shouldn't fail.  If it does, then it is an indication
	 * that something (probably in HW, but maybe in SW) has gone
	 * seriously wrong.
	 */
	bzero(mcg_entry, HERMON_MCGMEM_SZ(state));
	status = hermon_write_mgm_cmd_post(state, mcg_entry, indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		HERMON_WARNING(state, "failed to write MCG entry");
		cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_mgid_is_valid()
 *    Context: Can be called from interrupt or base context.
 *
 * Returns nonzero iff "gid" is a valid IBA multicast GID (top-eight-bits
 * prefix, flag bits, and scope bits all within the ranges allowed by the
 * IBA specification).
 */
static int
hermon_mgid_is_valid(ib_gid_t gid)
{
	uint_t		topbits, flags, scope;

	/*
	 * According to IBA 1.1 specification (section 4.1.1) a valid
	 * "multicast GID" must have its top eight bits set to all ones
	 */
	topbits = (gid.gid_prefix >> HERMON_MCG_TOPBITS_SHIFT) &
	    HERMON_MCG_TOPBITS_MASK;
	if (topbits != HERMON_MCG_TOPBITS) {
		return (0);
	}

	/*
	 * The next 4 bits are the "flag" bits.  These are valid only
	 * if they are "0" (which correspond to permanently assigned/
	 * "well-known" multicast GIDs) or "1" (for so-called "transient"
	 * multicast GIDs).  All other values are reserved.
	 */
	flags = (gid.gid_prefix >> HERMON_MCG_FLAGS_SHIFT) &
	    HERMON_MCG_FLAGS_MASK;
	if (!((flags == HERMON_MCG_FLAGS_PERM) ||
	    (flags == HERMON_MCG_FLAGS_NONPERM))) {
		return (0);
	}

	/*
	 * The next 4 bits are the "scope" bits.  These are valid only
	 * if they are "2" (Link-local), "5" (Site-local), "8"
	 * (Organization-local) or "E" (Global).  All other values
	 * are reserved (or currently unassigned).
	 */
	scope = (gid.gid_prefix >> HERMON_MCG_SCOPE_SHIFT) &
	    HERMON_MCG_SCOPE_MASK;
	if (!((scope == HERMON_MCG_SCOPE_LINKLOC) ||
	    (scope == HERMON_MCG_SCOPE_SITELOC) ||
	    (scope == HERMON_MCG_SCOPE_ORGLOC) ||
	    (scope == HERMON_MCG_SCOPE_GLOBAL))) {
		return (0);
	}

	/*
	 * If it passes all of the above checks, then we will consider it
	 * a valid multicast GID.
	 */
	return (1);
}


/*
 * hermon_mlid_is_valid()
 *    Context: Can be called from interrupt or base context.
 *
 * Returns nonzero iff "lid" is within the IBA-defined multicast DLID
 * range [IB_LID_MC_FIRST, IB_LID_MC_LAST].
 */
static int
hermon_mlid_is_valid(ib_lid_t lid)
{
	/*
	 * According to IBA 1.1 specification (section 4.1.1) a valid
	 * "multicast DLID" must be between 0xC000 and 0xFFFE.
	 */
	if ((lid < IB_LID_MC_FIRST) || (lid > IB_LID_MC_LAST)) {
		return (0);
	}

	return (1);
}


/*
 * hermon_pd_alloc()
 *    Context: Can be called only from user or kernel context.
 *
 * Allocates a Protection Domain handle (which carries its own unique PD
 * number) and returns it through "pdhdl" with its refcount initialized
 * to zero.
 */
int
hermon_pd_alloc(hermon_state_t *state, hermon_pdhdl_t *pdhdl, uint_t sleepflag)
{
	hermon_rsrc_t	*rsrc;
	hermon_pdhdl_t	pd;
	int		status;

	/*
	 * Allocate the software structure for tracking the protection domain
	 * (i.e. the Hermon Protection Domain handle).  By default each PD
	 * structure will have a unique PD number assigned to it.  All that
	 * is necessary is for software to initialize the PD reference count
	 * (to zero) and return success.
	 */
	status = hermon_rsrc_alloc(state, HERMON_PDHDL, 1, sleepflag, &rsrc);
	if (status != DDI_SUCCESS) {
		return (IBT_INSUFF_RESOURCE);
	}
	pd = (hermon_pdhdl_t)rsrc->hr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd))

	pd->pd_refcnt = 0;
	*pdhdl = pd;

	return (DDI_SUCCESS);
}


/*
 * hermon_pd_free()
 *    Context: Can be called only from user or kernel context.
 *
 * Frees a Protection Domain handle, failing with IBT_PD_IN_USE if any
 * IB object still references it.  On success, *pdhdl is set to NULL.
 */
int
hermon_pd_free(hermon_state_t *state, hermon_pdhdl_t *pdhdl)
{
	hermon_rsrc_t	*rsrc;
	hermon_pdhdl_t	pd;

	/*
	 * Pull all the necessary information from the Hermon Protection
	 * Domain handle.  This is necessary here because the resource for the
	 * PD is going to be freed up as part of this operation.
	 */
	pd = *pdhdl;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd))
	rsrc = pd->pd_rsrcp;

	/*
	 * Check the PD reference count.  If the reference count is non-zero,
	 * then it means that this protection domain is still referenced by
	 * some memory region, queue pair, address handle, or other IB object.
	 * If it is non-zero, then return an error.  Otherwise, free the
	 * Hermon resource and return success.
	 */
	if (pd->pd_refcnt != 0) {
		return (IBT_PD_IN_USE);
	}

	/* Free the Hermon Protection Domain handle */
	hermon_rsrc_free(state, &rsrc);

	/* Set the pdhdl pointer to NULL and return success */
	*pdhdl = (hermon_pdhdl_t)NULL;

	return (DDI_SUCCESS);
}


/*
 * hermon_pd_refcnt_inc()
 *    Context: Can be called from interrupt or base context.
 */
void
hermon_pd_refcnt_inc(hermon_pdhdl_t pd)
{
	/* Increment the protection domain's reference count (atomically) */
	atomic_inc_32(&pd->pd_refcnt);
}


/*
 * hermon_pd_refcnt_dec()
 *    Context: Can be called from interrupt or base context.
 */
void
hermon_pd_refcnt_dec(hermon_pdhdl_t pd)
{
	/* Decrement the protection domain's reference count (atomically) */
	atomic_dec_32(&pd->pd_refcnt);
}


/*
 * hermon_port_query()
 *    Context: Can be called only from user or kernel context.
 *
 * Queries the specified HCA port by posting GetPortInfo (and subsequent
 * GetGUIDInfo/PKey table) MADs through the firmware MAD_IFC interface,
 * and fills in the IBTF "ibt_hca_portinfo_t" structure for the caller.
 */
int
hermon_port_query(hermon_state_t *state, uint_t port, ibt_hca_portinfo_t *pi)
{
	sm_portinfo_t		portinfo;
	sm_guidinfo_t		guidinfo;
	sm_pkey_table_t		pkeytable;
	ib_gid_t		*sgid;
	uint_t			sgid_max, pkey_max, tbl_size;
	int			i, j, indx, status;
	ib_pkey_t		*pkeyp;
	ib_guid_t		*guidp;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pi))
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*state))

	/* Validate that specified port number is legal */
	if (!hermon_portnum_is_valid(state, port)) {
		return (IBT_HCA_PORT_INVALID);
	}
	pkeyp = state->hs_pkey[port - 1];
	guidp = state->hs_guid[port - 1];

	/*
	 * We use the Hermon MAD_IFC command to post a GetPortInfo MAD
	 * to the firmware (for the specified port number).  This returns
	 * a full PortInfo MAD (in "portinfo") which we subsequently
	 * parse to fill in the "ibt_hca_portinfo_t" structure returned
	 * to the IBTF.
	 */
	status = hermon_getportinfo_cmd_post(state, port,
	    HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: GetPortInfo (port %02d) command "
		    "failed: %08x\n", port, status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Parse the PortInfo MAD and fill in the IBTF structure
	 */
	pi->p_base_lid = portinfo.LID;
	pi->p_qkey_violations = portinfo.Q_KeyViolations;
	pi->p_pkey_violations = portinfo.P_KeyViolations;
	pi->p_sm_sl = portinfo.MasterSMSL;
	pi->p_sm_lid = portinfo.MasterSMLID;
	pi->p_linkstate = portinfo.PortState;
	pi->p_port_num = portinfo.LocalPortNum;
	pi->p_phys_state = portinfo.PortPhysicalState;
	pi->p_width_supported = portinfo.LinkWidthSupported;
	pi->p_width_enabled = portinfo.LinkWidthEnabled;
	pi->p_width_active = portinfo.LinkWidthActive;
	pi->p_speed_supported = portinfo.LinkSpeedSupported;
	pi->p_speed_enabled = portinfo.LinkSpeedEnabled;
	pi->p_speed_active = portinfo.LinkSpeedActive;
	pi->p_mtu = portinfo.MTUCap;
	pi->p_lmc = portinfo.LMC;
	pi->p_max_vl = portinfo.VLCap;
	pi->p_subnet_timeout = portinfo.SubnetTimeOut;
	pi->p_msg_sz = ((uint32_t)1 << HERMON_QP_LOG_MAX_MSGSZ);
	tbl_size = state->hs_cfg_profile->cp_log_max_gidtbl;
	pi->p_sgid_tbl_sz = (1 << tbl_size);
	tbl_size = state->hs_cfg_profile->cp_log_max_pkeytbl;
	pi->p_pkey_tbl_sz = (1 << tbl_size);
	state->hs_sn_prefix[port - 1] = portinfo.GidPrefix;

	/*
	 * Convert InfiniBand-defined port capability flags to the format
	 * specified by the IBTF
	 */
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM)
		pi->p_capabilities |= IBT_PORT_CAP_SM;
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM_DISABLED)
		pi->p_capabilities |= IBT_PORT_CAP_SM_DISABLED;
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SNMP_SUPPD)
		pi->p_capabilities |= IBT_PORT_CAP_SNMP_TUNNEL;
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_DM_SUPPD)
		pi->p_capabilities |= IBT_PORT_CAP_DM;
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_VM_SUPPD)
		pi->p_capabilities |= IBT_PORT_CAP_VENDOR;
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_CLNT_REREG_SUPPD)
		pi->p_capabilities |= IBT_PORT_CAP_CLNT_REREG;

	/*
	 * Fill in the SGID table.  Since the only access to the Hermon
	 * GID tables is through the firmware's MAD_IFC interface, we
	 * post as many GetGUIDInfo MADs as necessary to read in the entire
	 * contents of the SGID table (for the specified port).  Note:  The
	 * GetGUIDInfo command only gets eight GUIDs per operation.  These
	 * GUIDs are then appended to the GID prefix for the port (from the
	 * GetPortInfo above) to form the entire SGID table.
	 */
	for (i = 0; i < pi->p_sgid_tbl_sz; i += 8) {
		status = hermon_getguidinfo_cmd_post(state, port, i >> 3,
		    HERMON_SLEEPFLAG_FOR_CONTEXT(), &guidinfo);
		if (status != HERMON_CMD_SUCCESS) {
			cmn_err(CE_CONT, "Hermon: GetGUIDInfo (port %02d) "
			    "command failed: %08x\n", port, status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}

		/* Figure out how many of the entries are valid */
		sgid_max = min((pi->p_sgid_tbl_sz - i), 8);
		for (j = 0; j < sgid_max; j++) {
			indx = (i + j);
			sgid = &pi->p_sgid_tbl[indx];
			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sgid))
			sgid->gid_prefix = portinfo.GidPrefix;
			guidp[indx] = sgid->gid_guid =
			    guidinfo.GUIDBlocks[j];
		}
	}

	/*
	 * Fill in the PKey table.  Just as for the GID tables above, the
	 * only access to the Hermon PKey tables is through the firmware's
	 * MAD_IFC interface.
We post as many GetPKeyTable MADs as necessary 2004 * to read in the entire contents of the PKey table (for the specified 2005 * port). Note: The GetPKeyTable command only gets 32 PKeys per 2006 * operation. 2007 */ 2008 for (i = 0; i < pi->p_pkey_tbl_sz; i += 32) { 2009 status = hermon_getpkeytable_cmd_post(state, port, i, 2010 HERMON_SLEEPFLAG_FOR_CONTEXT(), &pkeytable); 2011 if (status != HERMON_CMD_SUCCESS) { 2012 cmn_err(CE_CONT, "Hermon: GetPKeyTable (port %02d) " 2013 "command failed: %08x\n", port, status); 2014 if (status == HERMON_CMD_INVALID_STATUS) { 2015 hermon_fm_ereport(state, HCA_SYS_ERR, 2016 HCA_ERR_SRV_LOST); 2017 } 2018 return (ibc_get_ci_failure(0)); 2019 } 2020 2021 /* Figure out how many of the entries are valid */ 2022 pkey_max = min((pi->p_pkey_tbl_sz - i), 32); 2023 for (j = 0; j < pkey_max; j++) { 2024 indx = (i + j); 2025 pkeyp[indx] = pi->p_pkey_tbl[indx] = 2026 pkeytable.P_KeyTableBlocks[j]; 2027 } 2028 } 2029 2030 return (DDI_SUCCESS); 2031 } 2032 2033 2034 /* 2035 * hermon_port_modify() 2036 * Context: Can be called only from user or kernel context. 
2037 */ 2038 /* ARGSUSED */ 2039 int 2040 hermon_port_modify(hermon_state_t *state, uint8_t port, 2041 ibt_port_modify_flags_t flags, uint8_t init_type) 2042 { 2043 sm_portinfo_t portinfo; 2044 uint32_t capmask; 2045 int status; 2046 hermon_hw_set_port_t set_port; 2047 2048 /* 2049 * Return an error if either of the unsupported flags are set 2050 */ 2051 if ((flags & IBT_PORT_SHUTDOWN) || 2052 (flags & IBT_PORT_SET_INIT_TYPE)) { 2053 return (IBT_NOT_SUPPORTED); 2054 } 2055 2056 bzero(&set_port, sizeof (set_port)); 2057 2058 /* 2059 * Determine whether we are trying to reset the QKey counter 2060 */ 2061 if (flags & IBT_PORT_RESET_QKEY) 2062 set_port.rqk = 1; 2063 2064 /* Validate that specified port number is legal */ 2065 if (!hermon_portnum_is_valid(state, port)) { 2066 return (IBT_HCA_PORT_INVALID); 2067 } 2068 2069 /* 2070 * Use the Hermon MAD_IFC command to post a GetPortInfo MAD to the 2071 * firmware (for the specified port number). This returns a full 2072 * PortInfo MAD (in "portinfo") from which we pull the current 2073 * capability mask. We then modify the capability mask as directed 2074 * by the "pmod_flags" field, and write the updated capability mask 2075 * using the Hermon SET_IB command (below). 2076 */ 2077 status = hermon_getportinfo_cmd_post(state, port, 2078 HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo); 2079 if (status != HERMON_CMD_SUCCESS) { 2080 if (status == HERMON_CMD_INVALID_STATUS) { 2081 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 2082 } 2083 return (ibc_get_ci_failure(0)); 2084 } 2085 2086 /* 2087 * Convert InfiniBand-defined port capability flags to the format 2088 * specified by the IBTF. Specifically, we modify the capability 2089 * mask based on the specified values. 
2090 */ 2091 capmask = portinfo.CapabilityMask; 2092 2093 if (flags & IBT_PORT_RESET_SM) 2094 capmask &= ~SM_CAP_MASK_IS_SM; 2095 else if (flags & IBT_PORT_SET_SM) 2096 capmask |= SM_CAP_MASK_IS_SM; 2097 2098 if (flags & IBT_PORT_RESET_SNMP) 2099 capmask &= ~SM_CAP_MASK_IS_SNMP_SUPPD; 2100 else if (flags & IBT_PORT_SET_SNMP) 2101 capmask |= SM_CAP_MASK_IS_SNMP_SUPPD; 2102 2103 if (flags & IBT_PORT_RESET_DEVMGT) 2104 capmask &= ~SM_CAP_MASK_IS_DM_SUPPD; 2105 else if (flags & IBT_PORT_SET_DEVMGT) 2106 capmask |= SM_CAP_MASK_IS_DM_SUPPD; 2107 2108 if (flags & IBT_PORT_RESET_VENDOR) 2109 capmask &= ~SM_CAP_MASK_IS_VM_SUPPD; 2110 else if (flags & IBT_PORT_SET_VENDOR) 2111 capmask |= SM_CAP_MASK_IS_VM_SUPPD; 2112 2113 set_port.cap_mask = capmask; 2114 2115 /* 2116 * Use the Hermon SET_PORT command to update the capability mask and 2117 * (possibly) reset the QKey violation counter for the specified port. 2118 * Note: In general, this operation shouldn't fail. If it does, then 2119 * it is an indication that something (probably in HW, but maybe in 2120 * SW) has gone seriously wrong. 2121 */ 2122 status = hermon_set_port_cmd_post(state, &set_port, port, 2123 HERMON_SLEEPFLAG_FOR_CONTEXT()); 2124 if (status != HERMON_CMD_SUCCESS) { 2125 HERMON_WARNING(state, "failed to modify port capabilities"); 2126 cmn_err(CE_CONT, "Hermon: SET_IB (port %02d) command failed: " 2127 "%08x\n", port, status); 2128 if (status == HERMON_CMD_INVALID_STATUS) { 2129 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 2130 } 2131 return (ibc_get_ci_failure(0)); 2132 } 2133 2134 return (DDI_SUCCESS); 2135 } 2136 2137 2138 /* 2139 * hermon_set_addr_path() 2140 * Context: Can be called from interrupt or base context. 2141 * 2142 * Note: This routine is used for two purposes. It is used to fill in the 2143 * Hermon UDAV fields, and it is used to fill in the address path information 2144 * for QPs. Because the two Hermon structures are similar, common fields can 2145 * be filled in here. 
Because they are different, however, we pass 2146 * an additional flag to indicate which type is being filled and do each one 2147 * uniquely 2148 */ 2149 2150 int hermon_srate_override = -1; /* allows ease of testing */ 2151 2152 int 2153 hermon_set_addr_path(hermon_state_t *state, ibt_adds_vect_t *av, 2154 hermon_hw_addr_path_t *path, uint_t type) 2155 { 2156 uint_t gidtbl_sz; 2157 hermon_hw_udav_t *udav; 2158 2159 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av)) 2160 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path)) 2161 2162 udav = (hermon_hw_udav_t *)(void *)path; 2163 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*udav)) 2164 path->mlid = av->av_src_path; 2165 path->rlid = av->av_dlid; 2166 2167 switch (av->av_srate) { 2168 case IBT_SRATE_2: /* 1xSDR-2.5Gb/s injection rate */ 2169 path->max_stat_rate = 7; break; 2170 case IBT_SRATE_10: /* 4xSDR-10.0Gb/s injection rate */ 2171 path->max_stat_rate = 8; break; 2172 case IBT_SRATE_30: /* 12xSDR-30Gb/s injection rate */ 2173 path->max_stat_rate = 9; break; 2174 case IBT_SRATE_5: /* 1xDDR-5Gb/s injection rate */ 2175 path->max_stat_rate = 10; break; 2176 case IBT_SRATE_20: /* 4xDDR-20Gb/s injection rate */ 2177 path->max_stat_rate = 11; break; 2178 case IBT_SRATE_40: /* 4xQDR-40Gb/s injection rate */ 2179 path->max_stat_rate = 12; break; 2180 case IBT_SRATE_60: /* 12xDDR-60Gb/s injection rate */ 2181 path->max_stat_rate = 13; break; 2182 case IBT_SRATE_80: /* 8xQDR-80Gb/s injection rate */ 2183 path->max_stat_rate = 14; break; 2184 case IBT_SRATE_120: /* 12xQDR-120Gb/s injection rate */ 2185 path->max_stat_rate = 15; break; 2186 case IBT_SRATE_NOT_SPECIFIED: /* Max */ 2187 path->max_stat_rate = 0; break; 2188 default: 2189 return (IBT_STATIC_RATE_INVALID); 2190 } 2191 if (hermon_srate_override != -1) /* for evaluating HCA firmware */ 2192 path->max_stat_rate = hermon_srate_override; 2193 2194 /* If "grh" flag is set, then check for valid SGID index too */ 2195 gidtbl_sz = (1 << state->hs_queryport.log_max_gid); 2196 if 
((av->av_send_grh) && (av->av_sgid_ix > gidtbl_sz)) { 2197 return (IBT_SGID_INVALID); 2198 } 2199 2200 /* 2201 * Fill in all "global" values regardless of the value in the GRH 2202 * flag. Because "grh" is not set unless "av_send_grh" is set, the 2203 * hardware will ignore the other "global" values as necessary. Note: 2204 * SW does this here to enable later query operations to return 2205 * exactly the same params that were passed when the addr path was 2206 * last written. 2207 */ 2208 path->grh = av->av_send_grh; 2209 if (type == HERMON_ADDRPATH_QP) { 2210 path->mgid_index = av->av_sgid_ix; 2211 } else { 2212 /* 2213 * For Hermon UDAV, the "mgid_index" field is the index into 2214 * a combined table (not a per-port table), but having sections 2215 * for each port. So some extra calculations are necessary. 2216 */ 2217 2218 path->mgid_index = ((av->av_port_num - 1) * gidtbl_sz) + 2219 av->av_sgid_ix; 2220 2221 udav->portnum = av->av_port_num; 2222 } 2223 2224 /* 2225 * According to Hermon PRM, the (31:0) part of rgid_l must be set to 2226 * "0x2" if the 'grh' or 'g' bit is cleared. It also says that we 2227 * only need to do it for UDAV's. So we enforce that here. 2228 * 2229 * NOTE: The entire 64 bits worth of GUID info is actually being 2230 * preserved (for UDAVs) by the callers of this function 2231 * (hermon_ah_alloc() and hermon_ah_modify()) and as long as the 2232 * 'grh' bit is not set, the upper 32 bits (63:32) of rgid_l are 2233 * "don't care". 
2234 */ 2235 if ((path->grh) || (type == HERMON_ADDRPATH_QP)) { 2236 path->flow_label = av->av_flow; 2237 path->tclass = av->av_tclass; 2238 path->hop_limit = av->av_hop; 2239 bcopy(&(av->av_dgid.gid_prefix), &(path->rgid_h), 2240 sizeof (uint64_t)); 2241 bcopy(&(av->av_dgid.gid_guid), &(path->rgid_l), 2242 sizeof (uint64_t)); 2243 } else { 2244 path->rgid_l = 0x2; 2245 path->flow_label = 0; 2246 path->tclass = 0; 2247 path->hop_limit = 0; 2248 path->rgid_h = 0; 2249 } 2250 /* extract the default service level */ 2251 udav->sl = (HERMON_DEF_SCHED_SELECTION & 0x3C) >> 2; 2252 2253 return (DDI_SUCCESS); 2254 } 2255 2256 2257 /* 2258 * hermon_get_addr_path() 2259 * Context: Can be called from interrupt or base context. 2260 * 2261 * Note: Just like hermon_set_addr_path() above, this routine is used for two 2262 * purposes. It is used to read in the Hermon UDAV fields, and it is used to 2263 * read in the address path information for QPs. Because the two Hermon 2264 * structures are similar, common fields can be read in here. But because 2265 * they are slightly different, we pass an additional flag to indicate which 2266 * type is being read. 
2267 */ 2268 void 2269 hermon_get_addr_path(hermon_state_t *state, hermon_hw_addr_path_t *path, 2270 ibt_adds_vect_t *av, uint_t type) 2271 { 2272 uint_t gidtbl_sz; 2273 2274 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path)) 2275 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av)) 2276 2277 av->av_src_path = path->mlid; 2278 av->av_dlid = path->rlid; 2279 2280 /* Set "av_ipd" value from max_stat_rate */ 2281 switch (path->max_stat_rate) { 2282 case 7: /* 1xSDR-2.5Gb/s injection rate */ 2283 av->av_srate = IBT_SRATE_2; break; 2284 case 8: /* 4xSDR-10.0Gb/s injection rate */ 2285 av->av_srate = IBT_SRATE_10; break; 2286 case 9: /* 12xSDR-30Gb/s injection rate */ 2287 av->av_srate = IBT_SRATE_30; break; 2288 case 10: /* 1xDDR-5Gb/s injection rate */ 2289 av->av_srate = IBT_SRATE_5; break; 2290 case 11: /* 4xDDR-20Gb/s injection rate */ 2291 av->av_srate = IBT_SRATE_20; break; 2292 case 12: /* xQDR-40Gb/s injection rate */ 2293 av->av_srate = IBT_SRATE_40; break; 2294 case 13: /* 12xDDR-60Gb/s injection rate */ 2295 av->av_srate = IBT_SRATE_60; break; 2296 case 14: /* 8xQDR-80Gb/s injection rate */ 2297 av->av_srate = IBT_SRATE_80; break; 2298 case 15: /* 12xQDR-120Gb/s injection rate */ 2299 av->av_srate = IBT_SRATE_120; break; 2300 case 0: /* max */ 2301 av->av_srate = IBT_SRATE_10; break; 2302 default: /* 1x injection rate */ 2303 av->av_srate = IBT_SRATE_1X; 2304 } 2305 2306 /* 2307 * Extract all "global" values regardless of the value in the GRH 2308 * flag. Because "av_send_grh" is set only if "grh" is set, software 2309 * knows to ignore the other "global" values as necessary. Note: SW 2310 * does it this way to enable these query operations to return exactly 2311 * the same params that were passed when the addr path was last written. 
2312 */ 2313 av->av_send_grh = path->grh; 2314 if (type == HERMON_ADDRPATH_QP) { 2315 av->av_sgid_ix = path->mgid_index; 2316 } else { 2317 /* 2318 * For Hermon UDAV, the "mgid_index" field is the index into 2319 * a combined table (not a per-port table). 2320 */ 2321 gidtbl_sz = (1 << state->hs_queryport.log_max_gid); 2322 av->av_sgid_ix = path->mgid_index - ((av->av_port_num - 1) * 2323 gidtbl_sz); 2324 2325 av->av_port_num = ((hermon_hw_udav_t *)(void *)path)->portnum; 2326 } 2327 av->av_flow = path->flow_label; 2328 av->av_tclass = path->tclass; 2329 av->av_hop = path->hop_limit; 2330 /* this is for alignment issue w/ the addr path struct in Hermon */ 2331 bcopy(&(path->rgid_h), &(av->av_dgid.gid_prefix), sizeof (uint64_t)); 2332 bcopy(&(path->rgid_l), &(av->av_dgid.gid_guid), sizeof (uint64_t)); 2333 } 2334 2335 2336 /* 2337 * hermon_portnum_is_valid() 2338 * Context: Can be called from interrupt or base context. 2339 */ 2340 int 2341 hermon_portnum_is_valid(hermon_state_t *state, uint_t portnum) 2342 { 2343 uint_t max_port; 2344 2345 max_port = state->hs_cfg_profile->cp_num_ports; 2346 if ((portnum <= max_port) && (portnum != 0)) { 2347 return (1); 2348 } else { 2349 return (0); 2350 } 2351 } 2352 2353 2354 /* 2355 * hermon_pkeyindex_is_valid() 2356 * Context: Can be called from interrupt or base context. 2357 */ 2358 int 2359 hermon_pkeyindex_is_valid(hermon_state_t *state, uint_t pkeyindx) 2360 { 2361 uint_t max_pkeyindx; 2362 2363 max_pkeyindx = 1 << state->hs_cfg_profile->cp_log_max_pkeytbl; 2364 if (pkeyindx < max_pkeyindx) { 2365 return (1); 2366 } else { 2367 return (0); 2368 } 2369 } 2370 2371 2372 /* 2373 * hermon_queue_alloc() 2374 * Context: Can be called from interrupt or base context. 
2375 */ 2376 int 2377 hermon_queue_alloc(hermon_state_t *state, hermon_qalloc_info_t *qa_info, 2378 uint_t sleepflag) 2379 { 2380 ddi_dma_attr_t dma_attr; 2381 int (*callback)(caddr_t); 2382 uint64_t realsize, alloc_mask; 2383 uint_t type; 2384 int flag, status; 2385 2386 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info)) 2387 2388 /* Set the callback flag appropriately */ 2389 callback = (sleepflag == HERMON_SLEEP) ? DDI_DMA_SLEEP : 2390 DDI_DMA_DONTWAIT; 2391 2392 /* 2393 * Initialize many of the default DMA attributes. Then set additional 2394 * alignment restrictions as necessary for the queue memory. Also 2395 * respect the configured value for IOMMU bypass 2396 */ 2397 hermon_dma_attr_init(state, &dma_attr); 2398 dma_attr.dma_attr_align = qa_info->qa_bind_align; 2399 type = state->hs_cfg_profile->cp_iommu_bypass; 2400 if (type == HERMON_BINDMEM_BYPASS) { 2401 dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL; 2402 } 2403 2404 /* Allocate a DMA handle */ 2405 status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, callback, NULL, 2406 &qa_info->qa_dmahdl); 2407 if (status != DDI_SUCCESS) { 2408 return (DDI_FAILURE); 2409 } 2410 2411 /* 2412 * Determine the amount of memory to allocate, depending on the values 2413 * in "qa_bind_align" and "qa_alloc_align". The problem we are trying 2414 * to solve here is that allocating a DMA handle with IOMMU bypass 2415 * (DDI_DMA_FORCE_PHYSICAL) constrains us to only requesting alignments 2416 * that are less restrictive than the page size. Since we may need 2417 * stricter alignments on the memory allocated by ddi_dma_mem_alloc() 2418 * (e.g. in Hermon QP work queue memory allocation), we use the 2419 * following method to calculate how much additional memory to request, 2420 * and we enforce our own alignment on the allocated result. 
2421 */ 2422 alloc_mask = qa_info->qa_alloc_align - 1; 2423 if (qa_info->qa_bind_align == qa_info->qa_alloc_align) { 2424 realsize = qa_info->qa_size; 2425 } else { 2426 realsize = qa_info->qa_size + alloc_mask; 2427 } 2428 2429 /* 2430 * If we are to allocate the queue from system memory, then use 2431 * ddi_dma_mem_alloc() to find the space. Otherwise, this is a 2432 * host memory allocation, use ddi_umem_alloc(). In either case, 2433 * return a pointer to the memory range allocated (including any 2434 * necessary alignment adjustments), the "real" memory pointer, 2435 * the "real" size, and a ddi_acc_handle_t to use when reading 2436 * from/writing to the memory. 2437 */ 2438 if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) { 2439 /* Allocate system memory for the queue */ 2440 status = ddi_dma_mem_alloc(qa_info->qa_dmahdl, realsize, 2441 &state->hs_reg_accattr, DDI_DMA_CONSISTENT, callback, NULL, 2442 (caddr_t *)&qa_info->qa_buf_real, 2443 (size_t *)&qa_info->qa_buf_realsz, &qa_info->qa_acchdl); 2444 if (status != DDI_SUCCESS) { 2445 ddi_dma_free_handle(&qa_info->qa_dmahdl); 2446 return (DDI_FAILURE); 2447 } 2448 2449 /* 2450 * Save temporary copy of the real pointer. (This may be 2451 * modified in the last step below). 2452 */ 2453 qa_info->qa_buf_aligned = qa_info->qa_buf_real; 2454 2455 bzero(qa_info->qa_buf_real, qa_info->qa_buf_realsz); 2456 2457 } else { /* HERMON_QUEUE_LOCATION_USERLAND */ 2458 2459 /* Allocate userland mappable memory for the queue */ 2460 flag = (sleepflag == HERMON_SLEEP) ? DDI_UMEM_SLEEP : 2461 DDI_UMEM_NOSLEEP; 2462 qa_info->qa_buf_real = ddi_umem_alloc(realsize, flag, 2463 &qa_info->qa_umemcookie); 2464 if (qa_info->qa_buf_real == NULL) { 2465 ddi_dma_free_handle(&qa_info->qa_dmahdl); 2466 return (DDI_FAILURE); 2467 } 2468 2469 /* 2470 * Save temporary copy of the real pointer. (This may be 2471 * modified in the last step below). 
2472 */ 2473 qa_info->qa_buf_aligned = qa_info->qa_buf_real; 2474 2475 } 2476 2477 /* 2478 * The next to last step is to ensure that the final address 2479 * ("qa_buf_aligned") has the appropriate "alloc" alignment 2480 * restriction applied to it (if necessary). 2481 */ 2482 if (qa_info->qa_bind_align != qa_info->qa_alloc_align) { 2483 qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t) 2484 qa_info->qa_buf_aligned + alloc_mask) & ~alloc_mask); 2485 } 2486 /* 2487 * The last step is to figure out the offset of the start relative 2488 * to the first page of the region - will be used in the eqc/cqc 2489 * passed to the HW 2490 */ 2491 qa_info->qa_pgoffs = (uint_t)((uintptr_t) 2492 qa_info->qa_buf_aligned & HERMON_PAGEMASK); 2493 2494 return (DDI_SUCCESS); 2495 } 2496 2497 2498 /* 2499 * hermon_queue_free() 2500 * Context: Can be called from interrupt or base context. 2501 */ 2502 void 2503 hermon_queue_free(hermon_qalloc_info_t *qa_info) 2504 { 2505 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info)) 2506 2507 /* 2508 * Depending on how (i.e. from where) we allocated the memory for 2509 * this queue, we choose the appropriate method for releasing the 2510 * resources. 2511 */ 2512 if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) { 2513 2514 ddi_dma_mem_free(&qa_info->qa_acchdl); 2515 2516 } else if (qa_info->qa_location == HERMON_QUEUE_LOCATION_USERLAND) { 2517 2518 ddi_umem_free(qa_info->qa_umemcookie); 2519 2520 } 2521 2522 /* Always free the dma handle */ 2523 ddi_dma_free_handle(&qa_info->qa_dmahdl); 2524 } 2525 2526 /* 2527 * hermon_destroy_fmr_pool() 2528 * Create a pool of FMRs. 2529 * Context: Can be called from kernel context only. 
2530 */ 2531 int 2532 hermon_create_fmr_pool(hermon_state_t *state, hermon_pdhdl_t pd, 2533 ibt_fmr_pool_attr_t *fmr_attr, hermon_fmrhdl_t *fmrpoolp) 2534 { 2535 hermon_fmrhdl_t fmrpool; 2536 hermon_fmr_list_t *fmr, *fmr_next; 2537 hermon_mrhdl_t mr; 2538 char taskqname[48]; 2539 int status; 2540 int sleep; 2541 int i; 2542 2543 sleep = (fmr_attr->fmr_flags & IBT_MR_SLEEP) ? HERMON_SLEEP : 2544 HERMON_NOSLEEP; 2545 if ((sleep == HERMON_SLEEP) && 2546 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) { 2547 return (IBT_INVALID_PARAM); 2548 } 2549 2550 fmrpool = (hermon_fmrhdl_t)kmem_zalloc(sizeof (*fmrpool), sleep); 2551 if (fmrpool == NULL) { 2552 status = IBT_INSUFF_RESOURCE; 2553 goto fail; 2554 } 2555 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmrpool)) 2556 2557 mutex_init(&fmrpool->fmr_lock, NULL, MUTEX_DRIVER, 2558 DDI_INTR_PRI(state->hs_intrmsi_pri)); 2559 2560 fmrpool->fmr_state = state; 2561 fmrpool->fmr_flush_function = fmr_attr->fmr_func_hdlr; 2562 fmrpool->fmr_flush_arg = fmr_attr->fmr_func_arg; 2563 fmrpool->fmr_pool_size = 0; 2564 fmrpool->fmr_cache = 0; 2565 fmrpool->fmr_max_pages = fmr_attr->fmr_max_pages_per_fmr; 2566 fmrpool->fmr_page_sz = fmr_attr->fmr_page_sz; 2567 fmrpool->fmr_dirty_watermark = fmr_attr->fmr_dirty_watermark; 2568 fmrpool->fmr_dirty_len = 0; 2569 fmrpool->fmr_flags = fmr_attr->fmr_flags; 2570 2571 /* Create taskq to handle cleanup and flush processing */ 2572 (void) snprintf(taskqname, 50, "fmrpool/%d/%d @ 0x%" PRIx64, 2573 fmr_attr->fmr_pool_size, hermon_debug_fmrpool_cnt, 2574 (uint64_t)(uintptr_t)fmrpool); 2575 fmrpool->fmr_taskq = ddi_taskq_create(state->hs_dip, taskqname, 2576 HERMON_TASKQ_NTHREADS, TASKQ_DEFAULTPRI, 0); 2577 if (fmrpool->fmr_taskq == NULL) { 2578 status = IBT_INSUFF_RESOURCE; 2579 goto fail1; 2580 } 2581 2582 fmrpool->fmr_free_list = NULL; 2583 fmrpool->fmr_dirty_list = NULL; 2584 2585 if (fmr_attr->fmr_cache) { 2586 hermon_fmr_cache_init(fmrpool); 2587 } 2588 2589 for (i = 0; i < fmr_attr->fmr_pool_size; i++) { 2590 
status = hermon_mr_alloc_fmr(state, pd, fmrpool, &mr); 2591 if (status != DDI_SUCCESS) { 2592 goto fail2; 2593 } 2594 2595 fmr = (hermon_fmr_list_t *)kmem_zalloc( 2596 sizeof (hermon_fmr_list_t), sleep); 2597 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr)) 2598 2599 fmr->fmr = mr; 2600 fmr->fmr_refcnt = 0; 2601 fmr->fmr_remaps = 0; 2602 fmr->fmr_pool = fmrpool; 2603 fmr->fmr_in_cache = 0; 2604 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) 2605 mr->mr_fmr = fmr; 2606 2607 fmr->fmr_next = fmrpool->fmr_free_list; 2608 fmrpool->fmr_free_list = fmr; 2609 fmrpool->fmr_pool_size++; 2610 } 2611 2612 /* Set to return pool */ 2613 *fmrpoolp = fmrpool; 2614 2615 return (IBT_SUCCESS); 2616 fail2: 2617 hermon_fmr_cache_fini(fmrpool); 2618 for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) { 2619 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr)) 2620 fmr_next = fmr->fmr_next; 2621 (void) hermon_mr_dealloc_fmr(state, &fmr->fmr); 2622 kmem_free(fmr, sizeof (hermon_fmr_list_t)); 2623 } 2624 ddi_taskq_destroy(fmrpool->fmr_taskq); 2625 fail1: 2626 kmem_free(fmrpool, sizeof (*fmrpool)); 2627 fail: 2628 if (status == DDI_FAILURE) { 2629 return (ibc_get_ci_failure(0)); 2630 } else { 2631 return (status); 2632 } 2633 } 2634 2635 /* 2636 * hermon_destroy_fmr_pool() 2637 * Destroy an FMR pool and free all associated resources. 2638 * Context: Can be called from kernel context only. 
2639 */ 2640 int 2641 hermon_destroy_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool) 2642 { 2643 hermon_fmr_list_t *fmr, *fmr_next; 2644 int status; 2645 2646 mutex_enter(&fmrpool->fmr_lock); 2647 status = hermon_fmr_cleanup(state, fmrpool); 2648 if (status != DDI_SUCCESS) { 2649 mutex_exit(&fmrpool->fmr_lock); 2650 return (status); 2651 } 2652 2653 if (fmrpool->fmr_cache) { 2654 hermon_fmr_cache_fini(fmrpool); 2655 } 2656 2657 for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) { 2658 fmr_next = fmr->fmr_next; 2659 2660 (void) hermon_mr_dealloc_fmr(state, &fmr->fmr); 2661 kmem_free(fmr, sizeof (hermon_fmr_list_t)); 2662 } 2663 mutex_exit(&fmrpool->fmr_lock); 2664 2665 ddi_taskq_destroy(fmrpool->fmr_taskq); 2666 mutex_destroy(&fmrpool->fmr_lock); 2667 2668 kmem_free(fmrpool, sizeof (*fmrpool)); 2669 return (DDI_SUCCESS); 2670 } 2671 2672 /* 2673 * hermon_flush_fmr_pool() 2674 * Ensure that all unmapped FMRs are fully invalidated. 2675 * Context: Can be called from kernel context only. 2676 */ 2677 int 2678 hermon_flush_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool) 2679 { 2680 int status; 2681 2682 /* 2683 * Force the unmapping of all entries on the dirty list, regardless of 2684 * whether the watermark has been hit yet. 2685 */ 2686 /* grab the pool lock */ 2687 mutex_enter(&fmrpool->fmr_lock); 2688 status = hermon_fmr_cleanup(state, fmrpool); 2689 mutex_exit(&fmrpool->fmr_lock); 2690 return (status); 2691 } 2692 2693 /* 2694 * hermon_deregister_fmr() 2695 * Map memory into FMR 2696 * Context: Can be called from interrupt or base context. 
2697 */ 2698 int 2699 hermon_register_physical_fmr(hermon_state_t *state, hermon_fmrhdl_t fmrpool, 2700 ibt_pmr_attr_t *mem_pattr, hermon_mrhdl_t *mr, 2701 ibt_pmr_desc_t *mem_desc_p) 2702 { 2703 hermon_fmr_list_t *fmr; 2704 hermon_fmr_list_t query; 2705 avl_index_t where; 2706 int status; 2707 2708 /* Check length */ 2709 mutex_enter(&fmrpool->fmr_lock); 2710 if (mem_pattr->pmr_len < 1 || (mem_pattr->pmr_num_buf > 2711 fmrpool->fmr_max_pages)) { 2712 mutex_exit(&fmrpool->fmr_lock); 2713 return (IBT_MR_LEN_INVALID); 2714 } 2715 2716 mutex_enter(&fmrpool->fmr_cachelock); 2717 /* lookup in fmr cache */ 2718 /* if exists, grab it, and return it */ 2719 if (fmrpool->fmr_cache) { 2720 query.fmr_desc.pmd_iova = mem_pattr->pmr_iova; 2721 query.fmr_desc.pmd_phys_buf_list_sz = mem_pattr->pmr_len; 2722 fmr = (hermon_fmr_list_t *)avl_find(&fmrpool->fmr_cache_avl, 2723 &query, &where); 2724 2725 /* 2726 * If valid FMR was found in cache, return that fmr info 2727 */ 2728 if (fmr != NULL) { 2729 fmr->fmr_refcnt++; 2730 /* Store pmr desc for use in cache */ 2731 (void) memcpy(mem_desc_p, &fmr->fmr_desc, 2732 sizeof (ibt_pmr_desc_t)); 2733 *mr = (hermon_mrhdl_t)fmr->fmr; 2734 mutex_exit(&fmrpool->fmr_cachelock); 2735 mutex_exit(&fmrpool->fmr_lock); 2736 return (DDI_SUCCESS); 2737 } 2738 } 2739 2740 /* FMR does not exist in cache, proceed with registration */ 2741 2742 /* grab next free entry */ 2743 fmr = fmrpool->fmr_free_list; 2744 if (fmr == NULL) { 2745 mutex_exit(&fmrpool->fmr_cachelock); 2746 mutex_exit(&fmrpool->fmr_lock); 2747 return (IBT_INSUFF_RESOURCE); 2748 } 2749 2750 fmrpool->fmr_free_list = fmrpool->fmr_free_list->fmr_next; 2751 fmr->fmr_next = NULL; 2752 2753 status = hermon_mr_register_physical_fmr(state, mem_pattr, fmr->fmr, 2754 mem_desc_p); 2755 if (status != DDI_SUCCESS) { 2756 mutex_exit(&fmrpool->fmr_cachelock); 2757 mutex_exit(&fmrpool->fmr_lock); 2758 return (status); 2759 } 2760 2761 fmr->fmr_refcnt = 1; 2762 fmr->fmr_remaps++; 2763 2764 /* Store pmr 
desc for use in cache */ 2765 (void) memcpy(&fmr->fmr_desc, mem_desc_p, sizeof (ibt_pmr_desc_t)); 2766 *mr = (hermon_mrhdl_t)fmr->fmr; 2767 2768 /* Store in cache */ 2769 if (fmrpool->fmr_cache) { 2770 if (!fmr->fmr_in_cache) { 2771 avl_insert(&fmrpool->fmr_cache_avl, fmr, where); 2772 fmr->fmr_in_cache = 1; 2773 } 2774 } 2775 2776 mutex_exit(&fmrpool->fmr_cachelock); 2777 mutex_exit(&fmrpool->fmr_lock); 2778 return (DDI_SUCCESS); 2779 } 2780 2781 /* 2782 * hermon_deregister_fmr() 2783 * Unmap FMR 2784 * Context: Can be called from kernel context only. 2785 */ 2786 int 2787 hermon_deregister_fmr(hermon_state_t *state, hermon_mrhdl_t mr) 2788 { 2789 hermon_fmr_list_t *fmr; 2790 hermon_fmrhdl_t fmrpool; 2791 int status; 2792 2793 fmr = mr->mr_fmr; 2794 fmrpool = fmr->fmr_pool; 2795 2796 /* Grab pool lock */ 2797 mutex_enter(&fmrpool->fmr_lock); 2798 fmr->fmr_refcnt--; 2799 2800 if (fmr->fmr_refcnt == 0) { 2801 /* 2802 * First, do some bit of invalidation, reducing our exposure to 2803 * having this region still registered in hardware. 2804 */ 2805 (void) hermon_mr_invalidate_fmr(state, mr); 2806 2807 /* 2808 * If we've exhausted our remaps then add the FMR to the dirty 2809 * list, not allowing it to be re-used until we have done a 2810 * flush. Otherwise, simply add it back to the free list for 2811 * re-mapping. 
2812 */ 2813 if (fmr->fmr_remaps < 2814 state->hs_cfg_profile->cp_fmr_max_remaps) { 2815 /* add to free list */ 2816 fmr->fmr_next = fmrpool->fmr_free_list; 2817 fmrpool->fmr_free_list = fmr; 2818 } else { 2819 /* add to dirty list */ 2820 fmr->fmr_next = fmrpool->fmr_dirty_list; 2821 fmrpool->fmr_dirty_list = fmr; 2822 fmrpool->fmr_dirty_len++; 2823 2824 status = ddi_taskq_dispatch(fmrpool->fmr_taskq, 2825 hermon_fmr_processing, fmrpool, DDI_NOSLEEP); 2826 if (status == DDI_FAILURE) { 2827 mutex_exit(&fmrpool->fmr_lock); 2828 return (IBT_INSUFF_RESOURCE); 2829 } 2830 } 2831 } 2832 /* Release pool lock */ 2833 mutex_exit(&fmrpool->fmr_lock); 2834 2835 return (DDI_SUCCESS); 2836 } 2837 2838 2839 /* 2840 * hermon_fmr_processing() 2841 * If required, perform cleanup. 2842 * Context: Called from taskq context only. 2843 */ 2844 static void 2845 hermon_fmr_processing(void *fmr_args) 2846 { 2847 hermon_fmrhdl_t fmrpool; 2848 int status; 2849 2850 ASSERT(fmr_args != NULL); 2851 2852 fmrpool = (hermon_fmrhdl_t)fmr_args; 2853 2854 /* grab pool lock */ 2855 mutex_enter(&fmrpool->fmr_lock); 2856 if (fmrpool->fmr_dirty_len >= fmrpool->fmr_dirty_watermark) { 2857 status = hermon_fmr_cleanup(fmrpool->fmr_state, fmrpool); 2858 if (status != DDI_SUCCESS) { 2859 mutex_exit(&fmrpool->fmr_lock); 2860 return; 2861 } 2862 2863 if (fmrpool->fmr_flush_function != NULL) { 2864 (void) fmrpool->fmr_flush_function( 2865 (ibc_fmr_pool_hdl_t)fmrpool, 2866 fmrpool->fmr_flush_arg); 2867 } 2868 } 2869 2870 /* let pool lock go */ 2871 mutex_exit(&fmrpool->fmr_lock); 2872 } 2873 2874 /* 2875 * hermon_fmr_cleanup() 2876 * Perform cleaning processing, walking the list and performing the MTT sync 2877 * operation if required. 2878 * Context: can be called from taskq or base context. 
2879 */ 2880 static int 2881 hermon_fmr_cleanup(hermon_state_t *state, hermon_fmrhdl_t fmrpool) 2882 { 2883 hermon_fmr_list_t *fmr; 2884 hermon_fmr_list_t *fmr_next; 2885 int sync_needed; 2886 int status; 2887 2888 ASSERT(MUTEX_HELD(&fmrpool->fmr_lock)); 2889 2890 sync_needed = 0; 2891 for (fmr = fmrpool->fmr_dirty_list; fmr; fmr = fmr_next) { 2892 fmr_next = fmr->fmr_next; 2893 fmr->fmr_remaps = 0; 2894 2895 (void) hermon_mr_deregister_fmr(state, fmr->fmr); 2896 2897 /* 2898 * Update lists. 2899 * - add fmr back to free list 2900 * - remove fmr from dirty list 2901 */ 2902 fmr->fmr_next = fmrpool->fmr_free_list; 2903 fmrpool->fmr_free_list = fmr; 2904 2905 2906 /* 2907 * Because we have updated the dirty list, and deregistered the 2908 * FMR entry, we do need to sync the TPT, so we set the 2909 * 'sync_needed' flag here so we sync once we finish dirty_list 2910 * processing. 2911 */ 2912 sync_needed = 1; 2913 } 2914 2915 fmrpool->fmr_dirty_list = NULL; 2916 fmrpool->fmr_dirty_len = 0; 2917 2918 if (sync_needed) { 2919 status = hermon_sync_tpt_cmd_post(state, 2920 HERMON_CMD_NOSLEEP_SPIN); 2921 if (status != HERMON_CMD_SUCCESS) { 2922 return (status); 2923 } 2924 } 2925 2926 return (DDI_SUCCESS); 2927 } 2928 2929 /* 2930 * hermon_fmr_avl_compare() 2931 * Context: Can be called from user or kernel context. 2932 */ 2933 static int 2934 hermon_fmr_avl_compare(const void *q, const void *e) 2935 { 2936 hermon_fmr_list_t *entry, *query; 2937 2938 entry = (hermon_fmr_list_t *)e; 2939 query = (hermon_fmr_list_t *)q; 2940 2941 if (query->fmr_desc.pmd_iova < entry->fmr_desc.pmd_iova) { 2942 return (-1); 2943 } else if (query->fmr_desc.pmd_iova > entry->fmr_desc.pmd_iova) { 2944 return (+1); 2945 } else { 2946 return (0); 2947 } 2948 } 2949 2950 2951 /* 2952 * hermon_fmr_cache_init() 2953 * Context: Can be called from user or kernel context. 
 */
static void
hermon_fmr_cache_init(hermon_fmrhdl_t fmr)
{
	/* Initialize the lock used for FMR cache AVL tree access */
	mutex_init(&fmr->fmr_cachelock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(fmr->fmr_state->hs_intrmsi_pri));

	/* Initialize the AVL tree for the FMR cache */
	avl_create(&fmr->fmr_cache_avl, hermon_fmr_avl_compare,
	    sizeof (hermon_fmr_list_t),
	    offsetof(hermon_fmr_list_t, fmr_avlnode));

	/* mark caching as enabled for this pool */
	fmr->fmr_cache = 1;
}


/*
 * hermon_fmr_cache_fini()
 *    Context: Can be called from user or kernel context.
 */
static void
hermon_fmr_cache_fini(hermon_fmrhdl_t fmr)
{
	void			*cookie;

	/*
	 * Empty all entries (if necessary) and destroy the AVL tree.
	 * The FMRs themselves are freed as part of destroy_pool()
	 */
	cookie = NULL;
	while (((void *)(hermon_fmr_list_t *)avl_destroy_nodes(
	    &fmr->fmr_cache_avl, &cookie)) != NULL) {
		/* loop through */
	}
	avl_destroy(&fmr->fmr_cache_avl);

	/* Destroy the lock used for FMR cache */
	mutex_destroy(&fmr->fmr_cachelock);
}

/*
 * hermon_get_dma_cookies()
 * Return DMA cookies in the pre-allocated paddr_list_p based on the length
 * needed.
 * Context: Can be called from interrupt or base context.
 *
 *    Allocates and binds a DDI DMA handle for the buffer described by
 *    va_attrs, copies the resulting cookies into paddr_list_p (at most
 *    list_len entries), and returns the bound handle through
 *    ibc_ma_hdl_p.  On success the caller owns the handle and must later
 *    release it (see hermon_free_dma_cookies()).
 */
int
hermon_get_dma_cookies(hermon_state_t *state, ibt_phys_buf_t *paddr_list_p,
    ibt_va_attr_t *va_attrs, uint_t list_len, uint_t *cookiecnt,
    ibc_ma_hdl_t *ibc_ma_hdl_p)
{
	ddi_dma_handle_t	dma_hdl;
	ddi_dma_attr_t		dma_attr;
	ddi_dma_cookie_t	dmacookie;
	int			(*callback)(caddr_t);
	int			status;
	int			i;

	/* Set the callback flag appropriately */
	callback = (va_attrs->va_flags & IBT_VA_NOSLEEP) ? DDI_DMA_DONTWAIT :
	    DDI_DMA_SLEEP;
	/* sleeping allocation is invalid if the current context can't sleep */
	if ((callback == DDI_DMA_SLEEP) &&
	    (HERMON_SLEEP != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		return (IBT_INVALID_PARAM);
	}

	/*
	 * Initialize many of the default DMA attributes and allocate the DMA
	 * handle.  Then, if we're bypassing the IOMMU, set the
	 * DDI_DMA_FORCE_PHYSICAL flag.
	 */
	hermon_dma_attr_init(state, &dma_attr);

#ifdef	__x86
	/*
	 * On x86 we can specify a maximum segment length for our returned
	 * cookies.
	 */
	if (va_attrs->va_flags & IBT_VA_FMR) {
		dma_attr.dma_attr_seg = PAGESIZE - 1;
	}
#endif

	/*
	 * Check to see if the RO flag is set, and if so,
	 * set that bit in the attr structure as well.
	 *
	 * NOTE 1:  This function is ONLY called by consumers, and only for
	 *	    data buffers
	 */
	if (hermon_kernel_data_ro == HERMON_RO_ENABLED) {
		dma_attr.dma_attr_flags |= DDI_DMA_RELAXED_ORDERING;
	}

	status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
	    callback, NULL, &dma_hdl);
	if (status != DDI_SUCCESS) {
		switch (status) {
		case DDI_DMA_NORESOURCES:
			return (IBT_INSUFF_RESOURCE);
		case DDI_DMA_BADATTR:
		default:
			return (ibc_get_ci_failure(0));
		}
	}

	/*
	 * Now bind the handle with the correct DMA attributes.
	 * Buffer-based (IBT_VA_BUF) and address/length-based bindings use
	 * different DDI entry points but yield the same cookie stream.
	 */
	if (va_attrs->va_flags & IBT_VA_BUF) {
		status = ddi_dma_buf_bind_handle(dma_hdl, va_attrs->va_buf,
		    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_DONTWAIT,
		    NULL, &dmacookie, cookiecnt);
	} else {
		status = ddi_dma_addr_bind_handle(dma_hdl, NULL,
		    (caddr_t)(uintptr_t)va_attrs->va_vaddr, va_attrs->va_len,
		    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_DONTWAIT,
		    NULL, &dmacookie, cookiecnt);
	}
	if (status != DDI_SUCCESS) {
		/* binding failed: free the handle before returning */
		ddi_dma_free_handle(&dma_hdl);

		switch (status) {
		case DDI_DMA_NORESOURCES:
			return (IBT_INSUFF_RESOURCE);
		case DDI_DMA_TOOBIG:
			return (IBT_INVALID_PARAM);
		case DDI_DMA_PARTIAL_MAP:
		case DDI_DMA_INUSE:
		case DDI_DMA_NOMAPPING:
		default:
			return (ibc_get_ci_failure(0));
		}
	}

	/*
	 * Verify our physical buffer list (PBL) is large enough to handle the
	 * number of cookies that were returned.
	 */
	if (*cookiecnt > list_len) {
		(void) ddi_dma_unbind_handle(dma_hdl);
		ddi_dma_free_handle(&dma_hdl);
		return (IBT_PBL_TOO_SMALL);
	}

	/*
	 * We store the cookies returned by the DDI into our own PBL.  This
	 * sets the cookies up for later processing (for example, if we want to
	 * split up the cookies into smaller chunks).  We use the laddr and
	 * size fields in each cookie to create each individual entry (PBE).
	 */

	/*
	 * Store first cookie info first
	 */
	paddr_list_p[0].p_laddr = dmacookie.dmac_laddress;
	paddr_list_p[0].p_size = dmacookie.dmac_size;

	/*
	 * Loop through each cookie, storing each cookie into our physical
	 * buffer list.
	 */
	for (i = 1; i < *cookiecnt; i++) {
		ddi_dma_nextcookie(dma_hdl, &dmacookie);

		paddr_list_p[i].p_laddr = dmacookie.dmac_laddress;
		paddr_list_p[i].p_size = dmacookie.dmac_size;
	}

	/* return handle */
	*ibc_ma_hdl_p = (ibc_ma_hdl_t)dma_hdl;
	return (DDI_SUCCESS);
}

/*
 * hermon_split_dma_cookies()
 * Split up cookies passed in from paddr_list_p, returning the new list in the
 * same buffers, based on the pagesize to split the cookies into.
 * Context: Can be called from interrupt or base context.
 *
 *    NOTE(review): pagesize is presumably a power of two (pageoffset /
 *    pagemask / pageshift arithmetic below depends on it) — confirm with
 *    callers.
 */
/* ARGSUSED */
int
hermon_split_dma_cookies(hermon_state_t *state, ibt_phys_buf_t *paddr_list,
    ib_memlen_t *paddr_offset, uint_t list_len, uint_t *cookiecnt,
    uint_t pagesize)
{
	uint64_t		pageoffset;
	uint64_t		pagemask;
	uint_t			pageshift;
	uint_t			current_cookiecnt;
	uint_t			cookies_needed;
	uint64_t		last_size, extra_cookie;
	int			i_increment;
	int			i, k;
	int			status;

	/* Setup pagesize calculations */
	pageoffset = pagesize - 1;
	pagemask = (~pageoffset);
	pageshift = highbit(pagesize) - 1;

	/*
	 * Setup first cookie offset based on pagesize requested.
	 */
	*paddr_offset = paddr_list[0].p_laddr & pageoffset;
	paddr_list[0].p_laddr &= pagemask;

	/* Save away the current number of cookies that are passed in */
	current_cookiecnt = *cookiecnt;

	/* Perform splitting up of current cookies into pagesize blocks */
	for (i = 0; i < current_cookiecnt; i += i_increment) {
		/*
		 * If the cookie is smaller than pagesize, or already is
		 * pagesize, then we are already within our limits, so we skip
		 * it.
		 */
		if (paddr_list[i].p_size <= pagesize) {
			i_increment = 1;
			continue;
		}

		/*
		 * If this is our first cookie, then we have to deal with the
		 * offset that may be present in the first address.
 So add
		 * that to our size, to calculate potential change to the last
		 * cookie's size.
		 *
		 * Also, calculate the number of cookies that we'll need to
		 * split up this block into.
		 */
		if (i == 0) {
			last_size = (paddr_list[i].p_size + *paddr_offset) &
			    pageoffset;
			cookies_needed = (paddr_list[i].p_size +
			    *paddr_offset) >> pageshift;
		} else {
			last_size = 0;
			cookies_needed = paddr_list[i].p_size >> pageshift;
		}

		/*
		 * If our size is not a multiple of pagesize, we need one more
		 * cookie.
		 */
		if (last_size) {
			extra_cookie = 1;
		} else {
			extra_cookie = 0;
		}

		/*
		 * Split cookie into pagesize chunks, shifting list of cookies
		 * down, using more cookie slots in the PBL if necessary.
		 */
		status = hermon_dma_cookie_shift(paddr_list, i, list_len,
		    current_cookiecnt - i, cookies_needed + extra_cookie);
		if (status != 0) {
			/* IBT_PBL_TOO_SMALL: not enough room to split */
			return (status);
		}

		/*
		 * If the very first cookie, we must take possible offset into
		 * account.
		 */
		if (i == 0) {
			paddr_list[i].p_size = pagesize - *paddr_offset;
		} else {
			paddr_list[i].p_size = pagesize;
		}

		/*
		 * We have shifted the existing cookies down the PBL, now fill
		 * in the blank entries by splitting up our current block.
		 */
		for (k = 1; k < cookies_needed; k++) {
			paddr_list[i + k].p_laddr =
			    paddr_list[i + k - 1].p_laddr + pagesize;
			paddr_list[i + k].p_size = pagesize;
		}

		/* If we have one extra cookie (of less than pagesize...) */
		if (extra_cookie) {
			paddr_list[i + k].p_laddr =
			    paddr_list[i + k - 1].p_laddr + pagesize;
			paddr_list[i + k].p_size = (size_t)last_size;
		}

		/* Increment cookiecnt appropriately based on cookies used */
		i_increment = cookies_needed + extra_cookie;
		current_cookiecnt += i_increment - 1;
	}

	/* Update to new cookie count */
	*cookiecnt = current_cookiecnt;
	return (DDI_SUCCESS);
}

/*
 * hermon_dma_cookie_shift()
 *    Context: Can be called from interrupt or base context.
 *
 *    Shifts the cookies following paddr_list[start] upward by
 *    (num_shift - 1) slots to open room for a split block.  Entry
 *    paddr_list[start] itself is not moved; the caller rewrites it.
 *    Returns IBT_PBL_TOO_SMALL if the shifted region would run past
 *    'end' (the PBL capacity).
 */
int
hermon_dma_cookie_shift(ibt_phys_buf_t *paddr_list, int start, int end,
    int cookiecnt, int num_shift)
{
	int		shift_start;
	int		i;

	/* Calculating starting point in the PBL list */
	shift_start = start + cookiecnt - 1;

	/* Check if we're at the end of our PBL list */
	if ((shift_start + num_shift - 1) >= end) {
		return (IBT_PBL_TOO_SMALL);
	}

	/*
	 * Walk from the tail downward so a source entry is copied to its
	 * new (higher) slot before it can be overwritten.
	 */
	for (i = shift_start; i > start; i--) {
		paddr_list[i + num_shift - 1] = paddr_list[i];
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_free_dma_cookies()
 *    Context: Can be called from interrupt or base context.
 *
 *    Unbinds and frees the DMA handle previously returned by
 *    hermon_get_dma_cookies() through ibc_ma_hdl_p.
 */
int
hermon_free_dma_cookies(ibc_ma_hdl_t ma_hdl)
{
	ddi_dma_handle_t	dma_hdl;
	int			status;

	dma_hdl = (ddi_dma_handle_t)ma_hdl;

	status = ddi_dma_unbind_handle(dma_hdl);
	if (status != DDI_SUCCESS) {
		/* handle intentionally not freed when unbind fails */
		return (ibc_get_ci_failure(0));
	}
	ddi_dma_free_handle(&dma_hdl);

	return (DDI_SUCCESS);
}