1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * hermon_misc.c
29  *    Hermon Miscellaneous routines - Address Handle, Multicast, Protection
30  *    Domain, and port-related operations
31  *
32  *    Implements all the routines necessary for allocating, freeing, querying
33  *    and modifying Address Handles and Protection Domains.  Also implements
34  *    all the routines necessary for adding and removing Queue Pairs to/from
35  *    Multicast Groups.  Lastly, it implements the routines necessary for
36  *    port-related query and modify operations.
37  */
38 
39 #include <sys/types.h>
40 #include <sys/conf.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/modctl.h>
44 #include <sys/bitmap.h>
45 #include <sys/sysmacros.h>
46 
47 #include <sys/ib/adapters/hermon/hermon.h>
48 
49 extern uint32_t hermon_kernel_data_ro;
50 
51 /* used to help uniquify the fmr pool taskq name */
52 static uint_t hermon_debug_fmrpool_cnt = 0x00000000;
53 
54 static int hermon_mcg_qplist_add(hermon_state_t *state, hermon_mcghdl_t mcg,
55     hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp, uint_t *qp_found);
56 static int hermon_mcg_qplist_remove(hermon_mcghdl_t mcg,
57     hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp);
58 static void hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp);
59 static void hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp);
60 static uint_t hermon_mcg_walk_mgid_hash(hermon_state_t *state,
61     uint64_t start_indx, ib_gid_t mgid, uint_t *prev_indx);
62 static void hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg,
63     hermon_hw_mcg_t *mcg_hdr, ib_gid_t mgid, hermon_rsrc_t *mcg_rsrc);
64 static int hermon_mcg_hash_list_remove(hermon_state_t *state, uint_t curr_indx,
65     uint_t prev_indx, hermon_hw_mcg_t *mcg_entry);
66 static int hermon_mcg_entry_invalidate(hermon_state_t *state,
67     hermon_hw_mcg_t *mcg_entry, uint_t indx);
68 static int hermon_mgid_is_valid(ib_gid_t gid);
69 static int hermon_mlid_is_valid(ib_lid_t lid);
70 static void hermon_fmr_processing(void *fmr_args);
71 static int hermon_fmr_cleanup(hermon_state_t *state, hermon_fmrhdl_t pool);
72 static void hermon_fmr_cache_init(hermon_fmrhdl_t fmr);
73 static void hermon_fmr_cache_fini(hermon_fmrhdl_t fmr);
74 static int hermon_fmr_avl_compare(const void *q, const void *e);
75 
76 
77 #define	HERMON_MAX_DBR_PAGES_PER_USER	64
78 #define	HERMON_DBR_KEY(index, page) \
79 	(((uint64_t)index) * HERMON_MAX_DBR_PAGES_PER_USER + (page))
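
/*
 * Editorial example (not in the original source): with
 * HERMON_MAX_DBR_PAGES_PER_USER == 64, HERMON_DBR_KEY() packs an
 * (index, page) pair into a single 64-bit umap-db key with no
 * collisions as long as page < 64:
 *
 *	HERMON_DBR_KEY(2, 0)  == 128
 *	HERMON_DBR_KEY(2, 63) == 191
 *	HERMON_DBR_KEY(3, 0)  == 192
 */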
80 
81 static hermon_udbr_page_t *
82 hermon_dbr_new_user_page(hermon_state_t *state, uint_t index,
83     uint_t page)
84 {
85 	hermon_udbr_page_t *pagep;
86 	ddi_dma_attr_t dma_attr;
87 	uint_t cookiecnt;
88 	int status;
89 	hermon_umap_db_entry_t *umapdb;
90 
91 	pagep = kmem_alloc(sizeof (*pagep), KM_SLEEP);
92 	pagep->upg_index = page;
93 	pagep->upg_nfree = PAGESIZE / sizeof (hermon_dbr_t);
94 
95 	/* Allocate 1 bit per dbr for free/alloc management (0 => "free") */
96 	pagep->upg_free = kmem_zalloc(PAGESIZE / sizeof (hermon_dbr_t) / 8,
97 	    KM_SLEEP);
98 	pagep->upg_kvaddr = ddi_umem_alloc(PAGESIZE, DDI_UMEM_SLEEP,
99 	    &pagep->upg_umemcookie); /* not HERMON_PAGESIZE here */
100 
101 	pagep->upg_buf = ddi_umem_iosetup(pagep->upg_umemcookie, 0,
102 	    PAGESIZE, B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
103 
104 	hermon_dma_attr_init(state, &dma_attr);
105 	status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
106 	    DDI_DMA_SLEEP, NULL, &pagep->upg_dmahdl);
107 	if (status != DDI_SUCCESS) {
108 		IBTF_DPRINTF_L2("hermon", "hermon_dbr_new_user_page: "
109 		    "ddi_dma_alloc_handle failed: %d", status);
110 		return (NULL);
111 	}
112 	status = ddi_dma_buf_bind_handle(pagep->upg_dmahdl,
113 	    pagep->upg_buf, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
114 	    DDI_DMA_SLEEP, NULL, &pagep->upg_dmacookie, &cookiecnt);
115 	if (status != DDI_SUCCESS) {
116 		IBTF_DPRINTF_L2("hermon", "hermon_dbr_new_user_page: "
117 		    "ddi_dma_buf_bind_handle failed: %d", status);
118 		ddi_dma_free_handle(&pagep->upg_dmahdl);
119 		return (NULL);
120 	}
121 	ASSERT(cookiecnt == 1);
122 
123 	/* create db entry for mmap */
124 	umapdb = hermon_umap_db_alloc(state->hs_instance,
125 	    HERMON_DBR_KEY(index, page), MLNX_UMAP_DBRMEM_RSRC,
126 	    (uint64_t)(uintptr_t)pagep);
127 	hermon_umap_db_add(umapdb);
128 	return (pagep);
129 }
130 
131 
132 /*ARGSUSED*/
133 static int
134 hermon_user_dbr_alloc(hermon_state_t *state, uint_t index,
135     ddi_acc_handle_t *acchdl, hermon_dbr_t **vdbr, uint64_t *pdbr,
136     uint64_t *mapoffset)
137 {
138 	hermon_user_dbr_t *udbr;
139 	hermon_udbr_page_t *pagep;
140 	uint_t next_page;
141 	int dbr_index;
142 	int i1, i2, i3, last;
143 	uint64_t u64, mask;
144 
145 	mutex_enter(&state->hs_dbr_lock);
146 	for (udbr = state->hs_user_dbr; udbr != NULL; udbr = udbr->udbr_link)
147 		if (udbr->udbr_index == index)
148 			break;
149 	if (udbr == NULL) {
150 		udbr = kmem_alloc(sizeof (*udbr), KM_SLEEP);
151 		udbr->udbr_link = state->hs_user_dbr;
152 		state->hs_user_dbr = udbr;
153 		udbr->udbr_index = index;
154 		udbr->udbr_pagep = NULL;
155 	}
156 	pagep = udbr->udbr_pagep;
157 	next_page = (pagep == NULL) ? 0 : (pagep->upg_index + 1);
158 	while (pagep != NULL)
159 		if (pagep->upg_nfree > 0)
160 			break;
161 		else
162 			pagep = pagep->upg_link;
163 	if (pagep == NULL) {
164 		pagep = hermon_dbr_new_user_page(state, index, next_page);
165 		if (pagep == NULL) {
166 			mutex_exit(&state->hs_dbr_lock);
167 			return (DDI_FAILURE);
168 		}
169 		pagep->upg_link = udbr->udbr_pagep;
170 		udbr->udbr_pagep = pagep;
171 	}
172 
173 	/* Since nfree > 0, we're assured the loops below will succeed */
174 
175 	/* First, find a 64-bit word (not ~0) that has a free dbr */
176 	last = PAGESIZE / sizeof (uint64_t) / 64;
177 	mask = ~0ull;
178 	for (i1 = 0; i1 < last; i1++)
179 		if ((pagep->upg_free[i1] & mask) != mask)
180 			break;
181 	u64 = pagep->upg_free[i1];
182 
183 	/* Second, find a byte (not 0xff) that has a free dbr */
184 	last = sizeof (uint64_t) / sizeof (uint8_t);
185 	for (i2 = 0, mask = 0xff; i2 < last; i2++, mask <<= 8)
186 		if ((u64 & mask) != mask)
187 			break;
188 
189 	/* Third, find a bit that is free (0) */
190 	for (i3 = 0; i3 < sizeof (uint64_t) / sizeof (uint8_t); i3++)
191 		if ((u64 & (1ul << (i3 + 8 * i2))) == 0)
192 			break;
193 
194 	/* Mark it as allocated */
195 	pagep->upg_free[i1] |= (1ul << (i3 + 8 * i2));
196 
197 	dbr_index = ((i1 * sizeof (uint64_t)) + i2) * sizeof (uint64_t) + i3;
198 	pagep->upg_nfree--;
199 	((uint64_t *)(void *)pagep->upg_kvaddr)[dbr_index] = 0;	/* clear dbr */
200 	*mapoffset = ((HERMON_DBR_KEY(index, pagep->upg_index) <<
201 	    MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_DBRMEM_RSRC) << PAGESHIFT;
202 	*vdbr = (hermon_dbr_t *)((uint64_t *)(void *)pagep->upg_kvaddr +
203 	    dbr_index);
204 	*pdbr = pagep->upg_dmacookie.dmac_laddress + dbr_index *
205 	    sizeof (uint64_t);
206 
207 	mutex_exit(&state->hs_dbr_lock);
208 	return (DDI_SUCCESS);
209 }
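
/*
 * Editorial sketch (not part of the driver): the word/byte/bit scan in
 * hermon_user_dbr_alloc() reduces to "find the first clear bit in an
 * array of uint64_t words".  A minimal standalone version, assuming the
 * caller has already guaranteed that a clear bit exists (as the
 * "upg_nfree > 0" check does above):
 */
#if 0	/* illustrative only, not compiled */
static int
first_clear_bit(const uint64_t *bits, int nwords)
{
	int w, b;

	/* Find the first word that is not all ones */
	for (w = 0; w < nwords; w++)
		if (bits[w] != ~0ull)
			break;

	/* Find the first clear bit within that word */
	for (b = 0; b < 64; b++)
		if ((bits[w] & (1ull << b)) == 0)
			break;

	return (w * 64 + b);
}
#endif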
210 
211 static void
212 hermon_user_dbr_free(hermon_state_t *state, uint_t index, hermon_dbr_t *record)
213 {
214 	hermon_user_dbr_t	*udbr;
215 	hermon_udbr_page_t	*pagep;
216 	caddr_t			kvaddr;
217 	uint_t			dbr_index;
218 	uint_t			max_free = PAGESIZE / sizeof (hermon_dbr_t);
219 	int			i1, i2;
220 
221 	dbr_index = (uintptr_t)record & PAGEOFFSET; /* offset (not yet index) */
222 	kvaddr = (caddr_t)record - dbr_index;
223 	dbr_index /= sizeof (hermon_dbr_t); /* now it's the index */
224 
225 	mutex_enter(&state->hs_dbr_lock);
226 	for (udbr = state->hs_user_dbr; udbr != NULL; udbr = udbr->udbr_link)
227 		if (udbr->udbr_index == index)
228 			break;
229 	if (udbr == NULL) {
230 		IBTF_DPRINTF_L2("hermon", "free user dbr: udbr struct not "
231 		    "found for index %x", index);
232 		mutex_exit(&state->hs_dbr_lock);
233 		return;
234 	}
235 	for (pagep = udbr->udbr_pagep; pagep != NULL; pagep = pagep->upg_link)
236 		if (pagep->upg_kvaddr == kvaddr)
237 			break;
238 	if (pagep == NULL) {
239 		IBTF_DPRINTF_L2("hermon", "free user dbr: pagep struct not"
240 		    " found for index %x, kvaddr %p, DBR index %x",
241 		    index, kvaddr, dbr_index);
242 		mutex_exit(&state->hs_dbr_lock);
243 		return;
244 	}
245 	if (pagep->upg_nfree >= max_free) {
246 		IBTF_DPRINTF_L2("hermon", "free user dbr: overflow: "
247 		    "UCE index %x, DBR index %x", index, dbr_index);
248 		mutex_exit(&state->hs_dbr_lock);
249 		return;
250 	}
251 	ASSERT(dbr_index < max_free);
252 	i1 = dbr_index / 64;
253 	i2 = dbr_index % 64;
254 	ASSERT((pagep->upg_free[i1] & (1ul << i2)) == (1ul << i2));
255 	pagep->upg_free[i1] &= ~(1ul << i2);
256 	pagep->upg_nfree++;
257 	mutex_exit(&state->hs_dbr_lock);
258 }
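
/*
 * Editorial note (not in the original source): the free path above
 * needs no lookup structure to find the slot.  Because each DBR page
 * comes from ddi_umem_alloc() page-aligned, the record pointer alone
 * decomposes into the page base and the slot index:
 *
 *	offset = (uintptr_t)record & PAGEOFFSET;
 *	kvaddr = (caddr_t)record - offset;
 *	index  = offset / sizeof (hermon_dbr_t);
 */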
259 
260 /*
261  * hermon_dbr_page_alloc()
262  *	first page allocation - called from attach or open
263  *	in this case, we want exactly one page per call, and aligned on a
264  *	page - and may need to be mapped to the user for access
265  */
266 int
267 hermon_dbr_page_alloc(hermon_state_t *state, hermon_dbr_info_t **dinfo)
268 {
269 	int			status;
270 	ddi_dma_handle_t	dma_hdl;
271 	ddi_acc_handle_t	acc_hdl;
272 	ddi_dma_attr_t		dma_attr;
273 	ddi_dma_cookie_t	cookie;
274 	uint_t			cookie_cnt;
275 	int			i;
276 	hermon_dbr_info_t 	*info;
277 	caddr_t			dmaaddr;
278 	uint64_t		dmalen;
279 
280 	info = kmem_zalloc(sizeof (hermon_dbr_info_t), KM_SLEEP);
281 
282 	/*
283 	 * Initialize many of the default DMA attributes.  Then set the
284 	 * additional alignment restriction required for dbr memory (page
285 	 * alignment).  Also use the configured value for IOMMU bypass
286 	 */
287 	hermon_dma_attr_init(state, &dma_attr);
288 	dma_attr.dma_attr_align = PAGESIZE;
289 	dma_attr.dma_attr_sgllen = 1;	/* make sure only one cookie */
290 
291 	status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
292 	    DDI_DMA_SLEEP, NULL, &dma_hdl);
293 	if (status != DDI_SUCCESS) {
294 		kmem_free((void *)info, sizeof (hermon_dbr_info_t));
295 		cmn_err(CE_NOTE, "dbr DMA handle alloc failed\n");
296 		return (DDI_FAILURE);
297 	}
298 
299 	status = ddi_dma_mem_alloc(dma_hdl, PAGESIZE,
300 	    &state->hs_reg_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP,
301 	    NULL, &dmaaddr, (size_t *)&dmalen, &acc_hdl);
302 	if (status != DDI_SUCCESS)	{
303 		ddi_dma_free_handle(&dma_hdl);
304 		cmn_err(CE_CONT, "dbr DMA mem alloc failed(status %d)", status);
305 		kmem_free((void *)info, sizeof (hermon_dbr_info_t));
306 		return (DDI_FAILURE);
307 	}
308 
309 	/* this memory won't be IB registered, so do the bind here */
310 	status = ddi_dma_addr_bind_handle(dma_hdl, NULL,
311 	    dmaaddr, (size_t)dmalen, DDI_DMA_RDWR |
312 	    DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &cookie, &cookie_cnt);
313 	if (status != DDI_SUCCESS) {
314 		ddi_dma_mem_free(&acc_hdl);
315 		ddi_dma_free_handle(&dma_hdl);
316 		kmem_free((void *)info, sizeof (hermon_dbr_info_t));
317 		cmn_err(CE_CONT, "dbr DMA bind handle failed (status %d)",
318 		    status);
319 		return (DDI_FAILURE);
320 	}
321 	*dinfo = info;		/* Pass back the pointer */
322 
323 	/* init the info structure with returned info */
324 	info->dbr_dmahdl = dma_hdl;
325 	info->dbr_acchdl = acc_hdl;
326 	info->dbr_page   = (hermon_dbr_t *)(void *)dmaaddr;
327 	info->dbr_link = NULL;
328 	/* extract the phys addr from the cookie */
329 	info->dbr_paddr = cookie.dmac_laddress;
330 	info->dbr_firstfree = 0;
331 	info->dbr_nfree = HERMON_NUM_DBR_PER_PAGE;
332 	/* link all DBrs onto the free list */
333 	for (i = 0; i < HERMON_NUM_DBR_PER_PAGE; i++) {
334 		info->dbr_page[i] = i + 1;
335 	}
336 
337 	return (DDI_SUCCESS);
338 }
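
/*
 * Editorial sketch (not part of the driver): the loop above threads an
 * intrusive free list through the DBR page itself -- each free slot
 * holds the index of the next free slot.  Allocation and free (see
 * hermon_dbr_alloc() and hermon_dbr_free() below) are then both O(1):
 *
 *	idx = firstfree;		allocate: pop the head
 *	firstfree = page[idx];
 *	page[idx] = 0;
 *
 *	page[idx] = firstfree;		free: push back onto the head
 *	firstfree = idx;
 */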
339 
340 
341 /*
342  * hermon_dbr_alloc()
343  *	DBr record allocation - called from alloc cq/qp/srq
344  *	will check for available dbrs in current
345  *	page - if needed it will allocate another and link them
346  */
347 
348 int
349 hermon_dbr_alloc(hermon_state_t *state, uint_t index, ddi_acc_handle_t *acchdl,
350     hermon_dbr_t **vdbr, uint64_t *pdbr, uint64_t *mapoffset)
351 {
352 	hermon_dbr_t		*record = NULL;
353 	hermon_dbr_info_t	*info = NULL;
354 	uint32_t		idx;
355 	int			status;
356 
357 	if (index != state->hs_kernel_uar_index)
358 		return (hermon_user_dbr_alloc(state, index, acchdl, vdbr, pdbr,
359 		    mapoffset));
360 
361 	mutex_enter(&state->hs_dbr_lock);
362 	for (info = state->hs_kern_dbr; info != NULL; info = info->dbr_link)
363 		if (info->dbr_nfree != 0)
364 			break;		/* found a page w/ one available */
365 
366 	if (info == NULL) {	/* did NOT find a page with one available */
367 		status = hermon_dbr_page_alloc(state, &info);
368 		if (status != DDI_SUCCESS) {
369 			/* do error handling */
370 			mutex_exit(&state->hs_dbr_lock);
371 			return (DDI_FAILURE);
372 		}
373 		/* got a new page, so link it in. */
374 		info->dbr_link = state->hs_kern_dbr;
375 		state->hs_kern_dbr = info;
376 	}
377 	idx = info->dbr_firstfree;
378 	record = info->dbr_page + idx;
379 	info->dbr_firstfree = *record;
380 	info->dbr_nfree--;
381 	*record = 0;
382 
383 	*acchdl = info->dbr_acchdl;
384 	*vdbr = record;
385 	*pdbr = info->dbr_paddr + idx * sizeof (hermon_dbr_t);
386 	mutex_exit(&state->hs_dbr_lock);
387 	return (DDI_SUCCESS);
388 }
389 
390 /*
391  * hermon_dbr_free()
392  *	DBr record deallocation - called from free cq/qp
393  *	will update the counter in the header, and invalidate
394  *	the dbr, but will NEVER free pages of dbrs - a small
395  *	price to pay, since the userland path never frees its pages anyway
396  */
397 void
398 hermon_dbr_free(hermon_state_t *state, uint_t indx, hermon_dbr_t *record)
399 {
400 	hermon_dbr_t		*page;
401 	hermon_dbr_info_t	*info;
402 
403 	if (indx != state->hs_kernel_uar_index) {
404 		hermon_user_dbr_free(state, indx, record);
405 		return;
406 	}
407 	page = (hermon_dbr_t *)(uintptr_t)((uintptr_t)record & PAGEMASK);
408 	mutex_enter(&state->hs_dbr_lock);
409 	for (info = state->hs_kern_dbr; info != NULL; info = info->dbr_link)
410 		if (info->dbr_page == page)
411 			break;
412 	ASSERT(info != NULL);
413 	*record = info->dbr_firstfree;
414 	info->dbr_firstfree = record - info->dbr_page;
415 	info->dbr_nfree++;
416 	mutex_exit(&state->hs_dbr_lock);
417 }
418 
419 /*
420  * hermon_dbr_kern_free()
421  *    Context: Can be called only from detach context.
422  *
423  *	Free all kernel dbr pages.  This includes the freeing of all the dma
424  *	resources acquired during the allocation of the pages.
425  *
426  *	Also, free all the user dbr pages.
427  */
428 void
429 hermon_dbr_kern_free(hermon_state_t *state)
430 {
431 	hermon_dbr_info_t	*info, *link;
432 	hermon_user_dbr_t	*udbr, *next;
433 	hermon_udbr_page_t	*pagep, *nextp;
434 	hermon_umap_db_entry_t	*umapdb;
435 	int			instance, status;
436 	uint64_t		value;
437 	extern			hermon_umap_db_t hermon_userland_rsrc_db;
438 
439 	mutex_enter(&state->hs_dbr_lock);
440 	for (info = state->hs_kern_dbr; info != NULL; info = link) {
441 		(void) ddi_dma_unbind_handle(info->dbr_dmahdl);
442 		ddi_dma_mem_free(&info->dbr_acchdl);	/* free page */
443 		ddi_dma_free_handle(&info->dbr_dmahdl);
444 		link = info->dbr_link;
445 		kmem_free(info, sizeof (hermon_dbr_info_t));
446 	}
447 
448 	udbr = state->hs_user_dbr;
449 	instance = state->hs_instance;
450 	mutex_enter(&hermon_userland_rsrc_db.hdl_umapdb_lock);
451 	while (udbr != NULL) {
452 		pagep = udbr->udbr_pagep;
453 		while (pagep != NULL) {
454 			/* remove the umap db entry for this DBR page */
455 			(void) ddi_dma_unbind_handle(pagep->upg_dmahdl);
456 			ddi_dma_free_handle(&pagep->upg_dmahdl);
457 			freerbuf(pagep->upg_buf);
458 			ddi_umem_free(pagep->upg_umemcookie);
459 			status = hermon_umap_db_find_nolock(instance,
460 			    HERMON_DBR_KEY(udbr->udbr_index,
461 			    pagep->upg_index), MLNX_UMAP_DBRMEM_RSRC,
462 			    &value, HERMON_UMAP_DB_REMOVE, &umapdb);
463 			if (status == DDI_SUCCESS)
464 				hermon_umap_db_free(umapdb);
465 			kmem_free(pagep->upg_free,
466 			    PAGESIZE / sizeof (hermon_dbr_t) / 8);
467 			nextp = pagep->upg_link;
468 			kmem_free(pagep, sizeof (*pagep));
469 			pagep = nextp;
470 		}
471 		next = udbr->udbr_link;
472 		kmem_free(udbr, sizeof (*udbr));
473 		udbr = next;
474 	}
475 	mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
476 	mutex_exit(&state->hs_dbr_lock);
477 }
478 
479 /*
480  * hermon_ah_alloc()
481  *    Context: Can be called only from user or kernel context.
482  */
483 int
484 hermon_ah_alloc(hermon_state_t *state, hermon_pdhdl_t pd,
485     ibt_adds_vect_t *attr_p, hermon_ahhdl_t *ahhdl, uint_t sleepflag)
486 {
487 	hermon_rsrc_t		*rsrc;
488 	hermon_hw_udav_t	*udav;
489 	hermon_ahhdl_t		ah;
490 	int			status;
491 
492 	/*
493 	 * Someday maybe the "ibt_adds_vect_t *attr_p" will be NULL to
494 	 * indicate that we wish to allocate an "invalid" (i.e. empty)
495 	 * address handle XXX
496 	 */
497 
498 	/* Validate that specified port number is legal */
499 	if (!hermon_portnum_is_valid(state, attr_p->av_port_num)) {
500 		return (IBT_HCA_PORT_INVALID);
501 	}
502 
503 	/*
504 	 * Allocate the software structure for tracking the address handle
505 	 * (i.e. the Hermon Address Handle struct).
506 	 */
507 	status = hermon_rsrc_alloc(state, HERMON_AHHDL, 1, sleepflag, &rsrc);
508 	if (status != DDI_SUCCESS) {
509 		return (IBT_INSUFF_RESOURCE);
510 	}
511 	ah = (hermon_ahhdl_t)rsrc->hr_addr;
512 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah))
513 
514 	/* Increment the reference count on the protection domain (PD) */
515 	hermon_pd_refcnt_inc(pd);
516 
517 	udav = (hermon_hw_udav_t *)kmem_zalloc(sizeof (hermon_hw_udav_t),
518 	    KM_SLEEP);
519 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*udav))
520 
521 	/*
522 	 * Fill in the UDAV data. We first zero out the UDAV, then populate
523  * it by calling hermon_set_addr_path() to fill in the common
524 	 * portions that can be pulled from the "ibt_adds_vect_t" passed in
525 	 */
526 	status = hermon_set_addr_path(state, attr_p,
527 	    (hermon_hw_addr_path_t *)udav, HERMON_ADDRPATH_UDAV);
528 	if (status != DDI_SUCCESS) {
529 		hermon_pd_refcnt_dec(pd);
530 		hermon_rsrc_free(state, &rsrc);
531 		return (status);
532 	}
533 	udav->pd	= pd->pd_pdnum;
534 	udav->sl	= attr_p->av_srvl;
535 
536 	/*
537 	 * Fill in the rest of the Hermon Address Handle struct.
538 	 *
539 	 * NOTE: We are saving away a copy of the "av_dgid.gid_guid" field
540 	 * here because we may need to return it later to the IBTF (as a
541 	 * result of a subsequent query operation).  Unlike the other UDAV
542 	 * parameters, the value of "av_dgid.gid_guid" is not always preserved.
543 	 * The reason for this is described in hermon_set_addr_path().
544 	 */
545 	ah->ah_rsrcp	 = rsrc;
546 	ah->ah_pdhdl	 = pd;
547 	ah->ah_udav	 = udav;
548 	ah->ah_save_guid = attr_p->av_dgid.gid_guid;
549 	*ahhdl = ah;
550 
551 	return (DDI_SUCCESS);
552 }
553 
554 
555 /*
556  * hermon_ah_free()
557  *    Context: Can be called only from user or kernel context.
558  */
559 /* ARGSUSED */
560 int
561 hermon_ah_free(hermon_state_t *state, hermon_ahhdl_t *ahhdl, uint_t sleepflag)
562 {
563 	hermon_rsrc_t		*rsrc;
564 	hermon_pdhdl_t		pd;
565 	hermon_ahhdl_t		ah;
566 
567 	/*
568 	 * Pull all the necessary information from the Hermon Address Handle
569 	 * struct.  This is necessary here because the resource for the
570 	 * AH is going to be freed up as part of this operation.
571 	 */
572 	ah    = *ahhdl;
573 	mutex_enter(&ah->ah_lock);
574 	rsrc  = ah->ah_rsrcp;
575 	pd    = ah->ah_pdhdl;
576 	mutex_exit(&ah->ah_lock);
577 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah))
578 
579 	/* Free the UDAV memory */
580 	kmem_free(ah->ah_udav, sizeof (hermon_hw_udav_t));
581 
582 	/* Decrement the reference count on the protection domain (PD) */
583 	hermon_pd_refcnt_dec(pd);
584 
585 	/* Free the Hermon Address Handle structure */
586 	hermon_rsrc_free(state, &rsrc);
587 
588 	/* Set the ahhdl pointer to NULL and return success */
589 	*ahhdl = NULL;
590 
591 	return (DDI_SUCCESS);
592 }
593 
594 
595 /*
596  * hermon_ah_query()
597  *    Context: Can be called from interrupt or base context.
598  */
599 /* ARGSUSED */
600 int
601 hermon_ah_query(hermon_state_t *state, hermon_ahhdl_t ah, hermon_pdhdl_t *pd,
602     ibt_adds_vect_t *attr_p)
603 {
604 	mutex_enter(&ah->ah_lock);
605 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p))
606 
607 	/*
608 	 * Pull the PD and UDAV from the Hermon Address Handle structure
609 	 */
610 	*pd = ah->ah_pdhdl;
611 
612 	/*
613 	 * Fill in "ibt_adds_vect_t".  We call hermon_get_addr_path() to fill
614 	 * the common portions that can be pulled from the UDAV we pass in.
615 	 *
616 	 * NOTE: We will also fill the "av_dgid.gid_guid" field from the
617 	 * "ah_save_guid" field we have previously saved away.  The reason
618 	 * for this is described in hermon_ah_alloc() and hermon_ah_modify().
619 	 */
620 	hermon_get_addr_path(state, (hermon_hw_addr_path_t *)ah->ah_udav,
621 	    attr_p, HERMON_ADDRPATH_UDAV);
622 
623 	attr_p->av_dgid.gid_guid = ah->ah_save_guid;
624 
625 	mutex_exit(&ah->ah_lock);
626 	return (DDI_SUCCESS);
627 }
628 
629 
630 /*
631  * hermon_ah_modify()
632  *    Context: Can be called from interrupt or base context.
633  */
634 /* ARGSUSED */
635 int
636 hermon_ah_modify(hermon_state_t *state, hermon_ahhdl_t ah,
637     ibt_adds_vect_t *attr_p)
638 {
639 	hermon_hw_udav_t	old_udav;
640 	uint64_t		data_old;
641 	int			status, size, i;
642 
643 	/* Validate that specified port number is legal */
644 	if (!hermon_portnum_is_valid(state, attr_p->av_port_num)) {
645 		return (IBT_HCA_PORT_INVALID);
646 	}
647 
648 	mutex_enter(&ah->ah_lock);
649 
650 	/* Save a copy of the current UDAV data in old_udav. */
651 	bcopy(ah->ah_udav, &old_udav, sizeof (hermon_hw_udav_t));
652 
653 	/*
654 	 * Fill in the new UDAV with the caller's data, passed in via the
655 	 * "ibt_adds_vect_t" structure.
656 	 *
657 	 * NOTE: We also need to save away a copy of the "av_dgid.gid_guid"
658 	 * field here (just as we did during hermon_ah_alloc()) because we
659 	 * may need to return it later to the IBTF (as a result of a
660 	 * subsequent query operation).  As explained in hermon_ah_alloc(),
661 	 * unlike the other UDAV parameters, the value of "av_dgid.gid_guid"
662 	 * is not always preserved. The reason for this is described in
663 	 * hermon_set_addr_path().
664 	 */
665 	status = hermon_set_addr_path(state, attr_p,
666 	    (hermon_hw_addr_path_t *)ah->ah_udav, HERMON_ADDRPATH_UDAV);
667 	if (status != DDI_SUCCESS) {
668 		mutex_exit(&ah->ah_lock);
669 		return (status);
670 	}
671 	ah->ah_save_guid = attr_p->av_dgid.gid_guid;
672 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(ah->ah_udav)))
673 	ah->ah_udav->sl  = attr_p->av_srvl;
674 
675 	/*
676 	 * Copy changes into the new UDAV.
677 	 * Merge the old UDAV contents into the new one.
678 	 *    Note:  We work in 64-bit chunks.  For the first two of these
679 	 *    chunks we read the previous contents of the UDAV, mask off
680 	 *    the modifiable portions (maintaining any of the "reserved"
681 	 *    portions), and then OR the remainder into the new data.
682 	size = sizeof (hermon_hw_udav_t) >> 3;
683 	for (i = 0; i < size; i++) {
684 		data_old = ((uint64_t *)&old_udav)[i];
685 
686 		/*
687 		 * Apply mask to change only the relevant values.
688 		 */
689 		if (i == 0) {
690 			data_old = data_old & HERMON_UDAV_MODIFY_MASK0;
691 		} else if (i == 1) {
692 			data_old = data_old & HERMON_UDAV_MODIFY_MASK1;
693 		} else {
694 			data_old = 0;
695 		}
696 
697 		/* Store the updated values to the UDAV */
698 		((uint64_t *)ah->ah_udav)[i] |= data_old;
699 	}
700 
701 	/*
702 	 * Put the valid PD number back into the UDAV entry, as it
703 	 * might have been clobbered above.
704 	 */
705 	ah->ah_udav->pd = old_udav.pd;
706 
707 
708 	mutex_exit(&ah->ah_lock);
709 	return (DDI_SUCCESS);
710 }
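
/*
 * Editorial sketch (not part of the driver): the masking loop above is
 * a standard read-modify-write that carries selected bits of the old
 * value over into the newly written one.  For a single 64-bit chunk,
 * with PRESERVE (a hypothetical name) standing in for the bits to keep
 * from the old contents (HERMON_UDAV_MODIFY_MASK0/1 play this role
 * above):
 *
 *	new_word |= old_word & PRESERVE;
 */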
711 
712 /*
713  * hermon_mcg_attach()
714  *    Context: Can be called only from user or kernel context.
715  */
716 int
717 hermon_mcg_attach(hermon_state_t *state, hermon_qphdl_t qp, ib_gid_t gid,
718     ib_lid_t lid)
719 {
720 	hermon_rsrc_t		*rsrc;
721 	hermon_hw_mcg_t		*mcg_entry;
722 	hermon_hw_mcg_qp_list_t	*mcg_entry_qplist;
723 	hermon_mcghdl_t		mcg, newmcg;
724 	uint64_t		mgid_hash;
725 	uint32_t		end_indx;
726 	int			status;
727 	uint_t			qp_found;
728 
729 	/*
730 	 * Only UD queue pairs may be attached to an MCG.  Verify
731 	 * that the intended QP is of the appropriate transport type
732 	 */
733 	if (qp->qp_serv_type != HERMON_QP_UD) {
734 		return (IBT_QP_SRV_TYPE_INVALID);
735 	}
736 
737 	/*
738 	 * Check for invalid Multicast DLID.  Specifically, all Multicast
739 	 * LIDs should be within a well defined range.  If the specified LID
740 	 * is outside of that range, then return an error.
741 	 */
742 	if (hermon_mlid_is_valid(lid) == 0) {
743 		return (IBT_MC_MLID_INVALID);
744 	}
745 	/*
746 	 * Check for invalid Multicast GID.  All Multicast GIDs should have
747 	 * a well-defined pattern of bits and flags that are allowable.  If
748 	 * the specified GID does not meet the criteria, then return an error.
749 	 */
750 	if (hermon_mgid_is_valid(gid) == 0) {
751 		return (IBT_MC_MGID_INVALID);
752 	}
753 
754 	/*
755 	 * Compute the MGID hash value.  Since the MCG table is arranged as
756 	 * a number of separate hash chains, this operation converts the
757 	 * specified MGID into the starting index of an entry in the hash
758 	 * table (i.e. the index for the start of the appropriate hash chain).
759 	 * Subsequent operations below will walk the chain searching for the
760 	 * right place to add this new QP.
761 	 */
762 	status = hermon_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
763 	    &mgid_hash, HERMON_SLEEPFLAG_FOR_CONTEXT());
764 	if (status != HERMON_CMD_SUCCESS) {
765 		cmn_err(CE_CONT, "Hermon: MGID_HASH command failed: %08x\n",
766 		    status);
767 		if (status == HERMON_CMD_INVALID_STATUS) {
768 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
769 		}
770 		return (ibc_get_ci_failure(0));
771 	}
772 
773 	/*
774 	 * Grab the multicast group mutex.  Then grab the pre-allocated
775 	 * temporary buffer used for holding and/or modifying MCG entries.
776 	 * Zero out the temporary MCG entry before we begin.
777 	 */
778 	mutex_enter(&state->hs_mcglock);
779 	mcg_entry = state->hs_mcgtmp;
780 	mcg_entry_qplist = HERMON_MCG_GET_QPLIST_PTR(mcg_entry);
781 	bzero(mcg_entry, HERMON_MCGMEM_SZ(state));
782 
783 	/*
784 	 * Walk through the array of MCG entries starting at "mgid_hash".
785 	 * Try to find the appropriate place for this new QP to be added.
786 	 * The search ends when the first entry of the chain has MGID == 0
787 	 * (which means that the hash chain is empty), or when we find
788 	 * an entry with the same MGID (in which case we'll add the QP to
789 	 * that MCG), or when we come to the end of the chain (in which
790 	 * case this is the first QP being added to the multicast group that
791 	 * corresponds to the MGID).  The hermon_mcg_walk_mgid_hash() routine
792 	 * walks the list and returns an index into the MCG table.  The entry
793 	 * at this index is then checked to determine which case we have
794 	 * fallen into (see below).  Note:  We are using the "shadow" MCG
795 	 * list (of hermon_mcg_t structs) for this lookup because the real
796 	 * MCG entries are in hardware (and the lookup process would be much
797 	 * more time consuming).
798 	 */
799 	end_indx = hermon_mcg_walk_mgid_hash(state, mgid_hash, gid, NULL);
800 	mcg	 = &state->hs_mcghdl[end_indx];
801 
802 	/*
803 	 * If MGID == 0, then the hash chain is empty.  Just fill in the
804 	 * current entry.  Note:  No need to allocate an MCG table entry
805 	 * as all the hash chain "heads" are already preallocated.
806 	 */
807 	if ((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) {
808 
809 		/* Fill in the current entry in the "shadow" MCG list */
810 		hermon_mcg_setup_new_hdr(mcg, mcg_entry, gid, NULL);
811 
812 		/*
813 		 * Try to add the new QP number to the list.  This (and the
814 		 * above) routine fills in a temporary MCG.  The "mcg_entry"
815 		 * and "mcg_entry_qplist" pointers simply point to different
816 		 * offsets within the same temporary copy of the MCG (for
817 		 * convenience).  Note:  If this fails, we need to invalidate
818 		 * the entries we've already put into the "shadow" list entry
819 		 * above.
820 		 */
821 		status = hermon_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp,
822 		    &qp_found);
823 		if (status != DDI_SUCCESS) {
824 			bzero(mcg, sizeof (struct hermon_sw_mcg_list_s));
825 			mutex_exit(&state->hs_mcglock);
826 			return (status);
827 		}
828 		/* set the member count */
829 		if (!qp_found)
830 			mcg_entry->member_cnt = (mcg->mcg_num_qps + 1);
831 
832 		/*
833 		 * Once the temporary MCG has been filled in, write the entry
834 		 * into the appropriate location in the Hermon MCG entry table.
835 		 * If it's successful, then drop the lock and return success.
836 		 * Note: In general, this operation shouldn't fail.  If it
837 		 * does, then it is an indication that something (probably in
838 		 * HW, but maybe in SW) has gone seriously wrong.  We still
839 		 * want to zero out the entries that we've filled in above
840 		 * (in the hermon_mcg_setup_new_hdr() routine).
841 		 */
842 		status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
843 		    HERMON_CMD_NOSLEEP_SPIN);
844 		if (status != HERMON_CMD_SUCCESS) {
845 			bzero(mcg, sizeof (struct hermon_sw_mcg_list_s));
846 			mutex_exit(&state->hs_mcglock);
847 			HERMON_WARNING(state, "failed to write MCG entry");
848 			cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
849 			    "%08x\n", status);
850 			if (status == HERMON_CMD_INVALID_STATUS) {
851 				hermon_fm_ereport(state, HCA_SYS_ERR,
852 				    HCA_ERR_SRV_LOST);
853 			}
854 			return (ibc_get_ci_failure(0));
855 		}
856 
857 		/*
858 		 * Now that we know all the Hermon firmware accesses have been
859 		 * successful, we update the "shadow" MCG entry by incrementing
860 		 * the "number of attached QPs" count.
861 		 *
862 		 * We increment only if the QP is not already part of the
863 		 * MCG by checking the 'qp_found' flag returned from the
864 		 * qplist_add above.
865 		 */
866 		if (!qp_found) {
867 			mcg->mcg_num_qps++;
868 
869 			/*
870 			 * Increment the refcnt for this QP.  Because the QP
871 			 * was added to this MCG, the refcnt must be
872 			 * incremented.
873 			 */
874 			hermon_qp_mcg_refcnt_inc(qp);
875 		}
876 
877 		/*
878 		 * We drop the lock and return success.
879 		 */
880 		mutex_exit(&state->hs_mcglock);
881 		return (DDI_SUCCESS);
882 	}
883 
884 	/*
885 	 * If the specified MGID matches the MGID in the current entry, then
886 	 * we need to try to add the QP to the current MCG entry.  In this
887 	 * case, it means that we need to read the existing MCG entry (into
888 	 * the temporary MCG), add the new QP number to the temporary entry
889 	 * (using the same method we used above), and write the entry back
890 	 * to the hardware (same as above).
891 	 */
892 	if ((mcg->mcg_mgid_h == gid.gid_prefix) &&
893 	    (mcg->mcg_mgid_l == gid.gid_guid)) {
894 
895 		/*
896 		 * Read the current MCG entry into the temporary MCG.  Note:
897 		 * In general, this operation shouldn't fail.  If it does,
898 		 * then it is an indication that something (probably in HW,
899 		 * but maybe in SW) has gone seriously wrong.
900 		 */
901 		status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
902 		    HERMON_CMD_NOSLEEP_SPIN);
903 		if (status != HERMON_CMD_SUCCESS) {
904 			mutex_exit(&state->hs_mcglock);
905 			HERMON_WARNING(state, "failed to read MCG entry");
906 			cmn_err(CE_CONT, "Hermon: READ_MGM command failed: "
907 			    "%08x\n", status);
908 			if (status == HERMON_CMD_INVALID_STATUS) {
909 				hermon_fm_ereport(state, HCA_SYS_ERR,
910 				    HCA_ERR_SRV_LOST);
911 			}
912 			return (ibc_get_ci_failure(0));
913 		}
914 
915 		/*
916 		 * Try to add the new QP number to the list.  This routine
917 		 * fills in the necessary pieces of the temporary MCG.  The
918 		 * "mcg_entry_qplist" pointer is used to point to the portion
919 		 * of the temporary MCG that holds the QP numbers.
920 		 *
921 		 * Note: hermon_mcg_qplist_add() returns SUCCESS if it
922 		 * already found the QP in the list.  In this case, the QP is
923 		 * not added on to the list again.  Check the flag 'qp_found'
924 		 * if this value needs to be known.
925 		 *
926 		 */
927 		status = hermon_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp,
928 		    &qp_found);
929 		if (status != DDI_SUCCESS) {
930 			mutex_exit(&state->hs_mcglock);
931 			return (status);
932 		}
933 		/* set the member count */
934 		if (!qp_found)
935 			mcg_entry->member_cnt = (mcg->mcg_num_qps + 1);
936 
937 		/*
938 		 * Once the temporary MCG has been updated, write the entry
939 		 * into the appropriate location in the Hermon MCG entry table.
940 		 * If it's successful, then drop the lock and return success.
941 		 * Note: In general, this operation shouldn't fail.  If it
942 		 * does, then it is an indication that something (probably in
943 		 * HW, but maybe in SW) has gone seriously wrong.
944 		 */
945 		status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
946 		    HERMON_CMD_NOSLEEP_SPIN);
947 		if (status != HERMON_CMD_SUCCESS) {
948 			mutex_exit(&state->hs_mcglock);
949 			HERMON_WARNING(state, "failed to write MCG entry");
950 			cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
951 			    "%08x\n", status);
952 			if (status == HERMON_CMD_INVALID_STATUS) {
953 				hermon_fm_ereport(state, HCA_SYS_ERR,
954 				    HCA_ERR_SRV_LOST);
955 			}
956 			return (ibc_get_ci_failure(0));
957 		}
958 
959 		/*
960 		 * Now that we know all the Hermon firmware accesses have been
961 		 * successful, we update the current "shadow" MCG entry by
962 		 * incrementing the "number of attached QPs" count.
963 		 *
964 		 * We increment only if the QP is not already part of the
965 	 * MCG by checking the 'qp_found' flag returned from the
966 	 * qplist_add above.
967 		 */
968 		if (!qp_found) {
969 			mcg->mcg_num_qps++;
970 
971 			/*
972 			 * Increment the refcnt for this QP.  Because the QP
973 			 * was added to this MCG, the refcnt must be
974 			 * incremented.
975 			 */
976 			hermon_qp_mcg_refcnt_inc(qp);
977 		}
978 
979 		/*
980 		 * We drop the lock and return success.
981 		 */
982 		mutex_exit(&state->hs_mcglock);
983 		return (DDI_SUCCESS);
984 	}
985 
986 	/*
987 	 * If we've reached here, then we're at the end of the hash chain.
988 	 * We need to allocate a new MCG entry, fill it in, write it to Hermon,
989 	 * and update the previous entry to link the new one to the end of the
990 	 * chain.
991 	 */
992 
993 	/*
994 	 * Allocate an MCG table entry.  This will be filled in with all
995 	 * the necessary parameters to define the multicast group.  Then it
996 	 * will be written to the hardware in the next-to-last step below.
997 	 */
998 	status = hermon_rsrc_alloc(state, HERMON_MCG, 1, HERMON_NOSLEEP, &rsrc);
999 	if (status != DDI_SUCCESS) {
1000 		mutex_exit(&state->hs_mcglock);
1001 		return (IBT_INSUFF_RESOURCE);
1002 	}
1003 
1004 	/*
1005 	 * Fill in the new entry in the "shadow" MCG list.  Note:  Just as
1006 	 * it does above, hermon_mcg_setup_new_hdr() also fills in a portion
1007 	 * of the temporary MCG entry (the rest of which will be filled in by
1008 	 * hermon_mcg_qplist_add() below)
1009 	 */
1010 	newmcg = &state->hs_mcghdl[rsrc->hr_indx];
1011 	hermon_mcg_setup_new_hdr(newmcg, mcg_entry, gid, rsrc);
1012 
1013 	/*
1014 	 * Try to add the new QP number to the list.  This routine fills in
1015 	 * the final necessary pieces of the temporary MCG.  The
1016 	 * "mcg_entry_qplist" pointer is used to point to the portion of the
1017 	 * temporary MCG that holds the QP numbers.  If we fail here, we
1018 	 * must undo the previous resource allocation.
1019 	 *
1020 	 * Note: hermon_mcg_qplist_add() can return SUCCESS if it already
1021 	 * found the QP in the list.  In this case, the QP is not added on to
1022 	 * the list again.  Check the flag 'qp_found' if this value needs
1023 	 * to be known.
1024 	 */
1025 	status = hermon_mcg_qplist_add(state, newmcg, mcg_entry_qplist, qp,
1026 	    &qp_found);
1027 	if (status != DDI_SUCCESS) {
1028 		bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
1029 		hermon_rsrc_free(state, &rsrc);
1030 		mutex_exit(&state->hs_mcglock);
1031 		return (status);
1032 	}
1033 	/* set the member count */
1034 	mcg_entry->member_cnt = (newmcg->mcg_num_qps + 1);
1035 
1036 	/*
1037 	 * Once the temporary MCG has been updated, write the entry into the
1038 	 * appropriate location in the Hermon MCG entry table.  If this is
1039 	 * successful, then we need to chain the previous entry to this one.
1040 	 * Note: In general, this operation shouldn't fail.  If it does, then
1041 	 * it is an indication that something (probably in HW, but maybe in
1042 	 * SW) has gone seriously wrong.
1043 	 */
1044 	status = hermon_write_mgm_cmd_post(state, mcg_entry, rsrc->hr_indx,
1045 	    HERMON_CMD_NOSLEEP_SPIN);
1046 	if (status != HERMON_CMD_SUCCESS) {
1047 		bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
1048 		hermon_rsrc_free(state, &rsrc);
1049 		mutex_exit(&state->hs_mcglock);
1050 		HERMON_WARNING(state, "failed to write MCG entry");
1051 		cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
1052 		    status);
1053 		if (status == HERMON_CMD_INVALID_STATUS) {
1054 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1055 		}
1056 		return (ibc_get_ci_failure(0));
1057 	}
1058 
1059 	/*
1060 	 * Now read the current MCG entry (the one previously at the end of
1061 	 * hash chain) into the temporary MCG.  We are going to update its
1062 	 * "next_gid_indx" now and write the entry back to the MCG table.
1063 	 * Note:  In general, this operation shouldn't fail.  If it does, then
1064 	 * it is an indication that something (probably in HW, but maybe in SW)
1065 	 * has gone seriously wrong.  We will free up the MCG entry resource,
1066 	 * but we will not undo the previously written MCG entry in the HW.
1067 	 * This is OK, though, because the MCG entry is not currently attached
1068 	 * to any hash chain.
1069 	 */
1070 	status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
1071 	    HERMON_CMD_NOSLEEP_SPIN);
1072 	if (status != HERMON_CMD_SUCCESS) {
1073 		bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
1074 		hermon_rsrc_free(state, &rsrc);
1075 		mutex_exit(&state->hs_mcglock);
1076 		HERMON_WARNING(state, "failed to read MCG entry");
1077 		cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
1078 		    status);
1079 		if (status == HERMON_CMD_INVALID_STATUS) {
1080 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1081 		}
1082 		return (ibc_get_ci_failure(0));
1083 	}
1084 
1085 	/*
1086 	 * Finally, we update the "next_gid_indx" field in the temporary MCG
1087 	 * and attempt to write the entry back into the Hermon MCG table.  If
1088 	 * this succeeds, then we update the "shadow" list to reflect the
1089 	 * change, drop the lock, and return success.  Note:  In general, this
1090 	 * operation shouldn't fail.  If it does, then it is an indication
1091 	 * that something (probably in HW, but maybe in SW) has gone seriously
1092 	 * wrong.  Just as we do above, we will free up the MCG entry resource,
1093 	 * but we will not try to undo the previously written MCG entry.  This
1094 	 * is OK, though, because (since we failed here to update the end of
1095 	 * the chain) that other entry is not currently attached to any chain.
1096 	 */
1097 	mcg_entry->next_gid_indx = rsrc->hr_indx;
1098 	status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
1099 	    HERMON_CMD_NOSLEEP_SPIN);
1100 	if (status != HERMON_CMD_SUCCESS) {
1101 		bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
1102 		hermon_rsrc_free(state, &rsrc);
1103 		mutex_exit(&state->hs_mcglock);
1104 		HERMON_WARNING(state, "failed to write MCG entry");
1105 		cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
1106 		    status);
1107 		if (status == HERMON_CMD_INVALID_STATUS) {
1108 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1109 		}
1110 		return (ibc_get_ci_failure(0));
1111 	}
1112 	mcg = &state->hs_mcghdl[end_indx];
1113 	mcg->mcg_next_indx = rsrc->hr_indx;
1114 
1115 	/*
1116 	 * Now that we know all the Hermon firmware accesses have been
1117 	 * successful, we update the new "shadow" MCG entry by incrementing
1118 	 * the "number of attached QPs" count.  Then we drop the lock and
1119 	 * return success.
1120 	 */
1121 	newmcg->mcg_num_qps++;
1122 
1123 	/*
1124 	 * Increment the refcnt for this QP.  Because the QP
1125 	 * was added to this MCG, the refcnt must be
1126 	 * incremented.
1127 	 */
1128 	hermon_qp_mcg_refcnt_inc(qp);
1129 
1130 	mutex_exit(&state->hs_mcglock);
1131 	return (DDI_SUCCESS);
1132 }
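
/*
 * Editorial note on the ordering in hermon_mcg_attach() above: when a
 * new entry is appended to a hash chain, the hardware writes happen in
 * an order that never exposes a half-linked chain:
 *
 *	1) WRITE_MGM the new entry at rsrc->hr_indx	(still unlinked)
 *	2) READ_MGM the old tail at end_indx
 *	3) set the old tail's next_gid_indx = rsrc->hr_indx
 *	4) WRITE_MGM the old tail back			(link established)
 *
 * If step 4 fails, the new entry is simply unreferenced -- no hash
 * chain points to it -- so only the software resource needs freeing.
 */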
1133 
1134 
1135 /*
1136  * hermon_mcg_detach()
1137  *    Context: Can be called only from user or kernel context.
1138  */
1139 int
1140 hermon_mcg_detach(hermon_state_t *state, hermon_qphdl_t qp, ib_gid_t gid,
1141     ib_lid_t lid)
1142 {
1143 	hermon_hw_mcg_t		*mcg_entry;
1144 	hermon_hw_mcg_qp_list_t	*mcg_entry_qplist;
1145 	hermon_mcghdl_t		mcg;
1146 	uint64_t		mgid_hash;
1147 	uint32_t		end_indx, prev_indx;
1148 	int			status;
1149 
1150 	/*
1151 	 * Check for invalid Multicast DLID.  Specifically, all Multicast
1152 	 * LIDs should be within a well defined range.  If the specified LID
1153 	 * is outside of that range, then return an error.
1154 	 */
1155 	if (hermon_mlid_is_valid(lid) == 0) {
1156 		return (IBT_MC_MLID_INVALID);
1157 	}
1158 
1159 	/*
1160 	 * Compute the MGID hash value.  As described above, the MCG table is
1161 	 * arranged as a number of separate hash chains.  This operation
1162 	 * converts the specified MGID into the starting index of an entry in
1163 	 * the hash table (i.e. the index for the start of the appropriate
1164 	 * hash chain).  Subsequent operations below will walk the chain
1165 	 * searching for a matching entry from which to attempt to remove
1166 	 * the specified QP.
1167 	 */
1168 	status = hermon_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
1169 	    &mgid_hash, HERMON_SLEEPFLAG_FOR_CONTEXT());
1170 	if (status != HERMON_CMD_SUCCESS) {
1171 		cmn_err(CE_CONT, "Hermon: MGID_HASH command failed: %08x\n",
1172 		    status);
1173 		if (status == HERMON_CMD_INVALID_STATUS) {
1174 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1175 		}
1176 		return (ibc_get_ci_failure(0));
1177 	}
1178 
1179 	/*
1180 	 * Grab the multicast group mutex.  Then grab the pre-allocated
1181 	 * temporary buffer used for holding and/or modifying MCG entries.
1182 	 */
1183 	mutex_enter(&state->hs_mcglock);
1184 	mcg_entry = state->hs_mcgtmp;
1185 	mcg_entry_qplist = HERMON_MCG_GET_QPLIST_PTR(mcg_entry);
1186 
1187 	/*
1188 	 * Walk through the array of MCG entries starting at "mgid_hash".
1189 	 * Try to find an MCG entry with a matching MGID.  The
1190 	 * hermon_mcg_walk_mgid_hash() routine walks the list and returns an
1191 	 * index into the MCG table.  The entry at this index is checked to
1192 	 * determine whether it is a match or not.  If it is a match, then
1193 	 * we continue on to attempt to remove the QP from the MCG.  If it
1194 	 * is not a match (or not a valid MCG entry), then we return an error.
1195 	 */
1196 	end_indx = hermon_mcg_walk_mgid_hash(state, mgid_hash, gid, &prev_indx);
1197 	mcg	 = &state->hs_mcghdl[end_indx];
1198 
1199 	/*
1200 	 * If MGID == 0 (the hash chain is empty) or if the specified MGID
1201 	 * does not match the MGID in the current entry, then return
1202 	 * IBT_MC_MGID_INVALID (to indicate that the specified MGID is not
1203 	 * valid).
1204 	 */
1205 	if (((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) ||
1206 	    ((mcg->mcg_mgid_h != gid.gid_prefix) ||
1207 	    (mcg->mcg_mgid_l != gid.gid_guid))) {
1208 		mutex_exit(&state->hs_mcglock);
1209 		return (IBT_MC_MGID_INVALID);
1210 	}
1211 
1212 	/*
1213 	 * Read the current MCG entry into the temporary MCG.  Note: In
1214 	 * general, this operation shouldn't fail.  If it does, then it is
1215 	 * an indication that something (probably in HW, but maybe in SW)
1216 	 * has gone seriously wrong.
1217 	 */
1218 	status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
1219 	    HERMON_CMD_NOSLEEP_SPIN);
1220 	if (status != HERMON_CMD_SUCCESS) {
1221 		mutex_exit(&state->hs_mcglock);
1222 		HERMON_WARNING(state, "failed to read MCG entry");
1223 		cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
1224 		    status);
1225 		if (status == HERMON_CMD_INVALID_STATUS) {
1226 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1227 		}
1228 		return (ibc_get_ci_failure(0));
1229 	}
1230 
1231 	/*
1232 	 * Search the QP number list for a match.  If a match is found, then
1233 	 * remove the entry from the QP list.  Otherwise, if no match is found,
1234 	 * return an error.
1235 	 */
1236 	status = hermon_mcg_qplist_remove(mcg, mcg_entry_qplist, qp);
1237 	if (status != DDI_SUCCESS) {
1238 		mutex_exit(&state->hs_mcglock);
1239 		return (status);
1240 	}
1241 
1242 	/*
1243 	 * Decrement the MCG count for this QP.  When the 'qp_mcg'
1244 	 * field becomes 0, then this QP is no longer a member of any
1245 	 * MCG.
1246 	 */
1247 	hermon_qp_mcg_refcnt_dec(qp);
1248 
1249 	/*
1250 	 * If the current MCG's QP number list is about to be made empty
1251 	 * ("mcg_num_qps" == 1), then remove the entry itself from the hash
1252 	 * chain.  Otherwise, just write the updated MCG entry back to the
1253 	 * hardware.  In either case, once we successfully update the hardware
1254 	 * chain, then we decrement the "shadow" list entry's "mcg_num_qps"
1255 	 * count (or zero out the entire "shadow" list entry) before returning
1256 	 * success.  Note:  Zeroing out the "shadow" list entry is done
1257 	 * inside of hermon_mcg_hash_list_remove().
1258 	 */
1259 	if (mcg->mcg_num_qps == 1) {
1260 
1261 		/* Remove an MCG entry from the hash chain */
1262 		status = hermon_mcg_hash_list_remove(state, end_indx, prev_indx,
1263 		    mcg_entry);
1264 		if (status != DDI_SUCCESS) {
1265 			mutex_exit(&state->hs_mcglock);
1266 			return (status);
1267 		}
1268 
1269 	} else {
1270 		/*
1271 		 * Write the updated MCG entry back to the Hermon MCG table.
1272 		 * If this succeeds, then we update the "shadow" list to
1273 		 * reflect the change (i.e. decrement the "mcg_num_qps"),
1274 		 * drop the lock, and return success.  Note:  In general,
1275 		 * this operation shouldn't fail.  If it does, then it is an
1276 		 * indication that something (probably in HW, but maybe in SW)
1277 		 * has gone seriously wrong.
1278 		 */
1279 		mcg_entry->member_cnt = (mcg->mcg_num_qps - 1);
1280 		status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
1281 		    HERMON_CMD_NOSLEEP_SPIN);
1282 		if (status != HERMON_CMD_SUCCESS) {
1283 			mutex_exit(&state->hs_mcglock);
1284 			HERMON_WARNING(state, "failed to write MCG entry");
1285 			cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
1286 			    "%08x\n", status);
1287 			if (status == HERMON_CMD_INVALID_STATUS) {
1288 				hermon_fm_ereport(state, HCA_SYS_ERR,
1289 				    HCA_ERR_SRV_LOST);
1290 			}
1291 			return (ibc_get_ci_failure(0));
1292 		}
1293 		mcg->mcg_num_qps--;
1294 	}
1295 
1296 	mutex_exit(&state->hs_mcglock);
1297 	return (DDI_SUCCESS);
1298 }
1299 
1300 /*
1301  * hermon_qp_mcg_refcnt_inc()
1302  *    Context: Can be called from interrupt or base context.
1303  */
1304 static void
1305 hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp)
1306 {
1307 	/* Increment the QP's MCG reference count */
1308 	mutex_enter(&qp->qp_lock);
1309 	qp->qp_mcg_refcnt++;
1310 	mutex_exit(&qp->qp_lock);
1311 }
1312 
1313 
1314 /*
1315  * hermon_qp_mcg_refcnt_dec()
1316  *    Context: Can be called from interrupt or base context.
1317  */
1318 static void
1319 hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp)
1320 {
1321 	/* Decrement the QP's MCG reference count */
1322 	mutex_enter(&qp->qp_lock);
1323 	qp->qp_mcg_refcnt--;
1324 	mutex_exit(&qp->qp_lock);
1325 }
1326 
1327 
1328 /*
1329  * hermon_mcg_qplist_add()
1330  *    Context: Can be called from interrupt or base context.
1331  */
1332 static int
1333 hermon_mcg_qplist_add(hermon_state_t *state, hermon_mcghdl_t mcg,
1334     hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp,
1335     uint_t *qp_found)
1336 {
1337 	uint_t		qplist_indx;
1338 
1339 	ASSERT(MUTEX_HELD(&state->hs_mcglock));
1340 
1341 	qplist_indx = mcg->mcg_num_qps;
1342 
1343 	/*
1344 	 * Determine if we have exceeded the maximum number of QPs per
1345 	 * multicast group.  If we have, then return an error.
1346 	 */
1347 	if (qplist_indx >= state->hs_cfg_profile->cp_num_qp_per_mcg) {
1348 		return (IBT_HCA_MCG_QP_EXCEEDED);
1349 	}
1350 
1351 	/*
1352 	 * Determine if the QP is already attached to this MCG table.  If it
1353 	 * is, then we break out and treat this operation as a NO-OP
1354 	 */
1355 	for (qplist_indx = 0; qplist_indx < mcg->mcg_num_qps;
1356 	    qplist_indx++) {
1357 		if (mcg_qplist[qplist_indx].qpn == qp->qp_qpnum) {
1358 			break;
1359 		}
1360 	}
1361 
1362 	/*
1363 	 * If the QP was already on the list, set 'qp_found' to TRUE.  We still
1364 	 * return SUCCESS in this case, but the qplist will not have been
1365 	 * updated because the QP was already on the list.
1366 	 */
1367 	if (qplist_indx < mcg->mcg_num_qps) {
1368 		*qp_found = 1;
1369 	} else {
1370 		/*
1371 		 * Otherwise, append the new QP number to the end of the
1372 		 * current QP list.  Note: We will increment the "mcg_num_qps"
1373 		 * field on the "shadow" MCG list entry later (after we know
1374 		 * that all necessary Hermon firmware accesses have been
1375 		 * successful).
1376 		 *
1377 		 * Set 'qp_found' to 0 so we know the QP was added on to the
1378 		 * list for sure.
1379 		 */
1380 		mcg_qplist[qplist_indx].qpn =
1381 		    (qp->qp_qpnum | HERMON_MCG_QPN_BLOCK_LB);
1382 		*qp_found = 0;
1383 	}
1384 
1385 	return (DDI_SUCCESS);
1386 }
1387 
1388 
1389 
1390 /*
1391  * hermon_mcg_qplist_remove()
1392  *    Context: Can be called from interrupt or base context.
1393  */
1394 static int
1395 hermon_mcg_qplist_remove(hermon_mcghdl_t mcg,
1396     hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp)
1397 {
1398 	uint_t		i, qplist_indx;
1399 
1400 	/*
1401 	 * Search the MCG QP list for a matching QPN.  When
1402 	 * it's found, we swap the last entry with the current
1403 	 * one, set the last entry to zero, and return (the caller
1404 	 * decrements the QP count).  If it's not found, then it's
1405 	 * an error.
1406 	 */
1407 	qplist_indx = mcg->mcg_num_qps;
1408 	for (i = 0; i < qplist_indx; i++) {
1409 		if (mcg_qplist[i].qpn == qp->qp_qpnum) {
1410 			mcg_qplist[i] = mcg_qplist[qplist_indx - 1];
1411 			mcg_qplist[qplist_indx - 1].qpn = 0;
1412 
1413 			return (DDI_SUCCESS);
1414 		}
1415 	}
1416 
1417 	return (IBT_QP_HDL_INVALID);
1418 }
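
/*
 * Editorial sketch (not part of the driver): removing an element from
 * an unordered array by overwriting it with the last element is O(1)
 * and leaves no hole:
 *
 *	list[i] = list[n - 1];		move the last entry into the hole
 *	list[n - 1].qpn = 0;		invalidate the old last slot
 *	n--;				(the caller decrements mcg_num_qps)
 */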
1419 
1420 
1421 /*
1422  * hermon_mcg_walk_mgid_hash()
1423  *    Context: Can be called from interrupt or base context.
1424  */
1425 static uint_t
1426 hermon_mcg_walk_mgid_hash(hermon_state_t *state, uint64_t start_indx,
1427     ib_gid_t mgid, uint_t *p_indx)
1428 {
1429 	hermon_mcghdl_t	curr_mcghdl;
1430 	uint_t		curr_indx, prev_indx;
1431 
1432 	ASSERT(MUTEX_HELD(&state->hs_mcglock));
1433 
1434 	/* Start at the head of the hash chain */
1435 	curr_indx   = (uint_t)start_indx;
1436 	prev_indx   = curr_indx;
1437 	curr_mcghdl = &state->hs_mcghdl[curr_indx];
1438 
1439 	/* If the first entry in the chain has MGID == 0, then stop */
1440 	if ((curr_mcghdl->mcg_mgid_h == 0) &&
1441 	    (curr_mcghdl->mcg_mgid_l == 0)) {
1442 		goto end_mgid_hash_walk;
1443 	}
1444 
1445 	/* If the first entry in the chain matches the MGID, then stop */
1446 	if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
1447 	    (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
1448 		goto end_mgid_hash_walk;
1449 	}
1450 
1451 	/* Otherwise, walk the hash chain looking for a match */
1452 	while (curr_mcghdl->mcg_next_indx != 0) {
1453 		prev_indx = curr_indx;
1454 		curr_indx = curr_mcghdl->mcg_next_indx;
1455 		curr_mcghdl = &state->hs_mcghdl[curr_indx];
1456 
1457 		if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
1458 		    (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
1459 			break;
1460 		}
1461 	}
1462 
1463 end_mgid_hash_walk:
1464 	/*
1465 	 * If necessary, return the index of the previous entry too.  This
1466 	 * is primarily used for detaching a QP from a multicast group.  It
1467 	 * may be necessary, in that case, to delete an MCG entry from the
1468 	 * hash chain and having the index of the previous entry is helpful.
1469 	 */
1470 	if (p_indx != NULL) {
1471 		*p_indx = prev_indx;
1472 	}
1473 	return (curr_indx);
1474 }
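
/*
 * Editorial sketch (not part of the driver): the walk above is the
 * usual singly-linked-list search with a trailing pointer, kept so a
 * later unlink can patch the predecessor.  In outline:
 *
 *	prev = curr = head;
 *	while (curr->next != 0) {
 *		prev = curr;
 *		curr = curr->next;
 *		if (match(curr))
 *			break;
 *	}
 *	return curr (and optionally hand back prev);
 */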
1475 
1476 
1477 /*
1478  * hermon_mcg_setup_new_hdr()
1479  *    Context: Can be called from interrupt or base context.
1480  */
1481 static void
1482 hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg, hermon_hw_mcg_t *mcg_hdr,
1483     ib_gid_t mgid, hermon_rsrc_t *mcg_rsrc)
1484 {
1485 	/*
1486 	 * Fill in the fields of the "shadow" entry used by software
1487 	 * to track MCG hardware entry
1488 	 */
1489 	mcg->mcg_mgid_h	   = mgid.gid_prefix;
1490 	mcg->mcg_mgid_l	   = mgid.gid_guid;
1491 	mcg->mcg_rsrcp	   = mcg_rsrc;
1492 	mcg->mcg_next_indx = 0;
1493 	mcg->mcg_num_qps   = 0;
1494 
1495 	/*
1496 	 * Fill the header fields of the MCG entry (in the temporary copy)
1497 	 */
1498 	mcg_hdr->mgid_h		= mgid.gid_prefix;
1499 	mcg_hdr->mgid_l		= mgid.gid_guid;
1500 	mcg_hdr->next_gid_indx	= 0;
1501 }
1502 
1503 
1504 /*
1505  * hermon_mcg_hash_list_remove()
1506  *    Context: Can be called only from user or kernel context.
1507  */
1508 static int
1509 hermon_mcg_hash_list_remove(hermon_state_t *state, uint_t curr_indx,
1510     uint_t prev_indx, hermon_hw_mcg_t *mcg_entry)
1511 {
1512 	hermon_mcghdl_t		curr_mcg, prev_mcg, next_mcg;
1513 	uint_t			next_indx;
1514 	int			status;
1515 
1516 	/* Get the pointer to "shadow" list for current entry */
1517 	curr_mcg = &state->hs_mcghdl[curr_indx];
1518 
1519 	/*
1520 	 * If this is the first entry on a hash chain, then attempt to replace
1521 	 * the entry with the next entry on the chain.  If there are no
1522 	 * subsequent entries on the chain, then this is the only entry and
1523 	 * should be invalidated.
1524 	 */
1525 	if (curr_indx == prev_indx) {
1526 
1527 		/*
1528 		 * If this is the only entry on the chain, then invalidate it.
1529 		 * Note:  Invalidating an MCG entry means writing all zeros
1530 		 * to the entry.  This is only necessary for those MCG
1531 		 * entries that are the "head" entries of the individual hash
1532 		 * chains.  Regardless of whether this operation returns
1533 		 * success or failure, return that result to the caller.
1534 		 */
1535 		next_indx = curr_mcg->mcg_next_indx;
1536 		if (next_indx == 0) {
1537 			status = hermon_mcg_entry_invalidate(state, mcg_entry,
1538 			    curr_indx);
1539 			bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
1540 			return (status);
1541 		}
1542 
1543 		/*
1544 		 * Otherwise, this is just the first entry on the chain, so
1545 		 * grab the next one
1546 		 */
1547 		next_mcg = &state->hs_mcghdl[next_indx];
1548 
1549 		/*
1550 		 * Read the next MCG entry into the temporary MCG.  Note:
1551 		 * In general, this operation shouldn't fail.  If it does,
1552 		 * then it is an indication that something (probably in HW,
1553 		 * but maybe in SW) has gone seriously wrong.
1554 		 */
1555 		status = hermon_read_mgm_cmd_post(state, mcg_entry, next_indx,
1556 		    HERMON_CMD_NOSLEEP_SPIN);
1557 		if (status != HERMON_CMD_SUCCESS) {
1558 			HERMON_WARNING(state, "failed to read MCG entry");
1559 			cmn_err(CE_CONT, "Hermon: READ_MGM command failed: "
1560 			    "%08x\n", status);
1561 			if (status == HERMON_CMD_INVALID_STATUS) {
1562 				hermon_fm_ereport(state, HCA_SYS_ERR,
1563 				    HCA_ERR_SRV_LOST);
1564 			}
1565 			return (ibc_get_ci_failure(0));
1566 		}
1567 
1568 		/*
1569 		 * Copy/Write the temporary MCG back to the hardware MCG list
1570 		 * using the current index.  This essentially removes the
1571 		 * current MCG entry from the list by writing over it with
1572 		 * the next one.  If this is successful, then we can do the
1573 		 * same operation for the "shadow" list.  And we can also
1574 		 * free up the Hermon MCG entry resource that was associated
1575 		 * with the (old) next entry.  Note:  In general, this
1576 		 * operation shouldn't fail.  If it does, then it is an
1577 		 * indication that something (probably in HW, but maybe in SW)
1578 		 * has gone seriously wrong.
1579 		 */
1580 		status = hermon_write_mgm_cmd_post(state, mcg_entry, curr_indx,
1581 		    HERMON_CMD_NOSLEEP_SPIN);
1582 		if (status != HERMON_CMD_SUCCESS) {
1583 			HERMON_WARNING(state, "failed to write MCG entry");
1584 			cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
1585 			    "%08x\n", status);
1586 			if (status == HERMON_CMD_INVALID_STATUS) {
1587 				hermon_fm_ereport(state, HCA_SYS_ERR,
1588 				    HCA_ERR_SRV_LOST);
1589 			}
1590 			return (ibc_get_ci_failure(0));
1591 		}
1592 
1593 		/*
1594 		 * Copy all the software tracking information from the next
1595 		 * entry on the "shadow" MCG list into the current entry on
1596 		 * the list.  Then invalidate (zero out) the other "shadow"
1597 		 * list entry.
1598 		 */
1599 		bcopy(next_mcg, curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
1600 		bzero(next_mcg, sizeof (struct hermon_sw_mcg_list_s));
1601 
1602 		/*
1603 		 * Free up the Hermon MCG entry resource used by the "next"
1604 		 * MCG entry.  That resource is no longer needed by any
1605 		 * MCG entry which is first on a hash chain (like the "next"
1606 		 * entry has just become).
1607 		 */
1608 		hermon_rsrc_free(state, &curr_mcg->mcg_rsrcp);
1609 
1610 		return (DDI_SUCCESS);
1611 	}
1612 
1613 	/*
1614 	 * Otherwise, this is the last entry (or a middle entry) on the hash
1615 	 * chain, so we update the previous entry's "next_gid_indx" field to
1616 	 * make it point instead to the next entry on the chain.  By
1617 	 * skipping over the removed entry in this way, we can then free up
1618 	 * any resources associated with the current entry.  Note:  We don't
1619 	 * need to invalidate the "skipped over" hardware entry because it
1620 	 * will no longer be connected to any hash chain, and if/when it is
1621 	 * finally re-used, it will be written with entirely new values.
1622 	 */
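	/*
	 * Illustratively (indices are arbitrary):
	 *
	 *	before:	[prev] -> [curr] -> [next] -> ... -> 0
	 *	after:	[prev] ----------> [next] -> ... -> 0
	 *
	 * Only the previous entry's "next_gid_indx" is rewritten; the
	 * skipped-over hardware entry is left to be reclaimed later.
	 */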
1623 
1624 	/*
1625 	 * Read the next MCG entry into the temporary MCG.  Note:  In general,
1626 	 * this operation shouldn't fail.  If it does, then it is an
1627 	 * indication that something (probably in HW, but maybe in SW) has
1628 	 * gone seriously wrong.
1629 	 */
1630 	status = hermon_read_mgm_cmd_post(state, mcg_entry, prev_indx,
1631 	    HERMON_CMD_NOSLEEP_SPIN);
1632 	if (status != HERMON_CMD_SUCCESS) {
1633 		HERMON_WARNING(state, "failed to read MCG entry");
1634 		cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
1635 		    status);
1636 		if (status == HERMON_CMD_INVALID_STATUS) {
1637 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1638 		}
1639 		return (ibc_get_ci_failure(0));
1640 	}
1641 
1642 	/*
1643 	 * Finally, we update the "next_gid_indx" field in the temporary MCG
1644 	 * and attempt to write the entry back into the Hermon MCG table.  If
1645 	 * this succeeds, then we update the "shadow" list to reflect the
1646 	 * change, free up the Hermon MCG entry resource that was associated
1647 	 * with the current entry, and return success.  Note:  In general,
1648 	 * this operation shouldn't fail.  If it does, then it is an indication
1649 	 * that something (probably in HW, but maybe in SW) has gone seriously
1650 	 * wrong.
1651 	 */
1652 	mcg_entry->next_gid_indx = curr_mcg->mcg_next_indx;
1653 	status = hermon_write_mgm_cmd_post(state, mcg_entry, prev_indx,
1654 	    HERMON_CMD_NOSLEEP_SPIN);
1655 	if (status != HERMON_CMD_SUCCESS) {
1656 		HERMON_WARNING(state, "failed to write MCG entry");
1657 		cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
1658 		    status);
1659 		if (status == HERMON_CMD_INVALID_STATUS) {
1660 			hermon_fm_ereport(state, HCA_SYS_ERR,
1661 			    HCA_ERR_SRV_LOST);
1662 		}
1663 		return (ibc_get_ci_failure(0));
1664 	}
1665 
1666 	/*
1667 	 * Get the pointer to the "shadow" MCG list entry for the previous
1668 	 * MCG.  Update its "mcg_next_indx" to point to the entry after
1669 	 * the current entry.  Note:  This next index may be
1670 	 * zero, indicating the end of the list.
1671 	 */
1672 	prev_mcg = &state->hs_mcghdl[prev_indx];
1673 	prev_mcg->mcg_next_indx = curr_mcg->mcg_next_indx;
1674 
1675 	/*
1676 	 * Free up the Hermon MCG entry resource used by the current entry.
1677 	 * This resource is no longer needed because the chain now skips over
1678 	 * the current entry.  Then invalidate (zero out) the current "shadow"
1679 	 * list entry.
1680 	 */
1681 	hermon_rsrc_free(state, &curr_mcg->mcg_rsrcp);
1682 	bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
1683 
1684 	return (DDI_SUCCESS);
1685 }
1686 
1687 
1688 /*
1689  * hermon_mcg_entry_invalidate()
1690  *    Context: Can be called only from user or kernel context.
1691  */
1692 static int
1693 hermon_mcg_entry_invalidate(hermon_state_t *state, hermon_hw_mcg_t *mcg_entry,
1694     uint_t indx)
1695 {
1696 	int		status;
1697 
1698 	/*
1699 	 * Invalidate the hardware MCG entry by zeroing out this temporary
1700 	 * MCG and writing it to the hardware.  Note: In general, this
1701 	 * operation shouldn't fail.  If it does, then it is an indication
1702 	 * that something (probably in HW, but maybe in SW) has gone seriously
1703 	 * wrong.
1704 	 */
1705 	bzero(mcg_entry, HERMON_MCGMEM_SZ(state));
1706 	status = hermon_write_mgm_cmd_post(state, mcg_entry, indx,
1707 	    HERMON_CMD_NOSLEEP_SPIN);
1708 	if (status != HERMON_CMD_SUCCESS) {
1709 		HERMON_WARNING(state, "failed to write MCG entry");
1710 		cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
1711 		    status);
1712 		if (status == HERMON_CMD_INVALID_STATUS) {
1713 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1714 		}
1715 		return (ibc_get_ci_failure(0));
1716 	}
1717 
1718 	return (DDI_SUCCESS);
1719 }
1720 
1721 
1722 /*
1723  * hermon_mgid_is_valid()
1724  *    Context: Can be called from interrupt or base context.
1725  */
1726 static int
1727 hermon_mgid_is_valid(ib_gid_t gid)
1728 {
1729 	uint_t		topbits, flags, scope;
1730 
1731 	/*
1732 	 * According to IBA 1.1 specification (section 4.1.1) a valid
1733 	 * "multicast GID" must have its top eight bits set to all ones
1734 	 */
1735 	topbits = (gid.gid_prefix >> HERMON_MCG_TOPBITS_SHIFT) &
1736 	    HERMON_MCG_TOPBITS_MASK;
1737 	if (topbits != HERMON_MCG_TOPBITS) {
1738 		return (0);
1739 	}
1740 
1741 	/*
1742 	 * The next 4 bits are the "flag" bits.  These are valid only
1743 	 * if they are "0" (which corresponds to permanently assigned/
1744 	 * "well-known" multicast GIDs) or "1" (for so-called "transient"
1745 	 * multicast GIDs).  All other values are reserved.
1746 	 */
1747 	flags = (gid.gid_prefix >> HERMON_MCG_FLAGS_SHIFT) &
1748 	    HERMON_MCG_FLAGS_MASK;
1749 	if (!((flags == HERMON_MCG_FLAGS_PERM) ||
1750 	    (flags == HERMON_MCG_FLAGS_NONPERM))) {
1751 		return (0);
1752 	}
1753 
1754 	/*
1755 	 * The next 4 bits are the "scope" bits.  These are valid only
1756 	 * if they are "2" (Link-local), "5" (Site-local), "8"
1757 	 * (Organization-local) or "E" (Global).  All other values
1758 	 * are reserved (or currently unassigned).
1759 	 */
1760 	scope = (gid.gid_prefix >> HERMON_MCG_SCOPE_SHIFT) &
1761 	    HERMON_MCG_SCOPE_MASK;
1762 	if (!((scope == HERMON_MCG_SCOPE_LINKLOC) ||
1763 	    (scope == HERMON_MCG_SCOPE_SITELOC)	 ||
1764 	    (scope == HERMON_MCG_SCOPE_ORGLOC)	 ||
1765 	    (scope == HERMON_MCG_SCOPE_GLOBAL))) {
1766 		return (0);
1767 	}
1768 
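	/*
	 * For illustration: a typical IPoIB broadcast MGID of the form
	 * ff12:401b:<pkey>::ffff:ffff has its top eight bits set to 0xFF,
	 * flag bits of 0x1 ("transient"), and scope bits of 0x2
	 * (link-local), and so passes all three checks above.
	 */
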
1769 	/*
1770 	 * If it passes all of the above checks, then we will consider it
1771 	 * a valid multicast GID.
1772 	 */
1773 	return (1);
1774 }
1775 
1776 
1777 /*
1778  * hermon_mlid_is_valid()
1779  *    Context: Can be called from interrupt or base context.
1780  */
1781 static int
1782 hermon_mlid_is_valid(ib_lid_t lid)
1783 {
1784 	/*
1785 	 * According to IBA 1.1 specification (section 4.1.1) a valid
1786 	 * "multicast DLID" must be between 0xC000 and 0xFFFE.
1787 	 */
1788 	if ((lid < IB_LID_MC_FIRST) || (lid > IB_LID_MC_LAST)) {
1789 		return (0);
1790 	}
1791 
1792 	return (1);
1793 }
1794 
1795 
1796 /*
1797  * hermon_pd_alloc()
1798  *    Context: Can be called only from user or kernel context.
1799  */
1800 int
1801 hermon_pd_alloc(hermon_state_t *state, hermon_pdhdl_t *pdhdl, uint_t sleepflag)
1802 {
1803 	hermon_rsrc_t	*rsrc;
1804 	hermon_pdhdl_t	pd;
1805 	int		status;
1806 
1807 	/*
1808 	 * Allocate the software structure for tracking the protection domain
1809 	 * (i.e. the Hermon Protection Domain handle).  By default each PD
1810 	 * structure will have a unique PD number assigned to it.  All that
1811 	 * is necessary is for software to initialize the PD reference count
1812 	 * (to zero) and return success.
1813 	 */
1814 	status = hermon_rsrc_alloc(state, HERMON_PDHDL, 1, sleepflag, &rsrc);
1815 	if (status != DDI_SUCCESS) {
1816 		return (IBT_INSUFF_RESOURCE);
1817 	}
1818 	pd = (hermon_pdhdl_t)rsrc->hr_addr;
1819 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd))
1820 
1821 	pd->pd_refcnt = 0;
1822 	*pdhdl = pd;
1823 
1824 	return (DDI_SUCCESS);
1825 }
1826 
1827 
1828 /*
1829  * hermon_pd_free()
1830  *    Context: Can be called only from user or kernel context.
1831  */
1832 int
1833 hermon_pd_free(hermon_state_t *state, hermon_pdhdl_t *pdhdl)
1834 {
1835 	hermon_rsrc_t	*rsrc;
1836 	hermon_pdhdl_t	pd;
1837 
1838 	/*
1839 	 * Pull all the necessary information from the Hermon Protection Domain
1840 	 * handle.  This is necessary here because the resource for the
1841 	 * PD is going to be freed up as part of this operation.
1842 	 */
1843 	pd   = *pdhdl;
1844 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd))
1845 	rsrc = pd->pd_rsrcp;
1846 
1847 	/*
1848 	 * Check the PD reference count.  If the reference count is non-zero,
1849 	 * then it means that this protection domain is still referenced by
1850 	 * some memory region, queue pair, address handle, or other IB
1851 	 * object.  If so, return an error.  Otherwise, free the
1852 	 * Hermon resource and return success.
1853 	 */
1854 	if (pd->pd_refcnt != 0) {
1855 		return (IBT_PD_IN_USE);
1856 	}
1857 
1858 	/* Free the Hermon Protection Domain handle */
1859 	hermon_rsrc_free(state, &rsrc);
1860 
1861 	/* Set the pdhdl pointer to NULL and return success */
1862 	*pdhdl = (hermon_pdhdl_t)NULL;
1863 
1864 	return (DDI_SUCCESS);
1865 }
1866 
1867 
1868 /*
1869  * hermon_pd_refcnt_inc()
1870  *    Context: Can be called from interrupt or base context.
1871  */
1872 void
1873 hermon_pd_refcnt_inc(hermon_pdhdl_t pd)
1874 {
1875 	/* Increment the protection domain's reference count */
1876 	atomic_inc_32(&pd->pd_refcnt);
1877 }
1878 
1879 
1880 /*
1881  * hermon_pd_refcnt_dec()
1882  *    Context: Can be called from interrupt or base context.
1883  */
1884 void
1885 hermon_pd_refcnt_dec(hermon_pdhdl_t pd)
1886 {
1887 	/* Decrement the protection domain's reference count */
1888 	atomic_dec_32(&pd->pd_refcnt);
1889 }
1890 
1891 
1892 /*
1893  * hermon_port_query()
1894  *    Context: Can be called only from user or kernel context.
1895  */
1896 int
1897 hermon_port_query(hermon_state_t *state, uint_t port, ibt_hca_portinfo_t *pi)
1898 {
1899 	sm_portinfo_t		portinfo;
1900 	sm_guidinfo_t		guidinfo;
1901 	sm_pkey_table_t		pkeytable;
1902 	ib_gid_t		*sgid;
1903 	uint_t			sgid_max, pkey_max, tbl_size;
1904 	int			i, j, indx, status;
1905 	ib_pkey_t		*pkeyp;
1906 	ib_guid_t		*guidp;
1907 
1908 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pi))
1909 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*state))
1910 
1911 	/* Validate that specified port number is legal */
1912 	if (!hermon_portnum_is_valid(state, port)) {
1913 		return (IBT_HCA_PORT_INVALID);
1914 	}
1915 	pkeyp = state->hs_pkey[port - 1];
1916 	guidp = state->hs_guid[port - 1];
1917 
1918 	/*
1919 	 * We use the Hermon MAD_IFC command to post a GetPortInfo MAD
1920 	 * to the firmware (for the specified port number).  This returns
1921 	 * a full PortInfo MAD (in "portinfo") which we subsequently
1922 	 * parse to fill in the "ibt_hca_portinfo_t" structure returned
1923 	 * to the IBTF.
1924 	 */
1925 	status = hermon_getportinfo_cmd_post(state, port,
1926 	    HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
1927 	if (status != HERMON_CMD_SUCCESS) {
1928 		cmn_err(CE_CONT, "Hermon: GetPortInfo (port %02d) command "
1929 		    "failed: %08x\n", port, status);
1930 		if (status == HERMON_CMD_INVALID_STATUS) {
1931 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1932 		}
1933 		return (ibc_get_ci_failure(0));
1934 	}
1935 
1936 	/*
1937 	 * Parse the PortInfo MAD and fill in the IBTF structure
1938 	 */
1939 	pi->p_base_lid		= portinfo.LID;
1940 	pi->p_qkey_violations	= portinfo.Q_KeyViolations;
1941 	pi->p_pkey_violations	= portinfo.P_KeyViolations;
1942 	pi->p_sm_sl		= portinfo.MasterSMSL;
1943 	pi->p_sm_lid		= portinfo.MasterSMLID;
1944 	pi->p_linkstate		= portinfo.PortState;
1945 	pi->p_port_num		= portinfo.LocalPortNum;
1946 	pi->p_phys_state	= portinfo.PortPhysicalState;
1947 	pi->p_width_supported	= portinfo.LinkWidthSupported;
1948 	pi->p_width_enabled	= portinfo.LinkWidthEnabled;
1949 	pi->p_width_active	= portinfo.LinkWidthActive;
1950 	pi->p_speed_supported	= portinfo.LinkSpeedSupported;
1951 	pi->p_speed_enabled	= portinfo.LinkSpeedEnabled;
1952 	pi->p_speed_active	= portinfo.LinkSpeedActive;
1953 	pi->p_mtu		= portinfo.MTUCap;
1954 	pi->p_lmc		= portinfo.LMC;
1955 	pi->p_max_vl		= portinfo.VLCap;
1956 	pi->p_subnet_timeout	= portinfo.SubnetTimeOut;
1957 	pi->p_msg_sz		= ((uint32_t)1 << HERMON_QP_LOG_MAX_MSGSZ);
1958 	tbl_size = state->hs_cfg_profile->cp_log_max_gidtbl;
1959 	pi->p_sgid_tbl_sz	= (1 << tbl_size);
1960 	tbl_size = state->hs_cfg_profile->cp_log_max_pkeytbl;
1961 	pi->p_pkey_tbl_sz	= (1 << tbl_size);
1962 	state->hs_sn_prefix[port - 1] = portinfo.GidPrefix;
1963 
1964 	/*
1965 	 * Convert InfiniBand-defined port capability flags to the format
1966 	 * specified by the IBTF
1967 	 */
1968 	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM)
1969 		pi->p_capabilities |= IBT_PORT_CAP_SM;
1970 	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM_DISABLED)
1971 		pi->p_capabilities |= IBT_PORT_CAP_SM_DISABLED;
1972 	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SNMP_SUPPD)
1973 		pi->p_capabilities |= IBT_PORT_CAP_SNMP_TUNNEL;
1974 	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_DM_SUPPD)
1975 		pi->p_capabilities |= IBT_PORT_CAP_DM;
1976 	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_VM_SUPPD)
1977 		pi->p_capabilities |= IBT_PORT_CAP_VENDOR;
1978 	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_CLNT_REREG_SUPPD)
1979 		pi->p_capabilities |= IBT_PORT_CAP_CLNT_REREG;
1980 
1981 	/*
1982 	 * Fill in the SGID table.  Since the only access to the Hermon
1983 	 * GID tables is through the firmware's MAD_IFC interface, we
1984 	 * post as many GetGUIDInfo MADs as necessary to read in the entire
1985 	 * contents of the SGID table (for the specified port).  Note:  The
1986 	 * GetGUIDInfo command only gets eight GUIDs per operation.  These
1987 	 * GUIDs are then appended to the GID prefix for the port (from the
1988 	 * GetPortInfo above) to form the entire SGID table.
1989 	 */
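	/*
	 * For example, a 32-entry SGID table (p_sgid_tbl_sz == 32) is read
	 * with four GetGUIDInfo MADs, one for each of GUID blocks 0
	 * through 3 (i.e. "i >> 3").
	 */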
1990 	for (i = 0; i < pi->p_sgid_tbl_sz; i += 8) {
1991 		status = hermon_getguidinfo_cmd_post(state, port, i >> 3,
1992 		    HERMON_SLEEPFLAG_FOR_CONTEXT(), &guidinfo);
1993 		if (status != HERMON_CMD_SUCCESS) {
1994 			cmn_err(CE_CONT, "Hermon: GetGUIDInfo (port %02d) "
1995 			    "command failed: %08x\n", port, status);
1996 			if (status == HERMON_CMD_INVALID_STATUS) {
1997 				hermon_fm_ereport(state, HCA_SYS_ERR,
1998 				    HCA_ERR_SRV_LOST);
1999 			}
2000 			return (ibc_get_ci_failure(0));
2001 		}
2002 
2003 		/* Figure out how many of the entries are valid */
2004 		sgid_max = min((pi->p_sgid_tbl_sz - i), 8);
2005 		for (j = 0; j < sgid_max; j++) {
2006 			indx = (i + j);
2007 			sgid = &pi->p_sgid_tbl[indx];
2008 			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sgid))
2009 			sgid->gid_prefix = portinfo.GidPrefix;
2010 			guidp[indx] = sgid->gid_guid =
2011 			    guidinfo.GUIDBlocks[j];
2012 		}
2013 	}
2014 
2015 	/*
2016 	 * Fill in the PKey table.  Just as for the GID tables above, the
2017 	 * only access to the Hermon PKey tables is through the firmware's
2018 	 * MAD_IFC interface.  We post as many GetPKeyTable MADs as necessary
2019 	 * to read in the entire contents of the PKey table (for the specified
2020 	 * port).  Note:  The GetPKeyTable command only gets 32 PKeys per
2021 	 * operation.
2022 	 */
2023 	for (i = 0; i < pi->p_pkey_tbl_sz; i += 32) {
2024 		status = hermon_getpkeytable_cmd_post(state, port, i,
2025 		    HERMON_SLEEPFLAG_FOR_CONTEXT(), &pkeytable);
2026 		if (status != HERMON_CMD_SUCCESS) {
2027 			cmn_err(CE_CONT, "Hermon: GetPKeyTable (port %02d) "
2028 			    "command failed: %08x\n", port, status);
2029 			if (status == HERMON_CMD_INVALID_STATUS) {
2030 				hermon_fm_ereport(state, HCA_SYS_ERR,
2031 				    HCA_ERR_SRV_LOST);
2032 			}
2033 			return (ibc_get_ci_failure(0));
2034 		}
2035 
2036 		/* Figure out how many of the entries are valid */
2037 		pkey_max = min((pi->p_pkey_tbl_sz - i), 32);
2038 		for (j = 0; j < pkey_max; j++) {
2039 			indx = (i + j);
2040 			pkeyp[indx] = pi->p_pkey_tbl[indx] =
2041 			    pkeytable.P_KeyTableBlocks[j];
2042 		}
2043 	}
2044 
2045 	return (DDI_SUCCESS);
2046 }
2047 
2048 
2049 /*
2050  * hermon_port_modify()
2051  *    Context: Can be called only from user or kernel context.
2052  */
2053 /* ARGSUSED */
2054 int
2055 hermon_port_modify(hermon_state_t *state, uint8_t port,
2056     ibt_port_modify_flags_t flags, uint8_t init_type)
2057 {
2058 	sm_portinfo_t		portinfo;
2059 	uint32_t		capmask;
2060 	int			status;
2061 	hermon_hw_set_port_t	set_port;
2062 
2063 	/*
2064 	 * Return an error if either of the unsupported flags is set
2065 	 */
2066 	if ((flags & IBT_PORT_SHUTDOWN) ||
2067 	    (flags & IBT_PORT_SET_INIT_TYPE)) {
2068 		return (IBT_NOT_SUPPORTED);
2069 	}
2070 
2071 	bzero(&set_port, sizeof (set_port));
2072 
2073 	/*
2074 	 * Determine whether we are trying to reset the QKey counter
2075 	 */
2076 	if (flags & IBT_PORT_RESET_QKEY)
2077 		set_port.rqk = 1;
2078 
2079 	/* Validate that specified port number is legal */
2080 	if (!hermon_portnum_is_valid(state, port)) {
2081 		return (IBT_HCA_PORT_INVALID);
2082 	}
2083 
2084 	/*
2085 	 * Use the Hermon MAD_IFC command to post a GetPortInfo MAD to the
2086 	 * firmware (for the specified port number).  This returns a full
2087 	 * PortInfo MAD (in "portinfo") from which we pull the current
2088 	 * capability mask.  We then modify the capability mask as directed
2089 	 * by the "pmod_flags" field, and write the updated capability mask
2090 	 * using the Hermon SET_PORT command (below).
2091 	 */
2092 	status = hermon_getportinfo_cmd_post(state, port,
2093 	    HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
2094 	if (status != HERMON_CMD_SUCCESS) {
2095 		if (status == HERMON_CMD_INVALID_STATUS) {
2096 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2097 		}
2098 		return (ibc_get_ci_failure(0));
2099 	}
2100 
2101 	/*
2102 	 * Convert InfiniBand-defined port capability flags to the format
2103 	 * specified by the IBTF.  Specifically, we modify the capability
2104 	 * mask based on the specified values.
2105 	 */
2106 	capmask = portinfo.CapabilityMask;
2107 
2108 	if (flags & IBT_PORT_RESET_SM)
2109 		capmask &= ~SM_CAP_MASK_IS_SM;
2110 	else if (flags & IBT_PORT_SET_SM)
2111 		capmask |= SM_CAP_MASK_IS_SM;
2112 
2113 	if (flags & IBT_PORT_RESET_SNMP)
2114 		capmask &= ~SM_CAP_MASK_IS_SNMP_SUPPD;
2115 	else if (flags & IBT_PORT_SET_SNMP)
2116 		capmask |= SM_CAP_MASK_IS_SNMP_SUPPD;
2117 
2118 	if (flags & IBT_PORT_RESET_DEVMGT)
2119 		capmask &= ~SM_CAP_MASK_IS_DM_SUPPD;
2120 	else if (flags & IBT_PORT_SET_DEVMGT)
2121 		capmask |= SM_CAP_MASK_IS_DM_SUPPD;
2122 
2123 	if (flags & IBT_PORT_RESET_VENDOR)
2124 		capmask &= ~SM_CAP_MASK_IS_VM_SUPPD;
2125 	else if (flags & IBT_PORT_SET_VENDOR)
2126 		capmask |= SM_CAP_MASK_IS_VM_SUPPD;
2127 
2128 	set_port.cap_mask = capmask;
2129 
2130 	/*
2131 	 * Use the Hermon SET_PORT command to update the capability mask and
2132 	 * (possibly) reset the QKey violation counter for the specified port.
2133 	 * Note: In general, this operation shouldn't fail.  If it does, then
2134 	 * it is an indication that something (probably in HW, but maybe in
2135 	 * SW) has gone seriously wrong.
2136 	 */
2137 	status = hermon_set_port_cmd_post(state, &set_port, port,
2138 	    HERMON_SLEEPFLAG_FOR_CONTEXT());
2139 	if (status != HERMON_CMD_SUCCESS) {
2140 		HERMON_WARNING(state, "failed to modify port capabilities");
2141 		cmn_err(CE_CONT, "Hermon: SET_PORT (port %02d) command "
2142 		    "failed: %08x\n", port, status);
2143 		if (status == HERMON_CMD_INVALID_STATUS) {
2144 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2145 		}
2146 		return (ibc_get_ci_failure(0));
2147 	}
2148 
2149 	return (DDI_SUCCESS);
2150 }
2151 
2152 
2153 /*
2154  * hermon_set_addr_path()
2155  *    Context: Can be called from interrupt or base context.
2156  *
2157  * Note: This routine is used for two purposes.  It is used to fill in the
2158  * Hermon UDAV fields, and it is used to fill in the address path information
2159  * for QPs.  Because the two Hermon structures are similar, common fields can
2160  * be filled in here.  Where the two structures differ, however, we pass
2161  * an additional flag to indicate which type is being filled in and
2162  * handle each one uniquely.
2163  */
2164 
2165 int hermon_srate_override = -1;	/* allows ease of testing */
2166 
2167 int
2168 hermon_set_addr_path(hermon_state_t *state, ibt_adds_vect_t *av,
2169     hermon_hw_addr_path_t *path, uint_t type)
2170 {
2171 	uint_t		gidtbl_sz;
2172 	hermon_hw_udav_t *udav;
2173 
2174 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av))
2175 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path))
2176 
2177 	udav = (hermon_hw_udav_t *)(void *)path;
2178 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*udav))
2179 	path->mlid	= av->av_src_path;
2180 	path->rlid	= av->av_dlid;
2181 
2182 	switch (av->av_srate) {
2183 	case IBT_SRATE_2:	/* 1xSDR-2.5Gb/s injection rate */
2184 		path->max_stat_rate = 7; break;
2185 	case IBT_SRATE_10:	/* 4xSDR-10.0Gb/s injection rate */
2186 		path->max_stat_rate = 8; break;
2187 	case IBT_SRATE_30:	/* 12xSDR-30Gb/s injection rate */
2188 		path->max_stat_rate = 9; break;
2189 	case IBT_SRATE_5:	/* 1xDDR-5Gb/s injection rate */
2190 		path->max_stat_rate = 10; break;
2191 	case IBT_SRATE_20:	/* 4xDDR-20Gb/s injection rate */
2192 		path->max_stat_rate = 11; break;
2193 	case IBT_SRATE_40:	/* 4xQDR-40Gb/s injection rate */
2194 		path->max_stat_rate = 12; break;
2195 	case IBT_SRATE_60:	/* 12xDDR-60Gb/s injection rate */
2196 		path->max_stat_rate = 13; break;
2197 	case IBT_SRATE_80:	/* 8xQDR-80Gb/s injection rate */
2198 		path->max_stat_rate = 14; break;
2199 	case IBT_SRATE_120:	/* 12xQDR-120Gb/s injection rate */
2200 		path->max_stat_rate = 15; break;
2201 	case IBT_SRATE_NOT_SPECIFIED:	/* Max */
2202 		path->max_stat_rate = 0; break;
2203 	default:
2204 		return (IBT_STATIC_RATE_INVALID);
2205 	}
2206 	if (hermon_srate_override != -1) /* for evaluating HCA firmware */
2207 		path->max_stat_rate = hermon_srate_override;
2208 
2209 	/* If "grh" flag is set, then check for valid SGID index too */
2210 	gidtbl_sz = (1 << state->hs_queryport.log_max_gid);
2211 	if ((av->av_send_grh) && (av->av_sgid_ix >= gidtbl_sz)) {
2212 		return (IBT_SGID_INVALID);
2213 	}
2214 
2215 	/*
2216 	 * Fill in all "global" values regardless of the value in the GRH
2217 	 * flag.  Because "grh" is not set unless "av_send_grh" is set, the
2218 	 * hardware will ignore the other "global" values as necessary.  Note:
2219 	 * SW does this here to enable later query operations to return
2220 	 * exactly the same params that were passed when the addr path was
2221 	 * last written.
2222 	 */
2223 	path->grh = av->av_send_grh;
2224 	if (type == HERMON_ADDRPATH_QP) {
2225 		path->mgid_index = av->av_sgid_ix;
2226 	} else {
2227 		/*
2228 		 * For Hermon UDAV, the "mgid_index" field is the index into
2229 		 * a combined table (not a per-port table) that contains a
2230 		 * section for each port, so some extra calculation is needed.
2231 		 */
2232 
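		/*
		 * For example (hypothetical values): with a 32-entry
		 * per-port GID table, port 2 and av_sgid_ix 5 yield
		 * mgid_index (2 - 1) * 32 + 5 = 37.
		 */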
2233 		path->mgid_index = ((av->av_port_num - 1) * gidtbl_sz) +
2234 		    av->av_sgid_ix;
2235 
2236 		udav->portnum = av->av_port_num;
2237 	}
2238 
2239 	/*
2240 	 * According to Hermon PRM, the (31:0) part of rgid_l must be set to
2241 	 * "0x2" if the 'grh' or 'g' bit is cleared.  It also says that we
2242 	 * only need to do it for UDAV's.  So we enforce that here.
2243 	 *
2244 	 * NOTE: The entire 64 bits worth of GUID info is actually being
2245 	 * preserved (for UDAVs) by the callers of this function
2246 	 * (hermon_ah_alloc() and hermon_ah_modify()) and as long as the
2247 	 * 'grh' bit is not set, the upper 32 bits (63:32) of rgid_l are
2248 	 * "don't care".
2249 	 */
2250 	if ((path->grh) || (type == HERMON_ADDRPATH_QP)) {
2251 		path->flow_label = av->av_flow;
2252 		path->tclass	 = av->av_tclass;
2253 		path->hop_limit	 = av->av_hop;
2254 		bcopy(&(av->av_dgid.gid_prefix), &(path->rgid_h),
2255 		    sizeof (uint64_t));
2256 		bcopy(&(av->av_dgid.gid_guid), &(path->rgid_l),
2257 		    sizeof (uint64_t));
2258 	} else {
2259 		path->rgid_l	 = 0x2;
2260 		path->flow_label = 0;
2261 		path->tclass	 = 0;
2262 		path->hop_limit	 = 0;
2263 		path->rgid_h	 = 0;
2264 	}
2265 	/* extract the default service level */
2266 	udav->sl = (HERMON_DEF_SCHED_SELECTION & 0x3C) >> 2;
2267 
2268 	return (DDI_SUCCESS);
2269 }
2270 
2271 
2272 /*
2273  * hermon_get_addr_path()
2274  *    Context: Can be called from interrupt or base context.
2275  *
2276  * Note: Just like hermon_set_addr_path() above, this routine is used for two
2277  * purposes.  It is used to read in the Hermon UDAV fields, and it is used to
2278  * read in the address path information for QPs.  Because the two Hermon
2279  * structures are similar, common fields can be read in here.  But because
2280  * they are slightly different, we pass an additional flag to indicate which
2281  * type is being read.
2282  */
2283 void
2284 hermon_get_addr_path(hermon_state_t *state, hermon_hw_addr_path_t *path,
2285     ibt_adds_vect_t *av, uint_t type)
2286 {
2287 	uint_t		gidtbl_sz;
2288 
2289 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path))
2290 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av))
2291 
2292 	av->av_src_path	= path->mlid;
2293 	av->av_dlid	= path->rlid;
2294 
2295 	/* Set "av_srate" value from max_stat_rate */
2296 	switch (path->max_stat_rate) {
2297 	case 7:				/* 1xSDR-2.5Gb/s injection rate */
2298 		av->av_srate = IBT_SRATE_2; break;
2299 	case 8:				/* 4xSDR-10.0Gb/s injection rate */
2300 		av->av_srate = IBT_SRATE_10; break;
2301 	case 9:				/* 12xSDR-30Gb/s injection rate */
2302 		av->av_srate = IBT_SRATE_30; break;
2303 	case 10:			/* 1xDDR-5Gb/s injection rate */
2304 		av->av_srate = IBT_SRATE_5; break;
2305 	case 11:			/* 4xDDR-20Gb/s injection rate */
2306 		av->av_srate = IBT_SRATE_20; break;
2307 	case 12:			/* 4xQDR-40Gb/s injection rate */
2308 		av->av_srate = IBT_SRATE_40; break;
2309 	case 13:			/* 12xDDR-60Gb/s injection rate */
2310 		av->av_srate = IBT_SRATE_60; break;
2311 	case 14:			/* 8xQDR-80Gb/s injection rate */
2312 		av->av_srate = IBT_SRATE_80; break;
2313 	case 15:			/* 12xQDR-120Gb/s injection rate */
2314 		av->av_srate = IBT_SRATE_120; break;
2315 	case 0:				/* max */
2316 		av->av_srate = IBT_SRATE_NOT_SPECIFIED; break;
2317 	default:			/* 1x injection rate */
2318 		av->av_srate = IBT_SRATE_1X;
2319 	}
2320 
2321 	/*
2322 	 * Extract all "global" values regardless of the value in the GRH
2323 	 * flag.  Because "av_send_grh" is set only if "grh" is set, software
2324 	 * knows to ignore the other "global" values as necessary.  Note: SW
2325 	 * does it this way to enable these query operations to return exactly
2326 	 * the same params that were passed when the addr path was last written.
2327 	 */
2328 	av->av_send_grh		= path->grh;
2329 	if (type == HERMON_ADDRPATH_QP) {
2330 		av->av_sgid_ix  = path->mgid_index;
2331 	} else {
2332 		/*
2333 		 * For Hermon UDAV, the "mgid_index" field is the index into
2334 		 * a combined table (not a per-port table).
2335 		 */
2336 		gidtbl_sz = (1 << state->hs_queryport.log_max_gid);
2337 		av->av_sgid_ix = path->mgid_index - ((av->av_port_num - 1) *
2338 		    gidtbl_sz);
2339 
2340 		av->av_port_num = ((hermon_hw_udav_t *)(void *)path)->portnum;
2341 	}
2342 	av->av_flow		= path->flow_label;
2343 	av->av_tclass		= path->tclass;
2344 	av->av_hop		= path->hop_limit;
2345 	/* this is for alignment issue w/ the addr path struct in Hermon */
2346 	bcopy(&(path->rgid_h), &(av->av_dgid.gid_prefix), sizeof (uint64_t));
2347 	bcopy(&(path->rgid_l), &(av->av_dgid.gid_guid), sizeof (uint64_t));
2348 }
2349 
2350 
2351 /*
2352  * hermon_portnum_is_valid()
2353  *    Context: Can be called from interrupt or base context.
2354  */
2355 int
2356 hermon_portnum_is_valid(hermon_state_t *state, uint_t portnum)
2357 {
2358 	uint_t	max_port;
2359 
2360 	max_port = state->hs_cfg_profile->cp_num_ports;
2361 	if ((portnum <= max_port) && (portnum != 0)) {
2362 		return (1);
2363 	} else {
2364 		return (0);
2365 	}
2366 }
2367 
2368 
2369 /*
2370  * hermon_pkeyindex_is_valid()
2371  *    Context: Can be called from interrupt or base context.
2372  */
2373 int
2374 hermon_pkeyindex_is_valid(hermon_state_t *state, uint_t pkeyindx)
2375 {
2376 	uint_t	max_pkeyindx;
2377 
2378 	max_pkeyindx = 1 << state->hs_cfg_profile->cp_log_max_pkeytbl;
2379 	if (pkeyindx < max_pkeyindx) {
2380 		return (1);
2381 	} else {
2382 		return (0);
2383 	}
2384 }
2385 
2386 
2387 /*
2388  * hermon_queue_alloc()
2389  *    Context: Can be called from interrupt or base context.
2390  */
2391 int
2392 hermon_queue_alloc(hermon_state_t *state, hermon_qalloc_info_t *qa_info,
2393     uint_t sleepflag)
2394 {
2395 	ddi_dma_attr_t		dma_attr;
2396 	int			(*callback)(caddr_t);
2397 	uint64_t		realsize, alloc_mask;
2398 	uint_t			type;
2399 	int			flag, status;
2400 
2401 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info))
2402 
2403 	/* Set the callback flag appropriately */
2404 	callback = (sleepflag == HERMON_SLEEP) ? DDI_DMA_SLEEP :
2405 	    DDI_DMA_DONTWAIT;
2406 
2407 	/*
2408 	 * Initialize many of the default DMA attributes.  Then set additional
2409 	 * alignment restrictions as necessary for the queue memory.  Also
2410 	 * respect the configured value for IOMMU bypass
2411 	 */
2412 	hermon_dma_attr_init(state, &dma_attr);
2413 	dma_attr.dma_attr_align = qa_info->qa_bind_align;
2414 	type = state->hs_cfg_profile->cp_iommu_bypass;
2415 	if (type == HERMON_BINDMEM_BYPASS) {
2416 		dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
2417 	}
2418 
2419 	/* Allocate a DMA handle */
2420 	status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, callback, NULL,
2421 	    &qa_info->qa_dmahdl);
2422 	if (status != DDI_SUCCESS) {
2423 		return (DDI_FAILURE);
2424 	}
2425 
2426 	/*
2427 	 * Determine the amount of memory to allocate, depending on the values
2428 	 * in "qa_bind_align" and "qa_alloc_align".  The problem we are trying
2429 	 * to solve here is that allocating a DMA handle with IOMMU bypass
2430 	 * (DDI_DMA_FORCE_PHYSICAL) constrains us to only requesting alignments
2431 	 * that are less restrictive than the page size.  Since we may need
2432 	 * stricter alignments on the memory allocated by ddi_dma_mem_alloc()
2433 	 * (e.g. in Hermon QP work queue memory allocation), we use the
2434 	 * following method to calculate how much additional memory to request,
2435 	 * and we enforce our own alignment on the allocated result.
2436 	 */
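	/*
	 * Worked example (hypothetical sizes): qa_size = 0x2000,
	 * qa_alloc_align = 0x1000 and qa_bind_align = 0x200 give
	 * alloc_mask = 0xfff and realsize = 0x2fff, which guarantees that
	 * a 0x1000-aligned span of 0x2000 bytes fits somewhere inside the
	 * allocation.
	 */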
2437 	alloc_mask = qa_info->qa_alloc_align - 1;
2438 	if (qa_info->qa_bind_align == qa_info->qa_alloc_align) {
2439 		realsize = qa_info->qa_size;
2440 	} else {
2441 		realsize = qa_info->qa_size + alloc_mask;
2442 	}
2443 
2444 	/*
2445 	 * If we are to allocate the queue from system memory, then use
2446 	 * ddi_dma_mem_alloc() to find the space.  Otherwise, this is
2447 	 * userland-mappable memory, so use ddi_umem_alloc().  In either case,
2448 	 * return a pointer to the memory range allocated (including any
2449 	 * necessary alignment adjustments), the "real" memory pointer,
2450 	 * the "real" size, and a ddi_acc_handle_t to use when reading
2451 	 * from/writing to the memory.
2452 	 */
2453 	if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) {
2454 		/* Allocate system memory for the queue */
2455 		status = ddi_dma_mem_alloc(qa_info->qa_dmahdl, realsize,
2456 		    &state->hs_reg_accattr, DDI_DMA_CONSISTENT, callback, NULL,
2457 		    (caddr_t *)&qa_info->qa_buf_real,
2458 		    (size_t *)&qa_info->qa_buf_realsz, &qa_info->qa_acchdl);
2459 		if (status != DDI_SUCCESS) {
2460 			ddi_dma_free_handle(&qa_info->qa_dmahdl);
2461 			return (DDI_FAILURE);
2462 		}
2463 
2464 		/*
2465 		 * Save temporary copy of the real pointer.  (This may be
2466 		 * modified in the last step below).
2467 		 */
2468 		qa_info->qa_buf_aligned = qa_info->qa_buf_real;
2469 
2470 		bzero(qa_info->qa_buf_real, qa_info->qa_buf_realsz);
2471 
2472 	} else { /* HERMON_QUEUE_LOCATION_USERLAND */
2473 
2474 		/* Allocate userland mappable memory for the queue */
2475 		flag = (sleepflag == HERMON_SLEEP) ? DDI_UMEM_SLEEP :
2476 		    DDI_UMEM_NOSLEEP;
2477 		qa_info->qa_buf_real = ddi_umem_alloc(realsize, flag,
2478 		    &qa_info->qa_umemcookie);
2479 		if (qa_info->qa_buf_real == NULL) {
2480 			ddi_dma_free_handle(&qa_info->qa_dmahdl);
2481 			return (DDI_FAILURE);
2482 		}
2483 
2484 		/*
2485 		 * Save temporary copy of the real pointer.  (This may be
2486 		 * modified in the last step below).
2487 		 */
2488 		qa_info->qa_buf_aligned = qa_info->qa_buf_real;
2489 
2490 	}
2491 
2492 	/*
2493 	 * The next to last step is to ensure that the final address
2494 	 * ("qa_buf_aligned") has the appropriate "alloc" alignment
2495 	 * restriction applied to it (if necessary).
2496 	 */
2497 	if (qa_info->qa_bind_align != qa_info->qa_alloc_align) {
2498 		qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t)
2499 		    qa_info->qa_buf_aligned + alloc_mask) & ~alloc_mask);
2500 	}
2501 	/*
2502 	 * The last step is to figure out the offset of the start relative
2503 	 * to the first page of the region; this offset is used in the
2504 	 * eqc/cqc passed to the HW.
2505 	 */
2506 	qa_info->qa_pgoffs = (uint_t)((uintptr_t)
2507 	    qa_info->qa_buf_aligned & HERMON_PAGEMASK);
2508 
2509 	return (DDI_SUCCESS);
2510 }
2511 
2512 
2513 /*
2514  * hermon_queue_free()
2515  *    Context: Can be called from interrupt or base context.
2516  */
2517 void
2518 hermon_queue_free(hermon_qalloc_info_t *qa_info)
2519 {
2520 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info))
2521 
2522 	/*
2523 	 * Depending on how (i.e. from where) we allocated the memory for
2524 	 * this queue, we choose the appropriate method for releasing the
2525 	 * resources.
2526 	 */
2527 	if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) {
2528 
2529 		ddi_dma_mem_free(&qa_info->qa_acchdl);
2530 
2531 	} else if (qa_info->qa_location == HERMON_QUEUE_LOCATION_USERLAND) {
2532 
2533 		ddi_umem_free(qa_info->qa_umemcookie);
2534 
2535 	}
2536 
2537 	/* Always free the dma handle */
2538 	ddi_dma_free_handle(&qa_info->qa_dmahdl);
2539 }
2540 
2541 /*
2542  * hermon_create_fmr_pool()
2543  * Create a pool of FMRs.
2544  *     Context: Can be called from kernel context only.
2545  */
2546 int
2547 hermon_create_fmr_pool(hermon_state_t *state, hermon_pdhdl_t pd,
2548     ibt_fmr_pool_attr_t *fmr_attr, hermon_fmrhdl_t *fmrpoolp)
2549 {
2550 	hermon_fmrhdl_t	fmrpool;
2551 	hermon_fmr_list_t *fmr, *fmr_next;
2552 	hermon_mrhdl_t   mr;
2553 	char		taskqname[48];
2554 	int		status;
2555 	int		sleep;
2556 	int		i;
2557 
2558 	sleep = (fmr_attr->fmr_flags & IBT_MR_SLEEP) ? HERMON_SLEEP :
2559 	    HERMON_NOSLEEP;
2560 	if ((sleep == HERMON_SLEEP) &&
2561 	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
2562 		return (IBT_INVALID_PARAM);
2563 	}
2564 
2565 	fmrpool = (hermon_fmrhdl_t)kmem_zalloc(sizeof (*fmrpool), sleep);
2566 	if (fmrpool == NULL) {
2567 		status = IBT_INSUFF_RESOURCE;
2568 		goto fail;
2569 	}
2570 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmrpool))
2571 
2572 	mutex_init(&fmrpool->fmr_lock, NULL, MUTEX_DRIVER,
2573 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
2574 
2575 	fmrpool->fmr_state	    = state;
2576 	fmrpool->fmr_flush_function = fmr_attr->fmr_func_hdlr;
2577 	fmrpool->fmr_flush_arg	    = fmr_attr->fmr_func_arg;
2578 	fmrpool->fmr_pool_size	    = 0;
2579 	fmrpool->fmr_cache	    = 0;
2580 	fmrpool->fmr_max_pages	    = fmr_attr->fmr_max_pages_per_fmr;
2581 	fmrpool->fmr_page_sz	    = fmr_attr->fmr_page_sz;
2582 	fmrpool->fmr_dirty_watermark = fmr_attr->fmr_dirty_watermark;
2583 	fmrpool->fmr_dirty_len	    = 0;
2584 	fmrpool->fmr_flags	    = fmr_attr->fmr_flags;
2585 
2586 	/* Create taskq to handle cleanup and flush processing */
2587 	(void) snprintf(taskqname, sizeof (taskqname), "fmrpool/%d/%d @ "
2588 	    "0x%" PRIx64, fmr_attr->fmr_pool_size, hermon_debug_fmrpool_cnt,
2589 	    (uint64_t)(uintptr_t)fmrpool);
2590 	fmrpool->fmr_taskq = ddi_taskq_create(state->hs_dip, taskqname,
2591 	    HERMON_TASKQ_NTHREADS, TASKQ_DEFAULTPRI, 0);
2592 	if (fmrpool->fmr_taskq == NULL) {
2593 		status = IBT_INSUFF_RESOURCE;
2594 		goto fail1;
2595 	}
2596 
2597 	fmrpool->fmr_free_list = NULL;
2598 	fmrpool->fmr_dirty_list = NULL;
2599 
2600 	if (fmr_attr->fmr_cache) {
2601 		hermon_fmr_cache_init(fmrpool);
2602 	}
2603 
2604 	for (i = 0; i < fmr_attr->fmr_pool_size; i++) {
2605 		status = hermon_mr_alloc_fmr(state, pd, fmrpool, &mr);
2606 		if (status != DDI_SUCCESS) {
2607 			goto fail2;
2608 		}
2609 
2610 		fmr = (hermon_fmr_list_t *)kmem_zalloc(
2611 		    sizeof (hermon_fmr_list_t), sleep);
		if (fmr == NULL) {
			/* Undo the FMR resource allocated just above */
			(void) hermon_mr_dealloc_fmr(state, &mr);
			status = IBT_INSUFF_RESOURCE;
			goto fail2;
		}
2612 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr))
2613 
2614 		fmr->fmr = mr;
2615 		fmr->fmr_refcnt = 0;
2616 		fmr->fmr_remaps = 0;
2617 		fmr->fmr_pool = fmrpool;
2618 		fmr->fmr_in_cache = 0;
2619 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
2620 		mr->mr_fmr = fmr;
2621 
2622 		fmr->fmr_next = fmrpool->fmr_free_list;
2623 		fmrpool->fmr_free_list = fmr;
2624 		fmrpool->fmr_pool_size++;
2625 	}
2626 
2627 	/* Set to return pool */
2628 	*fmrpoolp = fmrpool;
2629 
2630 	return (IBT_SUCCESS);
2631 fail2:
	if (fmrpool->fmr_cache)
		hermon_fmr_cache_fini(fmrpool);
2633 	for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) {
2634 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr))
2635 		fmr_next = fmr->fmr_next;
2636 		(void) hermon_mr_dealloc_fmr(state, &fmr->fmr);
2637 		kmem_free(fmr, sizeof (hermon_fmr_list_t));
2638 	}
2639 	ddi_taskq_destroy(fmrpool->fmr_taskq);
2640 fail1:
2641 	kmem_free(fmrpool, sizeof (*fmrpool));
2642 fail:
2643 	if (status == DDI_FAILURE) {
2644 		return (ibc_get_ci_failure(0));
2645 	} else {
2646 		return (status);
2647 	}
2648 }
2649 
2650 /*
2651  * hermon_destroy_fmr_pool()
2652  * Destroy an FMR pool and free all associated resources.
2653  *     Context: Can be called from kernel context only.
2654  */
2655 int
2656 hermon_destroy_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool)
2657 {
2658 	hermon_fmr_list_t	*fmr, *fmr_next;
2659 	int			status;
2660 
2661 	mutex_enter(&fmrpool->fmr_lock);
2662 	status = hermon_fmr_cleanup(state, fmrpool);
2663 	if (status != DDI_SUCCESS) {
2664 		mutex_exit(&fmrpool->fmr_lock);
2665 		return (status);
2666 	}
2667 
2668 	if (fmrpool->fmr_cache) {
2669 		hermon_fmr_cache_fini(fmrpool);
2670 	}
2671 
2672 	for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) {
2673 		fmr_next = fmr->fmr_next;
2674 
2675 		(void) hermon_mr_dealloc_fmr(state, &fmr->fmr);
2676 		kmem_free(fmr, sizeof (hermon_fmr_list_t));
2677 	}
2678 	mutex_exit(&fmrpool->fmr_lock);
2679 
2680 	ddi_taskq_destroy(fmrpool->fmr_taskq);
2681 	mutex_destroy(&fmrpool->fmr_lock);
2682 
2683 	kmem_free(fmrpool, sizeof (*fmrpool));
2684 	return (DDI_SUCCESS);
2685 }
2686 
2687 /*
2688  * hermon_flush_fmr_pool()
2689  * Ensure that all unmapped FMRs are fully invalidated.
2690  *     Context: Can be called from kernel context only.
2691  */
2692 int
2693 hermon_flush_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool)
2694 {
2695 	int		status;
2696 
2697 	/*
2698 	 * Force the unmapping of all entries on the dirty list, regardless of
2699 	 * whether the watermark has been hit yet.
2700 	 */
2701 	/* grab the pool lock */
2702 	mutex_enter(&fmrpool->fmr_lock);
2703 	status = hermon_fmr_cleanup(state, fmrpool);
2704 	mutex_exit(&fmrpool->fmr_lock);
2705 	return (status);
2706 }
2707 
2708 /*
2709  * hermon_register_physical_fmr()
2710  * Map memory into an FMR
2711  *    Context: Can be called from interrupt or base context.
2712  */
2713 int
2714 hermon_register_physical_fmr(hermon_state_t *state, hermon_fmrhdl_t fmrpool,
2715     ibt_pmr_attr_t *mem_pattr, hermon_mrhdl_t *mr,
2716     ibt_pmr_desc_t *mem_desc_p)
2717 {
2718 	hermon_fmr_list_t	*fmr;
2719 	hermon_fmr_list_t	query;
2720 	avl_index_t		where;
2721 	int			status;
2722 
2723 	/* Check length */
2724 	mutex_enter(&fmrpool->fmr_lock);
2725 	if (mem_pattr->pmr_len < 1 || (mem_pattr->pmr_num_buf >
2726 	    fmrpool->fmr_max_pages)) {
2727 		mutex_exit(&fmrpool->fmr_lock);
2728 		return (IBT_MR_LEN_INVALID);
2729 	}
2730 
2731 	mutex_enter(&fmrpool->fmr_cachelock);
2732 	/* lookup in fmr cache */
2733 	/* if exists, grab it, and return it */
2734 	if (fmrpool->fmr_cache) {
2735 		query.fmr_desc.pmd_iova = mem_pattr->pmr_iova;
2736 		query.fmr_desc.pmd_phys_buf_list_sz = mem_pattr->pmr_len;
2737 		fmr = (hermon_fmr_list_t *)avl_find(&fmrpool->fmr_cache_avl,
2738 		    &query, &where);
2739 
2740 		/*
2741 		 * If valid FMR was found in cache, return that fmr info
2742 		 */
2743 		if (fmr != NULL) {
2744 			fmr->fmr_refcnt++;
2745 			/* Store pmr desc for use in cache */
2746 			(void) memcpy(mem_desc_p, &fmr->fmr_desc,
2747 			    sizeof (ibt_pmr_desc_t));
2748 			*mr = (hermon_mrhdl_t)fmr->fmr;
2749 			mutex_exit(&fmrpool->fmr_cachelock);
2750 			mutex_exit(&fmrpool->fmr_lock);
2751 			return (DDI_SUCCESS);
2752 		}
2753 	}
2754 
2755 	/* FMR does not exist in cache, proceed with registration */
2756 
2757 	/* grab next free entry */
2758 	fmr = fmrpool->fmr_free_list;
2759 	if (fmr == NULL) {
2760 		mutex_exit(&fmrpool->fmr_cachelock);
2761 		mutex_exit(&fmrpool->fmr_lock);
2762 		return (IBT_INSUFF_RESOURCE);
2763 	}
2764 
2765 	fmrpool->fmr_free_list = fmrpool->fmr_free_list->fmr_next;
2766 	fmr->fmr_next = NULL;
2767 
2768 	status = hermon_mr_register_physical_fmr(state, mem_pattr, fmr->fmr,
2769 	    mem_desc_p);
2770 	if (status != DDI_SUCCESS) {
2771 		mutex_exit(&fmrpool->fmr_cachelock);
2772 		mutex_exit(&fmrpool->fmr_lock);
2773 		return (status);
2774 	}
2775 
2776 	fmr->fmr_refcnt = 1;
2777 	fmr->fmr_remaps++;
2778 
2779 	/* Store pmr desc for use in cache */
2780 	(void) memcpy(&fmr->fmr_desc, mem_desc_p, sizeof (ibt_pmr_desc_t));
2781 	*mr = (hermon_mrhdl_t)fmr->fmr;
2782 
2783 	/* Store in cache */
2784 	if (fmrpool->fmr_cache) {
2785 		if (!fmr->fmr_in_cache) {
2786 			avl_insert(&fmrpool->fmr_cache_avl, fmr, where);
2787 			fmr->fmr_in_cache = 1;
2788 		}
2789 	}
2790 
2791 	mutex_exit(&fmrpool->fmr_cachelock);
2792 	mutex_exit(&fmrpool->fmr_lock);
2793 	return (DDI_SUCCESS);
2794 }
2795 
2796 /*
2797  * hermon_deregister_fmr()
2798  * Unmap FMR
2799  *    Context: Can be called from kernel context only.
2800  */
2801 int
2802 hermon_deregister_fmr(hermon_state_t *state, hermon_mrhdl_t mr)
2803 {
2804 	hermon_fmr_list_t	*fmr;
2805 	hermon_fmrhdl_t		fmrpool;
2806 	int			status;
2807 
2808 	fmr = mr->mr_fmr;
2809 	fmrpool = fmr->fmr_pool;
2810 
2811 	/* Grab pool lock */
2812 	mutex_enter(&fmrpool->fmr_lock);
2813 	fmr->fmr_refcnt--;
2814 
2815 	if (fmr->fmr_refcnt == 0) {
2816 		/*
2817 		 * First, invalidate the FMR, reducing our exposure to having
2818 		 * this region still registered in hardware.
2819 		 */
2820 		(void) hermon_mr_invalidate_fmr(state, mr);
2821 
2822 		/*
2823 		 * If we've exhausted our remaps then add the FMR to the dirty
2824 		 * list, not allowing it to be re-used until we have done a
2825 		 * flush.  Otherwise, simply add it back to the free list for
2826 		 * re-mapping.
2827 		 */
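		/*
		 * For example, if cp_fmr_max_remaps were 32 (a hypothetical
		 * setting), an FMR could be remapped 32 times before being
		 * parked on the dirty list to await a flush.
		 */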
2828 		if (fmr->fmr_remaps <
2829 		    state->hs_cfg_profile->cp_fmr_max_remaps) {
2830 			/* add to free list */
2831 			fmr->fmr_next = fmrpool->fmr_free_list;
2832 			fmrpool->fmr_free_list = fmr;
2833 		} else {
2834 			/* add to dirty list */
2835 			fmr->fmr_next = fmrpool->fmr_dirty_list;
2836 			fmrpool->fmr_dirty_list = fmr;
2837 			fmrpool->fmr_dirty_len++;
2838 
2839 			status = ddi_taskq_dispatch(fmrpool->fmr_taskq,
2840 			    hermon_fmr_processing, fmrpool, DDI_NOSLEEP);
2841 			if (status == DDI_FAILURE) {
2842 				mutex_exit(&fmrpool->fmr_lock);
2843 				return (IBT_INSUFF_RESOURCE);
2844 			}
2845 		}
2846 	}
2847 	/* Release pool lock */
2848 	mutex_exit(&fmrpool->fmr_lock);
2849 
2850 	return (DDI_SUCCESS);
2851 }
2852 
2853 
2854 /*
2855  * hermon_fmr_processing()
2856  * If required, perform cleanup.
2857  *     Context: Called from taskq context only.
2858  */
2859 static void
2860 hermon_fmr_processing(void *fmr_args)
2861 {
2862 	hermon_fmrhdl_t		fmrpool;
2863 	int			status;
2864 
2865 	ASSERT(fmr_args != NULL);
2866 
2867 	fmrpool = (hermon_fmrhdl_t)fmr_args;
2868 
2869 	/* grab pool lock */
2870 	mutex_enter(&fmrpool->fmr_lock);
2871 	if (fmrpool->fmr_dirty_len >= fmrpool->fmr_dirty_watermark) {
2872 		status = hermon_fmr_cleanup(fmrpool->fmr_state, fmrpool);
2873 		if (status != DDI_SUCCESS) {
2874 			mutex_exit(&fmrpool->fmr_lock);
2875 			return;
2876 		}
2877 
2878 		if (fmrpool->fmr_flush_function != NULL) {
2879 			(void) fmrpool->fmr_flush_function(
2880 			    (ibc_fmr_pool_hdl_t)fmrpool,
2881 			    fmrpool->fmr_flush_arg);
2882 		}
2883 	}
2884 
2885 	/* let pool lock go */
2886 	mutex_exit(&fmrpool->fmr_lock);
2887 }
2888 
2889 /*
2890  * hermon_fmr_cleanup()
2891  * Perform cleanup processing, walking the dirty list and performing the
2892  * MTT sync operation if required.
2893  *    Context: can be called from taskq or base context.
2894  */
2895 static int
2896 hermon_fmr_cleanup(hermon_state_t *state, hermon_fmrhdl_t fmrpool)
2897 {
2898 	hermon_fmr_list_t	*fmr;
2899 	hermon_fmr_list_t	*fmr_next;
2900 	int			sync_needed;
2901 	int			status;
2902 
2903 	ASSERT(MUTEX_HELD(&fmrpool->fmr_lock));
2904 
2905 	sync_needed = 0;
2906 	for (fmr = fmrpool->fmr_dirty_list; fmr; fmr = fmr_next) {
2907 		fmr_next = fmr->fmr_next;
2908 		fmr->fmr_remaps = 0;
2909 
2910 		(void) hermon_mr_deregister_fmr(state, fmr->fmr);
2911 
2912 		/*
2913 		 * Update lists.
2914 		 * - add fmr back to free list
2915 		 * - remove fmr from dirty list
2916 		 */
2917 		fmr->fmr_next = fmrpool->fmr_free_list;
2918 		fmrpool->fmr_free_list = fmr;
2919 
2921 		/*
2922 		 * Because we have updated the dirty list and deregistered the
2923 		 * FMR entry, we need to sync the TPT.  Set the 'sync_needed'
2924 		 * flag here so that the sync is performed once we finish
2925 		 * processing the dirty list.
2926 		 */
2927 		sync_needed = 1;
2928 	}
2929 
2930 	fmrpool->fmr_dirty_list = NULL;
2931 	fmrpool->fmr_dirty_len = 0;
2932 
2933 	if (sync_needed) {
2934 		status = hermon_sync_tpt_cmd_post(state,
2935 		    HERMON_CMD_NOSLEEP_SPIN);
2936 		if (status != HERMON_CMD_SUCCESS) {
2937 			return (status);
2938 		}
2939 	}
2940 
2941 	return (DDI_SUCCESS);
2942 }
2943 
2944 /*
2945  * hermon_fmr_avl_compare()
2946  *    Context: Can be called from user or kernel context.
2947  */
2948 static int
2949 hermon_fmr_avl_compare(const void *q, const void *e)
2950 {
2951 	hermon_fmr_list_t *entry, *query;
2952 
2953 	entry = (hermon_fmr_list_t *)e;
2954 	query = (hermon_fmr_list_t *)q;
2955 
2956 	if (query->fmr_desc.pmd_iova < entry->fmr_desc.pmd_iova) {
2957 		return (-1);
2958 	} else if (query->fmr_desc.pmd_iova > entry->fmr_desc.pmd_iova) {
2959 		return (+1);
2960 	} else {
2961 		return (0);
2962 	}
2963 }
2964 
2965 
2966 /*
2967  * hermon_fmr_cache_init()
2968  *    Context: Can be called from user or kernel context.
2969  */
2970 static void
2971 hermon_fmr_cache_init(hermon_fmrhdl_t fmr)
2972 {
2973 	/* Initialize the lock used for FMR cache AVL tree access */
2974 	mutex_init(&fmr->fmr_cachelock, NULL, MUTEX_DRIVER,
2975 	    DDI_INTR_PRI(fmr->fmr_state->hs_intrmsi_pri));
2976 
2977 	/* Initialize the AVL tree for the FMR cache */
2978 	avl_create(&fmr->fmr_cache_avl, hermon_fmr_avl_compare,
2979 	    sizeof (hermon_fmr_list_t),
2980 	    offsetof(hermon_fmr_list_t, fmr_avlnode));
2981 
2982 	fmr->fmr_cache = 1;
2983 }
2984 
2985 
2986 /*
2987  * hermon_fmr_cache_fini()
2988  *    Context: Can be called from user or kernel context.
2989  */
2990 static void
2991 hermon_fmr_cache_fini(hermon_fmrhdl_t fmr)
2992 {
2993 	void			*cookie;
2994 
2995 	/*
2996 	 * Empty all entries (if necessary) and destroy the AVL tree.
2997 	 * The FMRs themselves are freed as part of destroy_pool()
2998 	 */
2999 	cookie = NULL;
3000 	while (avl_destroy_nodes(&fmr->fmr_cache_avl, &cookie) != NULL) {
3002 		/* loop through */
3003 	}
3004 	avl_destroy(&fmr->fmr_cache_avl);
3005 
3006 	/* Destroy the lock used for FMR cache */
3007 	mutex_destroy(&fmr->fmr_cachelock);
3008 }
3009 
3010 /*
3011  * hermon_get_dma_cookies()
3012  * Return DMA cookies in the pre-allocated paddr_list_p based on the length
3013  * needed.
3014  *    Context: Can be called from interrupt or base context.
3015  */
3016 int
3017 hermon_get_dma_cookies(hermon_state_t *state, ibt_phys_buf_t *paddr_list_p,
3018     ibt_va_attr_t *va_attrs, uint_t list_len, uint_t *cookiecnt,
3019     ibc_ma_hdl_t *ibc_ma_hdl_p)
3020 {
3021 	ddi_dma_handle_t	dma_hdl;
3022 	ddi_dma_attr_t		dma_attr;
3023 	ddi_dma_cookie_t	dmacookie;
3024 	int			(*callback)(caddr_t);
3025 	int			status;
3026 	int			i;
3027 
3028 	/* Set the callback flag appropriately */
3029 	callback = (va_attrs->va_flags & IBT_VA_NOSLEEP) ? DDI_DMA_DONTWAIT :
3030 	    DDI_DMA_SLEEP;
3031 	if ((callback == DDI_DMA_SLEEP) &&
3032 	    (HERMON_SLEEP != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
3033 		return (IBT_INVALID_PARAM);
3034 	}
3035 
3036 	/*
3037 	 * Initialize many of the default DMA attributes and allocate the DMA
3038 	 * handle.  Then, if we're bypassing the IOMMU, set the
3039 	 * DDI_DMA_FORCE_PHYSICAL flag.
3040 	 */
3041 	hermon_dma_attr_init(state, &dma_attr);
3042 
3043 #ifdef __x86
3044 	/*
3045 	 * On x86 we can specify a maximum segment length for our returned
3046 	 * cookies.
3047 	 */
3048 	if (va_attrs->va_flags & IBT_VA_FMR) {
3049 		dma_attr.dma_attr_seg = PAGESIZE - 1;
3050 	}
3051 #endif
3052 
3053 	/*
3054 	 * Check to see if the RO flag is set, and if so,
3055 	 * set that bit in the attr structure as well.
3056 	 *
3057 	 * NOTE 1:  This function is ONLY called by consumers, and only for
3058 	 *	    data buffers
3059 	 */
3060 	if (hermon_kernel_data_ro == HERMON_RO_ENABLED) {
3061 		dma_attr.dma_attr_flags |= DDI_DMA_RELAXED_ORDERING;
3062 	}
3063 
3064 	status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
3065 	    callback, NULL, &dma_hdl);
3066 	if (status != DDI_SUCCESS) {
3067 		switch (status) {
3068 		case DDI_DMA_NORESOURCES:
3069 			return (IBT_INSUFF_RESOURCE);
3070 		case DDI_DMA_BADATTR:
3071 		default:
3072 			return (ibc_get_ci_failure(0));
3073 		}
3074 	}
3075 
3076 	/*
3077 	 * Now bind the handle with the correct DMA attributes.
3078 	 */
3079 	if (va_attrs->va_flags & IBT_VA_BUF) {
3080 		status = ddi_dma_buf_bind_handle(dma_hdl, va_attrs->va_buf,
3081 		    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_DONTWAIT,
3082 		    NULL, &dmacookie, cookiecnt);
3083 	} else {
3084 		status = ddi_dma_addr_bind_handle(dma_hdl, NULL,
3085 		    (caddr_t)(uintptr_t)va_attrs->va_vaddr, va_attrs->va_len,
3086 		    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_DONTWAIT,
3087 		    NULL, &dmacookie, cookiecnt);
3088 	}
3089 	if (status != DDI_SUCCESS) {
3090 		ddi_dma_free_handle(&dma_hdl);
3091 
3092 		switch (status) {
3093 		case DDI_DMA_NORESOURCES:
3094 			return (IBT_INSUFF_RESOURCE);
3095 		case DDI_DMA_TOOBIG:
3096 			return (IBT_INVALID_PARAM);
3097 		case DDI_DMA_PARTIAL_MAP:
3098 		case DDI_DMA_INUSE:
3099 		case DDI_DMA_NOMAPPING:
3100 		default:
3101 			return (ibc_get_ci_failure(0));
3102 		}
3103 	}
3104 
3105 	/*
3106 	 * Verify our physical buffer list (PBL) is large enough to handle the
3107 	 * number of cookies that were returned.
3108 	 */
3109 	if (*cookiecnt > list_len) {
3110 		(void) ddi_dma_unbind_handle(dma_hdl);
3111 		ddi_dma_free_handle(&dma_hdl);
3112 		return (IBT_PBL_TOO_SMALL);
3113 	}
3114 
3115 	/*
3116 	 * We store the cookies returned by the DDI into our own PBL.  This
3117 	 * sets the cookies up for later processing (for example, if we want to
3118 	 * split up the cookies into smaller chunks).  We use the laddr and
3119 	 * size fields in each cookie to create each individual entry (PBE).
3120 	 */
3121 
3122 	/*
3123 	 * Store the first cookie's info
3124 	 */
3125 	paddr_list_p[0].p_laddr = dmacookie.dmac_laddress;
3126 	paddr_list_p[0].p_size = dmacookie.dmac_size;
3127 
3128 	/*
3129 	 * Loop through each cookie, storing each cookie into our physical
3130 	 * buffer list.
3131 	 */
3132 	for (i = 1; i < *cookiecnt; i++) {
3133 		ddi_dma_nextcookie(dma_hdl, &dmacookie);
3134 
3135 		paddr_list_p[i].p_laddr = dmacookie.dmac_laddress;
3136 		paddr_list_p[i].p_size  = dmacookie.dmac_size;
3137 	}
3138 
3139 	/* return handle */
3140 	*ibc_ma_hdl_p = (ibc_ma_hdl_t)dma_hdl;
3141 	return (DDI_SUCCESS);
3142 }
3143 
3144 /*
3145  * hermon_split_dma_cookies()
3146  * Split the cookies passed in via paddr_list into pagesize chunks,
3147  * returning the new list in the same buffer.
3148  *    Context: Can be called from interrupt or base context.
3149  */
3150 /* ARGSUSED */
3151 int
3152 hermon_split_dma_cookies(hermon_state_t *state, ibt_phys_buf_t *paddr_list,
3153     ib_memlen_t *paddr_offset, uint_t list_len, uint_t *cookiecnt,
3154     uint_t pagesize)
3155 {
3156 	uint64_t	pageoffset;
3157 	uint64_t	pagemask;
3158 	uint_t		pageshift;
3159 	uint_t		current_cookiecnt;
3160 	uint_t		cookies_needed;
3161 	uint64_t	last_size, extra_cookie;
3162 	int		i_increment;
3163 	int		i, k;
3164 	int		status;
3165 
3166 	/* Setup pagesize calculations */
3167 	pageoffset = pagesize - 1;
3168 	pagemask = (~pageoffset);
3169 	pageshift = highbit(pagesize) - 1;
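	/*
	 * Worked example (hypothetical values): with pagesize = 0x1000
	 * (so pageoffset = 0xfff, pageshift = 12), a single 0x2800-byte
	 * cookie starting at 0x10400 becomes *paddr_offset = 0x400 plus
	 * three cookies: (0x10000, 0xc00), (0x11000, 0x1000) and
	 * (0x12000, 0xc00).
	 */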
3170 
3171 	/*
3172 	 * Setup first cookie offset based on pagesize requested.
3173 	 */
3174 	*paddr_offset = paddr_list[0].p_laddr & pageoffset;
3175 	paddr_list[0].p_laddr &= pagemask;
3176 
3177 	/* Save away the current number of cookies that are passed in */
3178 	current_cookiecnt = *cookiecnt;
3179 
3180 	/* Perform splitting up of current cookies into pagesize blocks */
3181 	for (i = 0; i < current_cookiecnt; i += i_increment) {
3182 		/*
3183 		 * If the cookie is smaller than pagesize, or already is
3184 		 * pagesize, then we are already within our limits, so we skip
3185 		 * it.
3186 		 */
3187 		if (paddr_list[i].p_size <= pagesize) {
3188 			i_increment = 1;
3189 			continue;
3190 		}
3191 
3192 		/*
3193 		 * If this is our first cookie, then we have to deal with the
3194 		 * offset that may be present in the first address.  So add
3195 		 * that to our size, to calculate potential change to the last
3196 		 * cookie's size.
3197 		 *
3198 		 * Also, calculate the number of cookies that we'll need to
3199 		 * split up this block into.
3200 		 */
3201 		if (i == 0) {
3202 			last_size = (paddr_list[i].p_size + *paddr_offset) &
3203 			    pageoffset;
3204 			cookies_needed = (paddr_list[i].p_size +
3205 			    *paddr_offset) >> pageshift;
3206 		} else {
3207 			last_size = 0;
3208 			cookies_needed = paddr_list[i].p_size >> pageshift;
3209 		}
3210 
3211 		/*
3212 		 * If our size is not a multiple of pagesize, we need one more
3213 		 * cookie.
3214 		 */
3215 		if (last_size) {
3216 			extra_cookie = 1;
3217 		} else {
3218 			extra_cookie = 0;
3219 		}
3220 
3221 		/*
3222 		 * Split cookie into pagesize chunks, shifting list of cookies
3223 		 * down, using more cookie slots in the PBL if necessary.
3224 		 */
3225 		status = hermon_dma_cookie_shift(paddr_list, i, list_len,
3226 		    current_cookiecnt - i, cookies_needed + extra_cookie);
3227 		if (status != 0) {
3228 			return (status);
3229 		}
3230 
3231 		/*
3232 		 * If the very first cookie, we must take possible offset into
3233 		 * account.
3234 		 */
3235 		if (i == 0) {
3236 			paddr_list[i].p_size = pagesize - *paddr_offset;
3237 		} else {
3238 			paddr_list[i].p_size = pagesize;
3239 		}
3240 
3241 		/*
3242 		 * We have shifted the existing cookies down the PBL, now fill
3243 		 * in the blank entries by splitting up our current block.
3244 		 */
3245 		for (k = 1; k < cookies_needed; k++) {
3246 			paddr_list[i + k].p_laddr =
3247 			    paddr_list[i + k - 1].p_laddr + pagesize;
3248 			paddr_list[i + k].p_size = pagesize;
3249 		}
3250 
3251 		/* If we have one extra cookie (of less than pagesize...) */
3252 		if (extra_cookie) {
3253 			paddr_list[i + k].p_laddr =
3254 			    paddr_list[i + k - 1].p_laddr + pagesize;
3255 			paddr_list[i + k].p_size = (size_t)last_size;
3256 		}
3257 
3258 		/* Increment cookiecnt appropriately based on cookies used */
3259 		i_increment = cookies_needed + extra_cookie;
3260 		current_cookiecnt += i_increment - 1;
3261 	}
3262 
3263 	/* Update to new cookie count */
3264 	*cookiecnt = current_cookiecnt;
3265 	return (DDI_SUCCESS);
3266 }
3267 
3268 /*
3269  * hermon_dma_cookie_shift()
3270  *    Context: Can be called from interrupt or base context.
3271  */
3272 int
3273 hermon_dma_cookie_shift(ibt_phys_buf_t *paddr_list, int start, int end,
3274     int cookiecnt, int num_shift)
3275 {
3276 	int shift_start;
3277 	int i;
3278 
3279 	/* Calculating starting point in the PBL list */
3280 	shift_start = start + cookiecnt - 1;
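	/*
	 * For example (hypothetical values): start = 2, cookiecnt = 3 and
	 * num_shift = 2 give shift_start = 4; entries 4 and 3 are copied
	 * up one slot (to 5 and 4), leaving slots 2 and 3 for the caller's
	 * split of the original cookie.
	 */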
3281 
3282 	/* Check if we're at the end of our PBL list */
3283 	if ((shift_start + num_shift - 1) >= end) {
3284 		return (IBT_PBL_TOO_SMALL);
3285 	}
3286 
3287 	for (i = shift_start; i > start; i--) {
3288 		paddr_list[i + num_shift - 1] = paddr_list[i];
3289 	}
3290 
3291 	return (DDI_SUCCESS);
3292 }
3293 
3294 
3295 /*
3296  * hermon_free_dma_cookies()
3297  *    Context: Can be called from interrupt or base context.
3298  */
3299 int
3300 hermon_free_dma_cookies(ibc_ma_hdl_t ma_hdl)
3301 {
3302 	ddi_dma_handle_t	dma_hdl;
3303 	int			status;
3304 
3305 	dma_hdl = (ddi_dma_handle_t)ma_hdl;
3306 
3307 	status = ddi_dma_unbind_handle(dma_hdl);
3308 	if (status != DDI_SUCCESS) {
3309 		return (ibc_get_ci_failure(0));
3310 	}
3311 	ddi_dma_free_handle(&dma_hdl);
3312 
3313 	return (DDI_SUCCESS);
3314 }
3315