1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * hermon_misc.c
29  *    Hermon Miscellaneous routines - Address Handle, Multicast, Protection
30  *    Domain, and port-related operations
31  *
32  *    Implements all the routines necessary for allocating, freeing, querying
33  *    and modifying Address Handles and Protection Domains.  Also implements
34  *    all the routines necessary for adding and removing Queue Pairs to/from
35  *    Multicast Groups.  Lastly, it implements the routines necessary for
36  *    port-related query and modify operations.
37  */
38 
39 #include <sys/types.h>
40 #include <sys/conf.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/modctl.h>
44 #include <sys/bitmap.h>
45 #include <sys/sysmacros.h>
46 
47 #include <sys/ib/adapters/hermon/hermon.h>
48 
49 extern uint32_t hermon_kernel_data_ro;
50 
51 /* used for helping uniquify fmr pool taskq name */
52 static uint_t hermon_debug_fmrpool_cnt = 0x00000000;
53 
54 static int hermon_mcg_qplist_add(hermon_state_t *state, hermon_mcghdl_t mcg,
55     hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp, uint_t *qp_found);
56 static int hermon_mcg_qplist_remove(hermon_mcghdl_t mcg,
57     hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp);
58 static void hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp);
59 static void hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp);
60 static uint_t hermon_mcg_walk_mgid_hash(hermon_state_t *state,
61     uint64_t start_indx, ib_gid_t mgid, uint_t *prev_indx);
62 static void hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg,
63     hermon_hw_mcg_t *mcg_hdr, ib_gid_t mgid, hermon_rsrc_t *mcg_rsrc);
64 static int hermon_mcg_hash_list_remove(hermon_state_t *state, uint_t curr_indx,
65     uint_t prev_indx, hermon_hw_mcg_t *mcg_entry);
66 static int hermon_mcg_entry_invalidate(hermon_state_t *state,
67     hermon_hw_mcg_t *mcg_entry, uint_t indx);
68 static int hermon_mgid_is_valid(ib_gid_t gid);
69 static int hermon_mlid_is_valid(ib_lid_t lid);
70 static void hermon_fmr_processing(void *fmr_args);
71 static int hermon_fmr_cleanup(hermon_state_t *state, hermon_fmrhdl_t pool);
72 static void hermon_fmr_cache_init(hermon_fmrhdl_t fmr);
73 static void hermon_fmr_cache_fini(hermon_fmrhdl_t fmr);
74 static int hermon_fmr_avl_compare(const void *q, const void *e);
75 
76 
77 #define	HERMON_MAX_DBR_PAGES_PER_USER	64
78 #define	HERMON_DBR_KEY(index, page) \
79 	(((uint64_t)index) * HERMON_MAX_DBR_PAGES_PER_USER + (page))
80 
81 static hermon_udbr_page_t *
82 hermon_dbr_new_user_page(hermon_state_t *state, uint_t index,
83     uint_t page)
84 {
85 	hermon_udbr_page_t *pagep;
86 	ddi_dma_attr_t dma_attr;
87 	uint_t cookiecnt;
88 	int i, status;
89 	uint64_t *p;
90 	hermon_umap_db_entry_t *umapdb;
91 
92 	pagep = kmem_alloc(sizeof (*pagep), KM_SLEEP);
93 	pagep->upg_index = page;
94 	pagep->upg_nfree = PAGESIZE / sizeof (hermon_dbr_t);
95 	pagep->upg_firstfree = 0;
96 	pagep->upg_kvaddr = ddi_umem_alloc(PAGESIZE, DDI_UMEM_SLEEP,
97 	    &pagep->upg_umemcookie); /* not HERMON_PAGESIZE here */
98 
99 	/* link free entries */
100 	p = (uint64_t *)(void *)pagep->upg_kvaddr;
101 	for (i = pagep->upg_firstfree; i < pagep->upg_nfree; i++)
102 		p[i] = i + 1;
103 	pagep->upg_buf = ddi_umem_iosetup(pagep->upg_umemcookie, 0,
104 	    PAGESIZE, B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
105 
106 	hermon_dma_attr_init(state, &dma_attr);
107 	status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
108 	    DDI_DMA_SLEEP, NULL, &pagep->upg_dmahdl);
109 	if (status != DDI_SUCCESS) {
110 		IBTF_DPRINTF_L2("hermon", "hermon_new_user_page: "
111 		    "ddi_dma_buf_bind_handle failed: %d", status);
112 		return (NULL);
113 	}
114 	status = ddi_dma_buf_bind_handle(pagep->upg_dmahdl,
115 	    pagep->upg_buf, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
116 	    DDI_DMA_SLEEP, NULL, &pagep->upg_dmacookie, &cookiecnt);
117 	if (status != DDI_SUCCESS) {
118 		IBTF_DPRINTF_L2("hermon", "hermon_dbr_new_user_page: "
119 		    "ddi_dma_buf_bind_handle failed: %d", status);
120 		ddi_dma_free_handle(&pagep->upg_dmahdl);
121 		return (NULL);
122 	}
123 	ASSERT(cookiecnt == 1);
124 
125 	/* create db entry for mmap */
126 	umapdb = hermon_umap_db_alloc(state->hs_instance,
127 	    HERMON_DBR_KEY(index, page), MLNX_UMAP_DBRMEM_RSRC,
128 	    (uint64_t)(uintptr_t)pagep);
129 	hermon_umap_db_add(umapdb);
130 	return (pagep);
131 }
132 
133 
134 /*ARGSUSED*/
135 static int
136 hermon_user_dbr_alloc(hermon_state_t *state, uint_t index,
137     ddi_acc_handle_t *acchdl, hermon_dbr_t **vdbr, uint64_t *pdbr,
138     uint64_t *mapoffset)
139 {
140 	hermon_user_dbr_t *udbr;
141 	hermon_udbr_page_t *pagep;
142 	uint_t next_page;
143 	int j;
144 
145 	mutex_enter(&state->hs_dbr_lock);
146 	for (udbr = state->hs_user_dbr; udbr != NULL; udbr = udbr->udbr_link)
147 		if (udbr->udbr_index == index)
148 			break;
149 	if (udbr == NULL) {
150 		udbr = kmem_alloc(sizeof (*udbr), KM_SLEEP);
151 		udbr->udbr_link = state->hs_user_dbr;
152 		state->hs_user_dbr = udbr;
153 		udbr->udbr_index = index;
154 		udbr->udbr_pagep = NULL;
155 	}
156 	pagep = udbr->udbr_pagep;
157 	next_page = (pagep == NULL) ? 0 : (pagep->upg_index + 1);
158 	while (pagep != NULL)
159 		if (pagep->upg_nfree > 0)
160 			break;
161 		else
162 			pagep = pagep->upg_link;
163 	if (pagep == NULL) {
164 		pagep = hermon_dbr_new_user_page(state, index, next_page);
165 		if (pagep == NULL) {
166 			mutex_exit(&state->hs_dbr_lock);
167 			return (DDI_FAILURE);
168 		}
169 		pagep->upg_link = udbr->udbr_pagep;
170 		udbr->udbr_pagep = pagep;
171 	}
172 	j = pagep->upg_firstfree;	/* index within page */
173 	pagep->upg_firstfree = ((uint64_t *)(void *)pagep->upg_kvaddr)[j];
174 	pagep->upg_nfree--;
175 	((uint64_t *)(void *)pagep->upg_kvaddr)[j] = 0;	/* clear dbr */
176 	*mapoffset = ((HERMON_DBR_KEY(index, pagep->upg_index) <<
177 	    MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_DBRMEM_RSRC) << PAGESHIFT;
178 	*vdbr = (hermon_dbr_t *)((uint64_t *)(void *)pagep->upg_kvaddr + j);
179 	*pdbr = pagep->upg_dmacookie.dmac_laddress + j * sizeof (uint64_t);
180 
181 	mutex_exit(&state->hs_dbr_lock);
182 	return (DDI_SUCCESS);
183 }
184 
185 static void
186 hermon_user_dbr_free(hermon_state_t *state, uint_t index, hermon_dbr_t *record)
187 {
188 	hermon_user_dbr_t	*udbr;
189 	hermon_udbr_page_t	*pagep;
190 	caddr_t			kvaddr;
191 	uint_t			dbr_index;
192 	uint_t			max_free = PAGESIZE / sizeof (hermon_dbr_t);
193 
194 	dbr_index = (uintptr_t)record & PAGEOFFSET; /* offset (not yet index) */
195 	kvaddr = (caddr_t)record - dbr_index;
196 	dbr_index /= sizeof (hermon_dbr_t); /* now it's the index */
197 
198 	mutex_enter(&state->hs_dbr_lock);
199 	for (udbr = state->hs_user_dbr; udbr != NULL; udbr = udbr->udbr_link)
200 		if (udbr->udbr_index == index)
201 			break;
202 	if (udbr == NULL) {
203 		IBTF_DPRINTF_L2("hermon", "free user dbr: udbr struct not "
204 		    "found for index %x", index);
205 		mutex_exit(&state->hs_dbr_lock);
206 		return;
207 	}
208 	for (pagep = udbr->udbr_pagep; pagep != NULL; pagep = pagep->upg_link)
209 		if (pagep->upg_kvaddr == kvaddr)
210 			break;
211 	if (pagep == NULL) {
212 		IBTF_DPRINTF_L2("hermon", "free user dbr: pagep struct not"
213 		    " found for index %x, kvaddr %p, DBR index %x",
214 		    index, kvaddr, dbr_index);
215 		mutex_exit(&state->hs_dbr_lock);
216 		return;
217 	}
218 	if (pagep->upg_nfree >= max_free) {
219 		IBTF_DPRINTF_L2("hermon", "free user dbr: overflow: "
220 		    "UCE index %x, DBR index %x", index, dbr_index);
221 		mutex_exit(&state->hs_dbr_lock);
222 		return;
223 	}
224 	ASSERT(dbr_index < max_free);
225 	((uint64_t *)(void *)kvaddr)[dbr_index] = pagep->upg_firstfree;
226 	pagep->upg_firstfree = dbr_index;
227 	pagep->upg_nfree++;
228 	mutex_exit(&state->hs_dbr_lock);
229 
230 	/* XXX still need to unlink and free struct */
231 	/* XXX munmap needs to be managed */
232 }
233 
234 /*
235  * hermon_dbr_page_alloc()
236  *	first page allocation - called from attach or open
237  *	in this case, we want exactly one page per call, and aligned on a
238  *	page - and may need to be mapped to the user for access
239  */
240 int
241 hermon_dbr_page_alloc(hermon_state_t *state, hermon_dbr_info_t **dinfo)
242 {
243 	int			status;
244 	ddi_dma_handle_t	dma_hdl;
245 	ddi_acc_handle_t	acc_hdl;
246 	ddi_dma_attr_t		dma_attr;
247 	ddi_dma_cookie_t	cookie;
248 	uint_t			cookie_cnt;
249 	int			i;
250 	hermon_dbr_info_t 	*info;
251 	caddr_t			dmaaddr;
252 	uint64_t		dmalen;
253 
254 	info = kmem_zalloc(sizeof (hermon_dbr_info_t), KM_SLEEP);
255 
256 	/*
257 	 * Initialize many of the default DMA attributes.  Then set additional
258 	 * alignment restrictions if necessary for the dbr memory, meaning
259 	 * page aligned.  Also use the configured value for IOMMU bypass
260 	 */
261 	hermon_dma_attr_init(state, &dma_attr);
262 	dma_attr.dma_attr_align = PAGESIZE;
263 	dma_attr.dma_attr_sgllen = 1;	/* make sure only one cookie */
264 
265 	status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
266 	    DDI_DMA_SLEEP, NULL, &dma_hdl);
267 	if (status != DDI_SUCCESS) {
268 		kmem_free((void *)info, sizeof (hermon_dbr_info_t));
269 		cmn_err(CE_NOTE, "dbr DMA handle alloc failed\n");
270 		return (DDI_FAILURE);
271 	}
272 
273 	status = ddi_dma_mem_alloc(dma_hdl, PAGESIZE,
274 	    &state->hs_reg_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP,
275 	    NULL, &dmaaddr, (size_t *)&dmalen, &acc_hdl);
276 	if (status != DDI_SUCCESS)	{
277 		ddi_dma_free_handle(&dma_hdl);
278 		cmn_err(CE_CONT, "dbr DMA mem alloc failed(status %d)", status);
279 		kmem_free((void *)info, sizeof (hermon_dbr_info_t));
280 		return (DDI_FAILURE);
281 	}
282 
283 	/* this memory won't be IB registered, so do the bind here */
284 	status = ddi_dma_addr_bind_handle(dma_hdl, NULL,
285 	    dmaaddr, (size_t)dmalen, DDI_DMA_RDWR |
286 	    DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &cookie, &cookie_cnt);
287 	if (status != DDI_SUCCESS) {
288 		ddi_dma_mem_free(&acc_hdl);
289 		ddi_dma_free_handle(&dma_hdl);
290 		kmem_free((void *)info, sizeof (hermon_dbr_info_t));
291 		cmn_err(CE_CONT, "dbr DMA bind handle failed (status %d)",
292 		    status);
293 		return (DDI_FAILURE);
294 	}
295 	*dinfo = info;		/* Pass back the pointer */
296 
297 	/* init the info structure with returned info */
298 	info->dbr_dmahdl = dma_hdl;
299 	info->dbr_acchdl = acc_hdl;
300 	info->dbr_page   = (hermon_dbr_t *)(void *)dmaaddr;
301 	info->dbr_link = NULL;
302 	/* extract the phys addr from the cookie */
303 	info->dbr_paddr = cookie.dmac_laddress;
304 	info->dbr_firstfree = 0;
305 	info->dbr_nfree = HERMON_NUM_DBR_PER_PAGE;
306 	/* link all DBrs onto the free list */
307 	for (i = 0; i < HERMON_NUM_DBR_PER_PAGE; i++) {
308 		info->dbr_page[i] = i + 1;
309 	}
310 
311 	return (DDI_SUCCESS);
312 }
313 
314 
315 /*
316  * hermon_dbr_alloc()
317  *	DBr record allocation - called from alloc cq/qp/srq
318  *	will check for available dbrs in current
319  *	page - if needed it will allocate another and link them
320  */
321 
322 int
323 hermon_dbr_alloc(hermon_state_t *state, uint_t index, ddi_acc_handle_t *acchdl,
324     hermon_dbr_t **vdbr, uint64_t *pdbr, uint64_t *mapoffset)
325 {
326 	hermon_dbr_t		*record = NULL;
327 	hermon_dbr_info_t	*info = NULL;
328 	uint32_t		idx;
329 	int			status;
330 
331 	if (index != state->hs_kernel_uar_index)
332 		return (hermon_user_dbr_alloc(state, index, acchdl, vdbr, pdbr,
333 		    mapoffset));
334 
335 	mutex_enter(&state->hs_dbr_lock);
336 	for (info = state->hs_kern_dbr; info != NULL; info = info->dbr_link)
337 		if (info->dbr_nfree != 0)
338 			break;		/* found a page w/ one available */
339 
340 	if (info == NULL) {	/* did NOT find a page with one available */
341 		status = hermon_dbr_page_alloc(state, &info);
342 		if (status != DDI_SUCCESS) {
343 			/* do error handling */
344 			mutex_exit(&state->hs_dbr_lock);
345 			return (DDI_FAILURE);
346 		}
347 		/* got a new page, so link it in. */
348 		info->dbr_link = state->hs_kern_dbr;
349 		state->hs_kern_dbr = info;
350 	}
351 	idx = info->dbr_firstfree;
352 	record = info->dbr_page + idx;
353 	info->dbr_firstfree = *record;
354 	info->dbr_nfree--;
355 	*record = 0;
356 
357 	*acchdl = info->dbr_acchdl;
358 	*vdbr = record;
359 	*pdbr = info->dbr_paddr + idx * sizeof (hermon_dbr_t);
360 	mutex_exit(&state->hs_dbr_lock);
361 	return (DDI_SUCCESS);
362 }
363 
364 /*
365  * hermon_dbr_free()
366  *	DBr record deallocation - called from free cq/qp
367  *	will update the counter in the header, and invalidate
368  *	the dbr, but will NEVER free pages of dbrs - small
369  *	price to pay, but userland access never will anyway
370  */
371 void
372 hermon_dbr_free(hermon_state_t *state, uint_t indx, hermon_dbr_t *record)
373 {
374 	hermon_dbr_t		*page;
375 	hermon_dbr_info_t	*info;
376 
377 	if (indx != state->hs_kernel_uar_index) {
378 		hermon_user_dbr_free(state, indx, record);
379 		return;
380 	}
381 	page = (hermon_dbr_t *)(uintptr_t)((uintptr_t)record & PAGEMASK);
382 	mutex_enter(&state->hs_dbr_lock);
383 	for (info = state->hs_kern_dbr; info != NULL; info = info->dbr_link)
384 		if (info->dbr_page == page)
385 			break;
386 	ASSERT(info != NULL);
387 	*record = info->dbr_firstfree;
388 	info->dbr_firstfree = record - info->dbr_page;
389 	info->dbr_nfree++;
390 	mutex_exit(&state->hs_dbr_lock);
391 }
392 
393 /*
394  * hermon_dbr_kern_free()
395  *    Context: Can be called only from detach context.
396  *
397  *	Free all kernel dbr pages.  This includes the freeing of all the dma
398  *	resources acquired during the allocation of the pages.
399  *
400  *	Also, free all the user dbr pages.
401  */
402 void
403 hermon_dbr_kern_free(hermon_state_t *state)
404 {
405 	hermon_dbr_info_t	*info, *link;
406 	hermon_user_dbr_t	*udbr, *next;
407 	hermon_udbr_page_t	*pagep, *nextp;
408 	hermon_umap_db_entry_t	*umapdb;
409 	int			instance, status;
410 	uint64_t		value;
411 	extern			hermon_umap_db_t hermon_userland_rsrc_db;
412 
413 	mutex_enter(&state->hs_dbr_lock);
414 	for (info = state->hs_kern_dbr; info != NULL; info = link) {
415 		(void) ddi_dma_unbind_handle(info->dbr_dmahdl);
416 		ddi_dma_mem_free(&info->dbr_acchdl);	/* free page */
417 		ddi_dma_free_handle(&info->dbr_dmahdl);
418 		link = info->dbr_link;
419 		kmem_free(info, sizeof (hermon_dbr_info_t));
420 	}
421 
422 	udbr = state->hs_user_dbr;
423 	instance = state->hs_instance;
424 	mutex_enter(&hermon_userland_rsrc_db.hdl_umapdb_lock);
425 	while (udbr != NULL) {
426 		pagep = udbr->udbr_pagep;
427 		while (pagep != NULL) {
428 			/* probably need to remove "db" */
429 			(void) ddi_dma_unbind_handle(pagep->upg_dmahdl);
430 			ddi_dma_free_handle(&pagep->upg_dmahdl);
431 			freerbuf(pagep->upg_buf);
432 			ddi_umem_free(pagep->upg_umemcookie);
433 			status = hermon_umap_db_find_nolock(instance,
434 			    HERMON_DBR_KEY(udbr->udbr_index,
435 			    pagep->upg_index), MLNX_UMAP_DBRMEM_RSRC,
436 			    &value, HERMON_UMAP_DB_REMOVE, &umapdb);
437 			if (status == DDI_SUCCESS)
438 				hermon_umap_db_free(umapdb);
439 			nextp = pagep->upg_link;
440 			kmem_free(pagep, sizeof (*pagep));
441 			pagep = nextp;
442 		}
443 		next = udbr->udbr_link;
444 		kmem_free(udbr, sizeof (*udbr));
445 		udbr = next;
446 	}
447 	mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
448 	mutex_exit(&state->hs_dbr_lock);
449 }
450 
451 /*
452  * hermon_ah_alloc()
453  *    Context: Can be called only from user or kernel context.
454  */
455 int
456 hermon_ah_alloc(hermon_state_t *state, hermon_pdhdl_t pd,
457     ibt_adds_vect_t *attr_p, hermon_ahhdl_t *ahhdl, uint_t sleepflag)
458 {
459 	hermon_rsrc_t		*rsrc;
460 	hermon_hw_udav_t	*udav;
461 	hermon_ahhdl_t		ah;
462 	int			status;
463 
464 	/*
465 	 * Someday maybe the "ibt_adds_vect_t *attr_p" will be NULL to
466 	 * indicate that we wish to allocate an "invalid" (i.e. empty)
467 	 * address handle XXX
468 	 */
469 
470 	/* Validate that specified port number is legal */
471 	if (!hermon_portnum_is_valid(state, attr_p->av_port_num)) {
472 		return (IBT_HCA_PORT_INVALID);
473 	}
474 
475 	/*
476 	 * Allocate the software structure for tracking the address handle
477 	 * (i.e. the Hermon Address Handle struct).
478 	 */
479 	status = hermon_rsrc_alloc(state, HERMON_AHHDL, 1, sleepflag, &rsrc);
480 	if (status != DDI_SUCCESS) {
481 		return (IBT_INSUFF_RESOURCE);
482 	}
483 	ah = (hermon_ahhdl_t)rsrc->hr_addr;
484 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah))
485 
486 	/* Increment the reference count on the protection domain (PD) */
487 	hermon_pd_refcnt_inc(pd);
488 
489 	udav = (hermon_hw_udav_t *)kmem_zalloc(sizeof (hermon_hw_udav_t),
490 	    KM_SLEEP);
491 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*udav))
492 
493 	/*
494 	 * Fill in the UDAV data. We first zero out the UDAV, then populate
495 	 * it by then calling hermon_set_addr_path() to fill in the common
496 	 * portions that can be pulled from the "ibt_adds_vect_t" passed in
497 	 */
498 	status = hermon_set_addr_path(state, attr_p,
499 	    (hermon_hw_addr_path_t *)udav, HERMON_ADDRPATH_UDAV);
500 	if (status != DDI_SUCCESS) {
501 		hermon_pd_refcnt_dec(pd);
502 		hermon_rsrc_free(state, &rsrc);
503 		return (status);
504 	}
505 	udav->pd	= pd->pd_pdnum;
506 	udav->sl	= attr_p->av_srvl;
507 
508 	/*
509 	 * Fill in the rest of the Hermon Address Handle struct.
510 	 *
511 	 * NOTE: We are saving away a copy of the "av_dgid.gid_guid" field
512 	 * here because we may need to return it later to the IBTF (as a
513 	 * result of a subsequent query operation).  Unlike the other UDAV
514 	 * parameters, the value of "av_dgid.gid_guid" is not always preserved.
515 	 * The reason for this is described in hermon_set_addr_path().
516 	 */
517 	ah->ah_rsrcp	 = rsrc;
518 	ah->ah_pdhdl	 = pd;
519 	ah->ah_udav	 = udav;
520 	ah->ah_save_guid = attr_p->av_dgid.gid_guid;
521 	*ahhdl = ah;
522 
523 	return (DDI_SUCCESS);
524 }
525 
526 
527 /*
528  * hermon_ah_free()
529  *    Context: Can be called only from user or kernel context.
530  */
531 /* ARGSUSED */
532 int
533 hermon_ah_free(hermon_state_t *state, hermon_ahhdl_t *ahhdl, uint_t sleepflag)
534 {
535 	hermon_rsrc_t		*rsrc;
536 	hermon_pdhdl_t		pd;
537 	hermon_ahhdl_t		ah;
538 
539 	/*
540 	 * Pull all the necessary information from the Hermon Address Handle
541 	 * struct.  This is necessary here because the resource for the
542 	 * AH is going to be freed up as part of this operation.
543 	 */
544 	ah    = *ahhdl;
545 	mutex_enter(&ah->ah_lock);
546 	rsrc  = ah->ah_rsrcp;
547 	pd    = ah->ah_pdhdl;
548 	mutex_exit(&ah->ah_lock);
549 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah))
550 
551 	/* Free the UDAV memory */
552 	kmem_free(ah->ah_udav, sizeof (hermon_hw_udav_t));
553 
554 	/* Decrement the reference count on the protection domain (PD) */
555 	hermon_pd_refcnt_dec(pd);
556 
557 	/* Free the Hermon Address Handle structure */
558 	hermon_rsrc_free(state, &rsrc);
559 
560 	/* Set the ahhdl pointer to NULL and return success */
561 	*ahhdl = NULL;
562 
563 	return (DDI_SUCCESS);
564 }
565 
566 
567 /*
568  * hermon_ah_query()
569  *    Context: Can be called from interrupt or base context.
570  */
571 /* ARGSUSED */
572 int
573 hermon_ah_query(hermon_state_t *state, hermon_ahhdl_t ah, hermon_pdhdl_t *pd,
574     ibt_adds_vect_t *attr_p)
575 {
576 	mutex_enter(&ah->ah_lock);
577 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p))
578 
579 	/*
580 	 * Pull the PD and UDAV from the Hermon Address Handle structure
581 	 */
582 	*pd = ah->ah_pdhdl;
583 
584 	/*
585 	 * Fill in "ibt_adds_vect_t".  We call hermon_get_addr_path() to fill
586 	 * the common portions that can be pulled from the UDAV we pass in.
587 	 *
588 	 * NOTE: We will also fill the "av_dgid.gid_guid" field from the
589 	 * "ah_save_guid" field we have previously saved away.  The reason
590 	 * for this is described in hermon_ah_alloc() and hermon_ah_modify().
591 	 */
592 	hermon_get_addr_path(state, (hermon_hw_addr_path_t *)ah->ah_udav,
593 	    attr_p, HERMON_ADDRPATH_UDAV);
594 
595 	attr_p->av_dgid.gid_guid = ah->ah_save_guid;
596 
597 	mutex_exit(&ah->ah_lock);
598 	return (DDI_SUCCESS);
599 }
600 
601 
602 /*
603  * hermon_ah_modify()
604  *    Context: Can be called from interrupt or base context.
605  */
606 /* ARGSUSED */
607 int
608 hermon_ah_modify(hermon_state_t *state, hermon_ahhdl_t ah,
609     ibt_adds_vect_t *attr_p)
610 {
611 	hermon_hw_udav_t	old_udav;
612 	uint64_t		data_old;
613 	int			status, size, i;
614 
615 	/* Validate that specified port number is legal */
616 	if (!hermon_portnum_is_valid(state, attr_p->av_port_num)) {
617 		return (IBT_HCA_PORT_INVALID);
618 	}
619 
620 	mutex_enter(&ah->ah_lock);
621 
622 	/* Save a copy of the current UDAV data in old_udav. */
623 	bcopy(ah->ah_udav, &old_udav, sizeof (hermon_hw_udav_t));
624 
625 	/*
626 	 * Fill in the new UDAV with the caller's data, passed in via the
627 	 * "ibt_adds_vect_t" structure.
628 	 *
629 	 * NOTE: We also need to save away a copy of the "av_dgid.gid_guid"
630 	 * field here (just as we did during hermon_ah_alloc()) because we
631 	 * may need to return it later to the IBTF (as a result of a
632 	 * subsequent query operation).  As explained in hermon_ah_alloc(),
633 	 * unlike the other UDAV parameters, the value of "av_dgid.gid_guid"
634 	 * is not always preserved. The reason for this is described in
635 	 * hermon_set_addr_path().
636 	 */
637 	status = hermon_set_addr_path(state, attr_p,
638 	    (hermon_hw_addr_path_t *)ah->ah_udav, HERMON_ADDRPATH_UDAV);
639 	if (status != DDI_SUCCESS) {
640 		mutex_exit(&ah->ah_lock);
641 		return (status);
642 	}
643 	ah->ah_save_guid = attr_p->av_dgid.gid_guid;
644 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(ah->ah_udav)))
645 	ah->ah_udav->sl  = attr_p->av_srvl;
646 
647 	/*
648 	 * Copy changes into the new UDAV.
649 	 *    Note:  We copy in 64-bit chunks.  For the first two of these
650 	 *    chunks it is necessary to read the current contents of the
651 	 *    UDAV, mask off the modifiable portions (maintaining any
652 	 *    of the "reserved" portions), and then mask on the new data.
653 	 */
654 	size = sizeof (hermon_hw_udav_t) >> 3;
655 	for (i = 0; i < size; i++) {
656 		data_old = ((uint64_t *)&old_udav)[i];
657 
658 		/*
659 		 * Apply mask to change only the relevant values.
660 		 */
661 		if (i == 0) {
662 			data_old = data_old & HERMON_UDAV_MODIFY_MASK0;
663 		} else if (i == 1) {
664 			data_old = data_old & HERMON_UDAV_MODIFY_MASK1;
665 		} else {
666 			data_old = 0;
667 		}
668 
669 		/* Store the updated values to the UDAV */
670 		((uint64_t *)ah->ah_udav)[i] |= data_old;
671 	}
672 
673 	/*
674 	 * Put the valid PD number back into the UDAV entry, as it
675 	 * might have been clobbered above.
676 	 */
677 	ah->ah_udav->pd = old_udav.pd;
678 
679 
680 	mutex_exit(&ah->ah_lock);
681 	return (DDI_SUCCESS);
682 }
683 
684 /*
685  * hermon_mcg_attach()
686  *    Context: Can be called only from user or kernel context.
687  */
688 int
689 hermon_mcg_attach(hermon_state_t *state, hermon_qphdl_t qp, ib_gid_t gid,
690     ib_lid_t lid)
691 {
692 	hermon_rsrc_t		*rsrc;
693 	hermon_hw_mcg_t		*mcg_entry;
694 	hermon_hw_mcg_qp_list_t	*mcg_entry_qplist;
695 	hermon_mcghdl_t		mcg, newmcg;
696 	uint64_t		mgid_hash;
697 	uint32_t		end_indx;
698 	int			status;
699 	uint_t			qp_found;
700 
701 	/*
702 	 * It is only allowed to attach MCG to UD queue pairs.  Verify
703 	 * that the intended QP is of the appropriate transport type
704 	 */
705 	if (qp->qp_serv_type != HERMON_QP_UD) {
706 		return (IBT_QP_SRV_TYPE_INVALID);
707 	}
708 
709 	/*
710 	 * Check for invalid Multicast DLID.  Specifically, all Multicast
711 	 * LIDs should be within a well defined range.  If the specified LID
712 	 * is outside of that range, then return an error.
713 	 */
714 	if (hermon_mlid_is_valid(lid) == 0) {
715 		return (IBT_MC_MLID_INVALID);
716 	}
717 	/*
718 	 * Check for invalid Multicast GID.  All Multicast GIDs should have
719 	 * a well-defined pattern of bits and flags that are allowable.  If
720 	 * the specified GID does not meet the criteria, then return an error.
721 	 */
722 	if (hermon_mgid_is_valid(gid) == 0) {
723 		return (IBT_MC_MGID_INVALID);
724 	}
725 
726 	/*
727 	 * Compute the MGID hash value.  Since the MCG table is arranged as
728 	 * a number of separate hash chains, this operation converts the
729 	 * specified MGID into the starting index of an entry in the hash
730 	 * table (i.e. the index for the start of the appropriate hash chain).
731 	 * Subsequent operations below will walk the chain searching for the
732 	 * right place to add this new QP.
733 	 */
734 	status = hermon_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
735 	    &mgid_hash, HERMON_SLEEPFLAG_FOR_CONTEXT());
736 	if (status != HERMON_CMD_SUCCESS) {
737 		cmn_err(CE_CONT, "Hermon: MGID_HASH command failed: %08x\n",
738 		    status);
739 		if (status == HERMON_CMD_INVALID_STATUS) {
740 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
741 		}
742 		return (ibc_get_ci_failure(0));
743 	}
744 
745 	/*
746 	 * Grab the multicast group mutex.  Then grab the pre-allocated
747 	 * temporary buffer used for holding and/or modifying MCG entries.
748 	 * Zero out the temporary MCG entry before we begin.
749 	 */
750 	mutex_enter(&state->hs_mcglock);
751 	mcg_entry = state->hs_mcgtmp;
752 	mcg_entry_qplist = HERMON_MCG_GET_QPLIST_PTR(mcg_entry);
753 	bzero(mcg_entry, HERMON_MCGMEM_SZ(state));
754 
755 	/*
756 	 * Walk through the array of MCG entries starting at "mgid_hash".
757 	 * Try to find the appropriate place for this new QP to be added.
758 	 * This could happen when the first entry of the chain has MGID == 0
759 	 * (which means that the hash chain is empty), or because we find
760 	 * an entry with the same MGID (in which case we'll add the QP to
761 	 * that MCG), or because we come to the end of the chain (in which
762 	 * case this is the first QP being added to the multicast group that
763 	 * corresponds to the MGID.  The hermon_mcg_walk_mgid_hash() routine
764 	 * walks the list and returns an index into the MCG table.  The entry
765 	 * at this index is then checked to determine which case we have
766 	 * fallen into (see below).  Note:  We are using the "shadow" MCG
767 	 * list (of hermon_mcg_t structs) for this lookup because the real
768 	 * MCG entries are in hardware (and the lookup process would be much
769 	 * more time consuming).
770 	 */
771 	end_indx = hermon_mcg_walk_mgid_hash(state, mgid_hash, gid, NULL);
772 	mcg	 = &state->hs_mcghdl[end_indx];
773 
774 	/*
775 	 * If MGID == 0, then the hash chain is empty.  Just fill in the
776 	 * current entry.  Note:  No need to allocate an MCG table entry
777 	 * as all the hash chain "heads" are already preallocated.
778 	 */
779 	if ((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) {
780 
781 		/* Fill in the current entry in the "shadow" MCG list */
782 		hermon_mcg_setup_new_hdr(mcg, mcg_entry, gid, NULL);
783 
784 		/*
785 		 * Try to add the new QP number to the list.  This (and the
786 		 * above) routine fills in a temporary MCG.  The "mcg_entry"
787 		 * and "mcg_entry_qplist" pointers simply point to different
788 		 * offsets within the same temporary copy of the MCG (for
789 		 * convenience).  Note:  If this fails, we need to invalidate
790 		 * the entries we've already put into the "shadow" list entry
791 		 * above.
792 		 */
793 		status = hermon_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp,
794 		    &qp_found);
795 		if (status != DDI_SUCCESS) {
796 			bzero(mcg, sizeof (struct hermon_sw_mcg_list_s));
797 			mutex_exit(&state->hs_mcglock);
798 			return (status);
799 		}
800 		if (!qp_found)
801 			mcg_entry->member_cnt = (mcg->mcg_num_qps + 1);
802 			    /* set the member count */
803 
804 		/*
805 		 * Once the temporary MCG has been filled in, write the entry
806 		 * into the appropriate location in the Hermon MCG entry table.
807 		 * If it's successful, then drop the lock and return success.
808 		 * Note: In general, this operation shouldn't fail.  If it
809 		 * does, then it is an indication that something (probably in
810 		 * HW, but maybe in SW) has gone seriously wrong.  We still
811 		 * want to zero out the entries that we've filled in above
812 		 * (in the hermon_mcg_setup_new_hdr() routine).
813 		 */
814 		status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
815 		    HERMON_CMD_NOSLEEP_SPIN);
816 		if (status != HERMON_CMD_SUCCESS) {
817 			bzero(mcg, sizeof (struct hermon_sw_mcg_list_s));
818 			mutex_exit(&state->hs_mcglock);
819 			HERMON_WARNING(state, "failed to write MCG entry");
820 			cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
821 			    "%08x\n", status);
822 			if (status == HERMON_CMD_INVALID_STATUS) {
823 				hermon_fm_ereport(state, HCA_SYS_ERR,
824 				    HCA_ERR_SRV_LOST);
825 			}
826 			return (ibc_get_ci_failure(0));
827 		}
828 
829 		/*
830 		 * Now that we know all the Hermon firmware accesses have been
831 		 * successful, we update the "shadow" MCG entry by incrementing
832 		 * the "number of attached QPs" count.
833 		 *
834 		 * We increment only if the QP is not already part of the
835 		 * MCG by checking the 'qp_found' flag returned from the
836 		 * qplist_add above.
837 		 */
838 		if (!qp_found) {
839 			mcg->mcg_num_qps++;
840 
841 			/*
842 			 * Increment the refcnt for this QP.  Because the QP
843 			 * was added to this MCG, the refcnt must be
844 			 * incremented.
845 			 */
846 			hermon_qp_mcg_refcnt_inc(qp);
847 		}
848 
849 		/*
850 		 * We drop the lock and return success.
851 		 */
852 		mutex_exit(&state->hs_mcglock);
853 		return (DDI_SUCCESS);
854 	}
855 
856 	/*
857 	 * If the specified MGID matches the MGID in the current entry, then
858 	 * we need to try to add the QP to the current MCG entry.  In this
859 	 * case, it means that we need to read the existing MCG entry (into
860 	 * the temporary MCG), add the new QP number to the temporary entry
861 	 * (using the same method we used above), and write the entry back
862 	 * to the hardware (same as above).
863 	 */
864 	if ((mcg->mcg_mgid_h == gid.gid_prefix) &&
865 	    (mcg->mcg_mgid_l == gid.gid_guid)) {
866 
867 		/*
868 		 * Read the current MCG entry into the temporary MCG.  Note:
869 		 * In general, this operation shouldn't fail.  If it does,
870 		 * then it is an indication that something (probably in HW,
871 		 * but maybe in SW) has gone seriously wrong.
872 		 */
873 		status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
874 		    HERMON_CMD_NOSLEEP_SPIN);
875 		if (status != HERMON_CMD_SUCCESS) {
876 			mutex_exit(&state->hs_mcglock);
877 			HERMON_WARNING(state, "failed to read MCG entry");
878 			cmn_err(CE_CONT, "Hermon: READ_MGM command failed: "
879 			    "%08x\n", status);
880 			if (status == HERMON_CMD_INVALID_STATUS) {
881 				hermon_fm_ereport(state, HCA_SYS_ERR,
882 				    HCA_ERR_SRV_LOST);
883 			}
884 			return (ibc_get_ci_failure(0));
885 		}
886 
887 		/*
888 		 * Try to add the new QP number to the list.  This routine
889 		 * fills in the necessary pieces of the temporary MCG.  The
890 		 * "mcg_entry_qplist" pointer is used to point to the portion
891 		 * of the temporary MCG that holds the QP numbers.
892 		 *
893 		 * Note: hermon_mcg_qplist_add() returns SUCCESS if it
894 		 * already found the QP in the list.  In this case, the QP is
895 		 * not added on to the list again.  Check the flag 'qp_found'
896 		 * if this value is needed to be known.
897 		 *
898 		 */
899 		status = hermon_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp,
900 		    &qp_found);
901 		if (status != DDI_SUCCESS) {
902 			mutex_exit(&state->hs_mcglock);
903 			return (status);
904 		}
905 		if (!qp_found)
906 			mcg_entry->member_cnt = (mcg->mcg_num_qps + 1);
907 			    /* set the member count */
908 
909 		/*
910 		 * Once the temporary MCG has been updated, write the entry
911 		 * into the appropriate location in the Hermon MCG entry table.
912 		 * If it's successful, then drop the lock and return success.
913 		 * Note: In general, this operation shouldn't fail.  If it
914 		 * does, then it is an indication that something (probably in
915 		 * HW, but maybe in SW) has gone seriously wrong.
916 		 */
917 		status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
918 		    HERMON_CMD_NOSLEEP_SPIN);
919 		if (status != HERMON_CMD_SUCCESS) {
920 			mutex_exit(&state->hs_mcglock);
921 			HERMON_WARNING(state, "failed to write MCG entry");
922 			cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
923 			    "%08x\n", status);
924 			if (status == HERMON_CMD_INVALID_STATUS) {
925 				hermon_fm_ereport(state, HCA_SYS_ERR,
926 				    HCA_ERR_SRV_LOST);
927 			}
928 			return (ibc_get_ci_failure(0));
929 		}
930 
931 		/*
932 		 * Now that we know all the Hermon firmware accesses have been
933 		 * successful, we update the current "shadow" MCG entry by
934 		 * incrementing the "number of attached QPs" count.
935 		 *
936 		 * We increment only if the QP is not already part of the
937 		 * MCG by checking the 'qp_found' flag returned
938 		 * hermon_mcg_walk_mgid_hashfrom the qplist_add above.
939 		 */
940 		if (!qp_found) {
941 			mcg->mcg_num_qps++;
942 
943 			/*
944 			 * Increment the refcnt for this QP.  Because the QP
945 			 * was added to this MCG, the refcnt must be
946 			 * incremented.
947 			 */
948 			hermon_qp_mcg_refcnt_inc(qp);
949 		}
950 
951 		/*
952 		 * We drop the lock and return success.
953 		 */
954 		mutex_exit(&state->hs_mcglock);
955 		return (DDI_SUCCESS);
956 	}
957 
958 	/*
959 	 * If we've reached here, then we're at the end of the hash chain.
960 	 * We need to allocate a new MCG entry, fill it in, write it to Hermon,
961 	 * and update the previous entry to link the new one to the end of the
962 	 * chain.
963 	 */
964 
965 	/*
966 	 * Allocate an MCG table entry.  This will be filled in with all
967 	 * the necessary parameters to define the multicast group.  Then it
968 	 * will be written to the hardware in the next-to-last step below.
969 	 */
970 	status = hermon_rsrc_alloc(state, HERMON_MCG, 1, HERMON_NOSLEEP, &rsrc);
971 	if (status != DDI_SUCCESS) {
972 		mutex_exit(&state->hs_mcglock);
973 		return (IBT_INSUFF_RESOURCE);
974 	}
975 
976 	/*
977 	 * Fill in the new entry in the "shadow" MCG list.  Note:  Just as
978 	 * it does above, hermon_mcg_setup_new_hdr() also fills in a portion
979 	 * of the temporary MCG entry (the rest of which will be filled in by
980 	 * hermon_mcg_qplist_add() below)
981 	 */
982 	newmcg = &state->hs_mcghdl[rsrc->hr_indx];
983 	hermon_mcg_setup_new_hdr(newmcg, mcg_entry, gid, rsrc);
984 
985 	/*
986 	 * Try to add the new QP number to the list.  This routine fills in
987 	 * the final necessary pieces of the temporary MCG.  The
988 	 * "mcg_entry_qplist" pointer is used to point to the portion of the
989 	 * temporary MCG that holds the QP numbers.  If we fail here, we
990 	 * must undo the previous resource allocation.
991 	 *
992 	 * Note: hermon_mcg_qplist_add() can we return SUCCESS if it already
993 	 * found the QP in the list.  In this case, the QP is not added on to
994 	 * the list again.  Check the flag 'qp_found' if this value is needed
995 	 * to be known.
996 	 */
997 	status = hermon_mcg_qplist_add(state, newmcg, mcg_entry_qplist, qp,
998 	    &qp_found);
999 	if (status != DDI_SUCCESS) {
1000 		bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
1001 		hermon_rsrc_free(state, &rsrc);
1002 		mutex_exit(&state->hs_mcglock);
1003 		return (status);
1004 	}
1005 	mcg_entry->member_cnt = (newmcg->mcg_num_qps + 1);
1006 	    /* set the member count */
1007 
1008 	/*
1009 	 * Once the temporary MCG has been updated, write the entry into the
1010 	 * appropriate location in the Hermon MCG entry table.  If this is
1011 	 * successful, then we need to chain the previous entry to this one.
1012 	 * Note: In general, this operation shouldn't fail.  If it does, then
1013 	 * it is an indication that something (probably in HW, but maybe in
1014 	 * SW) has gone seriously wrong.
1015 	 */
1016 	status = hermon_write_mgm_cmd_post(state, mcg_entry, rsrc->hr_indx,
1017 	    HERMON_CMD_NOSLEEP_SPIN);
1018 	if (status != HERMON_CMD_SUCCESS) {
1019 		bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
1020 		hermon_rsrc_free(state, &rsrc);
1021 		mutex_exit(&state->hs_mcglock);
1022 		HERMON_WARNING(state, "failed to write MCG entry");
1023 		cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
1024 		    status);
1025 		if (status == HERMON_CMD_INVALID_STATUS) {
1026 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1027 		}
1028 		return (ibc_get_ci_failure(0));
1029 	}
1030 
1031 	/*
1032 	 * Now read the current MCG entry (the one previously at the end of
1033 	 * hash chain) into the temporary MCG.  We are going to update its
1034 	 * "next_gid_indx" now and write the entry back to the MCG table.
1035 	 * Note:  In general, this operation shouldn't fail.  If it does, then
1036 	 * it is an indication that something (probably in HW, but maybe in SW)
1037 	 * has gone seriously wrong.  We will free up the MCG entry resource,
1038 	 * but we will not undo the previously written MCG entry in the HW.
1039 	 * This is OK, though, because the MCG entry is not currently attached
1040 	 * to any hash chain.
1041 	 */
1042 	status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
1043 	    HERMON_CMD_NOSLEEP_SPIN);
1044 	if (status != HERMON_CMD_SUCCESS) {
1045 		bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
1046 		hermon_rsrc_free(state, &rsrc);
1047 		mutex_exit(&state->hs_mcglock);
1048 		HERMON_WARNING(state, "failed to read MCG entry");
1049 		cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
1050 		    status);
1051 		if (status == HERMON_CMD_INVALID_STATUS) {
1052 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1053 		}
1054 		return (ibc_get_ci_failure(0));
1055 	}
1056 
1057 	/*
1058 	 * Finally, we update the "next_gid_indx" field in the temporary MCG
1059 	 * and attempt to write the entry back into the Hermon MCG table.  If
1060 	 * this succeeds, then we update the "shadow" list to reflect the
1061 	 * change, drop the lock, and return success.  Note:  In general, this
1062 	 * operation shouldn't fail.  If it does, then it is an indication
1063 	 * that something (probably in HW, but maybe in SW) has gone seriously
1064 	 * wrong.  Just as we do above, we will free up the MCG entry resource,
1065 	 * but we will not try to undo the previously written MCG entry.  This
1066 	 * is OK, though, because (since we failed here to update the end of
1067 	 * the chain) that other entry is not currently attached to any chain.
1068 	 */
1069 	mcg_entry->next_gid_indx = rsrc->hr_indx;
1070 	status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
1071 	    HERMON_CMD_NOSLEEP_SPIN);
1072 	if (status != HERMON_CMD_SUCCESS) {
1073 		bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
1074 		hermon_rsrc_free(state, &rsrc);
1075 		mutex_exit(&state->hs_mcglock);
1076 		HERMON_WARNING(state, "failed to write MCG entry");
1077 		cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
1078 		    status);
1079 		if (status == HERMON_CMD_INVALID_STATUS) {
1080 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1081 		}
1082 		return (ibc_get_ci_failure(0));
1083 	}
1084 	mcg = &state->hs_mcghdl[end_indx];
1085 	mcg->mcg_next_indx = rsrc->hr_indx;
1086 
1087 	/*
1088 	 * Now that we know all the Hermon firmware accesses have been
1089 	 * successful, we update the new "shadow" MCG entry by incrementing
1090 	 * the "number of attached QPs" count.  Then we drop the lock and
1091 	 * return success.
1092 	 */
1093 	newmcg->mcg_num_qps++;
1094 
1095 	/*
1096 	 * Increment the refcnt for this QP.  Because the QP
1097 	 * was added to this MCG, the refcnt must be
1098 	 * incremented.
1099 	 */
1100 	hermon_qp_mcg_refcnt_inc(qp);
1101 
1102 	mutex_exit(&state->hs_mcglock);
1103 	return (DDI_SUCCESS);
1104 }
1105 
1106 
1107 /*
1108  * hermon_mcg_detach()
1109  *    Context: Can be called only from user or kernel context.
1110  */
1111 int
1112 hermon_mcg_detach(hermon_state_t *state, hermon_qphdl_t qp, ib_gid_t gid,
1113     ib_lid_t lid)
1114 {
1115 	hermon_hw_mcg_t		*mcg_entry;
1116 	hermon_hw_mcg_qp_list_t	*mcg_entry_qplist;
1117 	hermon_mcghdl_t		mcg;
1118 	uint64_t		mgid_hash;
1119 	uint32_t		end_indx, prev_indx;
1120 	int			status;
1121 
1122 	/*
1123 	 * Check for invalid Multicast DLID.  Specifically, all Multicast
1124 	 * LIDs should be within a well defined range.  If the specified LID
1125 	 * is outside of that range, then return an error.
1126 	 */
1127 	if (hermon_mlid_is_valid(lid) == 0) {
1128 		return (IBT_MC_MLID_INVALID);
1129 	}
1130 
1131 	/*
1132 	 * Compute the MGID hash value.  As described above, the MCG table is
1133 	 * arranged as a number of separate hash chains.  This operation
1134 	 * converts the specified MGID into the starting index of an entry in
1135 	 * the hash table (i.e. the index for the start of the appropriate
1136 	 * hash chain).  Subsequent operations below will walk the chain
1137 	 * searching for a matching entry from which to attempt to remove
1138 	 * the specified QP.
1139 	 */
1140 	status = hermon_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
1141 	    &mgid_hash, HERMON_SLEEPFLAG_FOR_CONTEXT());
1142 	if (status != HERMON_CMD_SUCCESS) {
1143 		cmn_err(CE_CONT, "Hermon: MGID_HASH command failed: %08x\n",
1144 		    status);
1145 		if (status == HERMON_CMD_INVALID_STATUS) {
1146 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1147 		}
1148 		return (ibc_get_ci_failure(0));
1149 	}
1150 
1151 	/*
1152 	 * Grab the multicast group mutex.  Then grab the pre-allocated
1153 	 * temporary buffer used for holding and/or modifying MCG entries.
1154 	 */
1155 	mutex_enter(&state->hs_mcglock);
1156 	mcg_entry = state->hs_mcgtmp;
1157 	mcg_entry_qplist = HERMON_MCG_GET_QPLIST_PTR(mcg_entry);
1158 
1159 	/*
1160 	 * Walk through the array of MCG entries starting at "mgid_hash".
1161 	 * Try to find an MCG entry with a matching MGID.  The
1162 	 * hermon_mcg_walk_mgid_hash() routine walks the list and returns an
1163 	 * index into the MCG table.  The entry at this index is checked to
1164 	 * determine whether it is a match or not.  If it is a match, then
1165 	 * we continue on to attempt to remove the QP from the MCG.  If it
1166 	 * is not a match (or not a valid MCG entry), then we return an error.
1167 	 */
1168 	end_indx = hermon_mcg_walk_mgid_hash(state, mgid_hash, gid, &prev_indx);
1169 	mcg	 = &state->hs_mcghdl[end_indx];
1170 
1171 	/*
1172 	 * If MGID == 0 (the hash chain is empty) or if the specified MGID
1173 	 * does not match the MGID in the current entry, then return
1174 	 * IBT_MC_MGID_INVALID (to indicate that the specified MGID is not
1175 	 * valid).
1176 	 */
1177 	if (((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) ||
1178 	    ((mcg->mcg_mgid_h != gid.gid_prefix) ||
1179 	    (mcg->mcg_mgid_l != gid.gid_guid))) {
1180 		mutex_exit(&state->hs_mcglock);
1181 		return (IBT_MC_MGID_INVALID);
1182 	}
1183 
1184 	/*
1185 	 * Read the current MCG entry into the temporary MCG.  Note: In
1186 	 * general, this operation shouldn't fail.  If it does, then it is
1187 	 * an indication that something (probably in HW, but maybe in SW)
1188 	 * has gone seriously wrong.
1189 	 */
1190 	status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
1191 	    HERMON_CMD_NOSLEEP_SPIN);
1192 	if (status != HERMON_CMD_SUCCESS) {
1193 		mutex_exit(&state->hs_mcglock);
1194 		HERMON_WARNING(state, "failed to read MCG entry");
1195 		cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
1196 		    status);
1197 		if (status == HERMON_CMD_INVALID_STATUS) {
1198 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1199 		}
1200 		return (ibc_get_ci_failure(0));
1201 	}
1202 
1203 	/*
1204 	 * Search the QP number list for a match.  If a match is found, then
1205 	 * remove the entry from the QP list.  Otherwise, if no match is found,
1206 	 * return an error.
1207 	 */
1208 	status = hermon_mcg_qplist_remove(mcg, mcg_entry_qplist, qp);
1209 	if (status != DDI_SUCCESS) {
1210 		mutex_exit(&state->hs_mcglock);
1211 		return (status);
1212 	}
1213 
1214 	/*
1215 	 * Decrement the MCG count for this QP.  When the 'qp_mcg'
1216 	 * field becomes 0, then this QP is no longer a member of any
1217 	 * MCG.
1218 	 */
1219 	hermon_qp_mcg_refcnt_dec(qp);
1220 
1221 	/*
1222 	 * If the current MCG's QP number list is about to be made empty
1223 	 * ("mcg_num_qps" == 1), then remove the entry itself from the hash
1224 	 * chain.  Otherwise, just write the updated MCG entry back to the
1225 	 * hardware.  In either case, once we successfully update the hardware
1226 	 * chain, then we decrement the "shadow" list entry's "mcg_num_qps"
1227 	 * count (or zero out the entire "shadow" list entry) before returning
1228 	 * success.  Note:  Zeroing out the "shadow" list entry is done
1229 	 * inside of hermon_mcg_hash_list_remove().
1230 	 */
1231 	if (mcg->mcg_num_qps == 1) {
1232 
1233 		/* Remove an MCG entry from the hash chain */
1234 		status = hermon_mcg_hash_list_remove(state, end_indx, prev_indx,
1235 		    mcg_entry);
1236 		if (status != DDI_SUCCESS) {
1237 			mutex_exit(&state->hs_mcglock);
1238 			return (status);
1239 		}
1240 
1241 	} else {
1242 		/*
1243 		 * Write the updated MCG entry back to the Hermon MCG table.
1244 		 * If this succeeds, then we update the "shadow" list to
1245 		 * reflect the change (i.e. decrement the "mcg_num_qps"),
1246 		 * drop the lock, and return success.  Note:  In general,
1247 		 * this operation shouldn't fail.  If it does, then it is an
1248 		 * indication that something (probably in HW, but maybe in SW)
1249 		 * has gone seriously wrong.
1250 		 */
1251 		mcg_entry->member_cnt = (mcg->mcg_num_qps - 1);
1252 		status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
1253 		    HERMON_CMD_NOSLEEP_SPIN);
1254 		if (status != HERMON_CMD_SUCCESS) {
1255 			mutex_exit(&state->hs_mcglock);
1256 			HERMON_WARNING(state, "failed to write MCG entry");
1257 			cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
1258 			    "%08x\n", status);
1259 			if (status == HERMON_CMD_INVALID_STATUS) {
1260 				hermon_fm_ereport(state, HCA_SYS_ERR,
1261 				    HCA_ERR_SRV_LOST);
1262 			}
1263 			return (ibc_get_ci_failure(0));
1264 		}
1265 		mcg->mcg_num_qps--;
1266 	}
1267 
1268 	mutex_exit(&state->hs_mcglock);
1269 	return (DDI_SUCCESS);
1270 }
1271 
1272 /*
1273  * hermon_qp_mcg_refcnt_inc()
1274  *    Context: Can be called from interrupt or base context.
1275  */
1276 static void
1277 hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp)
1278 {
1279 	/* Increment the QP's MCG reference count */
1280 	mutex_enter(&qp->qp_lock);
1281 	qp->qp_mcg_refcnt++;
1282 	mutex_exit(&qp->qp_lock);
1283 }
1284 
1285 
1286 /*
1287  * hermon_qp_mcg_refcnt_dec()
1288  *    Context: Can be called from interrupt or base context.
1289  */
1290 static void
1291 hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp)
1292 {
1293 	/* Decrement the QP's MCG reference count */
1294 	mutex_enter(&qp->qp_lock);
1295 	qp->qp_mcg_refcnt--;
1296 	mutex_exit(&qp->qp_lock);
1297 }
1298 
1299 
1300 /*
1301  * hermon_mcg_qplist_add()
1302  *    Context: Can be called from interrupt or base context.
1303  */
1304 static int
1305 hermon_mcg_qplist_add(hermon_state_t *state, hermon_mcghdl_t mcg,
1306     hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp,
1307     uint_t *qp_found)
1308 {
1309 	uint_t		qplist_indx;
1310 
1311 	ASSERT(MUTEX_HELD(&state->hs_mcglock));
1312 
1313 	qplist_indx = mcg->mcg_num_qps;
1314 
1315 	/*
1316 	 * Determine if we have exceeded the maximum number of QP per
1317 	 * multicast group.  If we have, then return an error
1318 	 */
1319 	if (qplist_indx >= state->hs_cfg_profile->cp_num_qp_per_mcg) {
1320 		return (IBT_HCA_MCG_QP_EXCEEDED);
1321 	}
1322 
1323 	/*
1324 	 * Determine if the QP is already attached to this MCG table.  If it
1325 	 * is, then we break out and treat this operation as a NO-OP
1326 	 */
1327 	for (qplist_indx = 0; qplist_indx < mcg->mcg_num_qps;
1328 	    qplist_indx++) {
1329 		if (mcg_qplist[qplist_indx].qpn == qp->qp_qpnum) {
1330 			break;
1331 		}
1332 	}
1333 
1334 	/*
1335 	 * If the QP was already on the list, set 'qp_found' to TRUE.  We still
1336 	 * return SUCCESS in this case, but the qplist will not have been
1337 	 * updated because the QP was already on the list.
1338 	 */
1339 	if (qplist_indx < mcg->mcg_num_qps) {
1340 		*qp_found = 1;
1341 	} else {
1342 		/*
1343 		 * Otherwise, append the new QP number to the end of the
1344 		 * current QP list.  Note: We will increment the "mcg_num_qps"
1345 		 * field on the "shadow" MCG list entry later (after we know
1346 		 * that all necessary Hermon firmware accesses have been
1347 		 * successful).
1348 		 *
1349 		 * Set 'qp_found' to 0 so we know the QP was added on to the
1350 		 * list for sure.
1351 		 */
1352 		mcg_qplist[qplist_indx].qpn =
1353 		    (qp->qp_qpnum | HERMON_MCG_QPN_BLOCK_LB);
1354 		*qp_found = 0;
1355 	}
1356 
1357 	return (DDI_SUCCESS);
1358 }
1359 
1360 
1361 
1362 /*
1363  * hermon_mcg_qplist_remove()
1364  *    Context: Can be called from interrupt or base context.
1365  */
1366 static int
1367 hermon_mcg_qplist_remove(hermon_mcghdl_t mcg,
1368     hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp)
1369 {
1370 	uint_t		i, qplist_indx;
1371 
1372 	/*
1373 	 * Search the MCG QP list for a matching QPN.  When
1374 	 * it's found, we swap the last entry with the current
1375 	 * one, set the last entry to zero, decrement the last
1376 	 * entry, and return.  If it's not found, then it's
1377 	 * and error.
1378 	 */
1379 	qplist_indx = mcg->mcg_num_qps;
1380 	for (i = 0; i < qplist_indx; i++) {
1381 		if (mcg_qplist[i].qpn == qp->qp_qpnum) {
1382 			mcg_qplist[i] = mcg_qplist[qplist_indx - 1];
1383 			mcg_qplist[qplist_indx - 1].qpn = 0;
1384 
1385 			return (DDI_SUCCESS);
1386 		}
1387 	}
1388 
1389 	return (IBT_QP_HDL_INVALID);
1390 }
1391 
1392 
1393 /*
1394  * hermon_mcg_walk_mgid_hash()
1395  *    Context: Can be called from interrupt or base context.
1396  */
1397 static uint_t
1398 hermon_mcg_walk_mgid_hash(hermon_state_t *state, uint64_t start_indx,
1399     ib_gid_t mgid, uint_t *p_indx)
1400 {
1401 	hermon_mcghdl_t	curr_mcghdl;
1402 	uint_t		curr_indx, prev_indx;
1403 
1404 	ASSERT(MUTEX_HELD(&state->hs_mcglock));
1405 
1406 	/* Start at the head of the hash chain */
1407 	curr_indx   = (uint_t)start_indx;
1408 	prev_indx   = curr_indx;
1409 	curr_mcghdl = &state->hs_mcghdl[curr_indx];
1410 
1411 	/* If the first entry in the chain has MGID == 0, then stop */
1412 	if ((curr_mcghdl->mcg_mgid_h == 0) &&
1413 	    (curr_mcghdl->mcg_mgid_l == 0)) {
1414 		goto end_mgid_hash_walk;
1415 	}
1416 
1417 	/* If the first entry in the chain matches the MGID, then stop */
1418 	if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
1419 	    (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
1420 		goto end_mgid_hash_walk;
1421 	}
1422 
1423 	/* Otherwise, walk the hash chain looking for a match */
1424 	while (curr_mcghdl->mcg_next_indx != 0) {
1425 		prev_indx = curr_indx;
1426 		curr_indx = curr_mcghdl->mcg_next_indx;
1427 		curr_mcghdl = &state->hs_mcghdl[curr_indx];
1428 
1429 		if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
1430 		    (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
1431 			break;
1432 		}
1433 	}
1434 
1435 end_mgid_hash_walk:
1436 	/*
1437 	 * If necessary, return the index of the previous entry too.  This
1438 	 * is primarily used for detaching a QP from a multicast group.  It
1439 	 * may be necessary, in that case, to delete an MCG entry from the
1440 	 * hash chain and having the index of the previous entry is helpful.
1441 	 */
1442 	if (p_indx != NULL) {
1443 		*p_indx = prev_indx;
1444 	}
1445 	return (curr_indx);
1446 }
1447 
1448 
1449 /*
1450  * hermon_mcg_setup_new_hdr()
1451  *    Context: Can be called from interrupt or base context.
1452  */
1453 static void
1454 hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg, hermon_hw_mcg_t *mcg_hdr,
1455     ib_gid_t mgid, hermon_rsrc_t *mcg_rsrc)
1456 {
1457 	/*
1458 	 * Fill in the fields of the "shadow" entry used by software
1459 	 * to track MCG hardware entry
1460 	 */
1461 	mcg->mcg_mgid_h	   = mgid.gid_prefix;
1462 	mcg->mcg_mgid_l	   = mgid.gid_guid;
1463 	mcg->mcg_rsrcp	   = mcg_rsrc;
1464 	mcg->mcg_next_indx = 0;
1465 	mcg->mcg_num_qps   = 0;
1466 
1467 	/*
1468 	 * Fill the header fields of the MCG entry (in the temporary copy)
1469 	 */
1470 	mcg_hdr->mgid_h		= mgid.gid_prefix;
1471 	mcg_hdr->mgid_l		= mgid.gid_guid;
1472 	mcg_hdr->next_gid_indx	= 0;
1473 }
1474 
1475 
1476 /*
1477  * hermon_mcg_hash_list_remove()
1478  *    Context: Can be called only from user or kernel context.
1479  */
1480 static int
1481 hermon_mcg_hash_list_remove(hermon_state_t *state, uint_t curr_indx,
1482     uint_t prev_indx, hermon_hw_mcg_t *mcg_entry)
1483 {
1484 	hermon_mcghdl_t		curr_mcg, prev_mcg, next_mcg;
1485 	uint_t			next_indx;
1486 	int			status;
1487 
1488 	/* Get the pointer to "shadow" list for current entry */
1489 	curr_mcg = &state->hs_mcghdl[curr_indx];
1490 
1491 	/*
1492 	 * If this is the first entry on a hash chain, then attempt to replace
1493 	 * the entry with the next entry on the chain.  If there are no
1494 	 * subsequent entries on the chain, then this is the only entry and
1495 	 * should be invalidated.
1496 	 */
1497 	if (curr_indx == prev_indx) {
1498 
1499 		/*
1500 		 * If this is the only entry on the chain, then invalidate it.
1501 		 * Note:  Invalidating an MCG entry means writing all zeros
1502 		 * to the entry.  This is only necessary for those MCG
1503 		 * entries that are the "head" entries of the individual hash
1504 		 * chains.  Regardless of whether this operation returns
1505 		 * success or failure, return that result to the caller.
1506 		 */
1507 		next_indx = curr_mcg->mcg_next_indx;
1508 		if (next_indx == 0) {
1509 			status = hermon_mcg_entry_invalidate(state, mcg_entry,
1510 			    curr_indx);
1511 			bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
1512 			return (status);
1513 		}
1514 
1515 		/*
1516 		 * Otherwise, this is just the first entry on the chain, so
1517 		 * grab the next one
1518 		 */
1519 		next_mcg = &state->hs_mcghdl[next_indx];
1520 
1521 		/*
1522 		 * Read the next MCG entry into the temporary MCG.  Note:
1523 		 * In general, this operation shouldn't fail.  If it does,
1524 		 * then it is an indication that something (probably in HW,
1525 		 * but maybe in SW) has gone seriously wrong.
1526 		 */
1527 		status = hermon_read_mgm_cmd_post(state, mcg_entry, next_indx,
1528 		    HERMON_CMD_NOSLEEP_SPIN);
1529 		if (status != HERMON_CMD_SUCCESS) {
1530 			HERMON_WARNING(state, "failed to read MCG entry");
1531 			cmn_err(CE_CONT, "Hermon: READ_MGM command failed: "
1532 			    "%08x\n", status);
1533 			if (status == HERMON_CMD_INVALID_STATUS) {
1534 				hermon_fm_ereport(state, HCA_SYS_ERR,
1535 				    HCA_ERR_SRV_LOST);
1536 			}
1537 			return (ibc_get_ci_failure(0));
1538 		}
1539 
1540 		/*
1541 		 * Copy/Write the temporary MCG back to the hardware MCG list
1542 		 * using the current index.  This essentially removes the
1543 		 * current MCG entry from the list by writing over it with
1544 		 * the next one.  If this is successful, then we can do the
1545 		 * same operation for the "shadow" list.  And we can also
1546 		 * free up the Hermon MCG entry resource that was associated
1547 		 * with the (old) next entry.  Note:  In general, this
1548 		 * operation shouldn't fail.  If it does, then it is an
1549 		 * indication that something (probably in HW, but maybe in SW)
1550 		 * has gone seriously wrong.
1551 		 */
1552 		status = hermon_write_mgm_cmd_post(state, mcg_entry, curr_indx,
1553 		    HERMON_CMD_NOSLEEP_SPIN);
1554 		if (status != HERMON_CMD_SUCCESS) {
1555 			HERMON_WARNING(state, "failed to write MCG entry");
1556 			cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
1557 			    "%08x\n", status);
1558 			if (status == HERMON_CMD_INVALID_STATUS) {
1559 				hermon_fm_ereport(state, HCA_SYS_ERR,
1560 				    HCA_ERR_SRV_LOST);
1561 			}
1562 			return (ibc_get_ci_failure(0));
1563 		}
1564 
1565 		/*
1566 		 * Copy all the software tracking information from the next
1567 		 * entry on the "shadow" MCG list into the current entry on
1568 		 * the list.  Then invalidate (zero out) the other "shadow"
1569 		 * list entry.
1570 		 */
1571 		bcopy(next_mcg, curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
1572 		bzero(next_mcg, sizeof (struct hermon_sw_mcg_list_s));
1573 
1574 		/*
1575 		 * Free up the Hermon MCG entry resource used by the "next"
1576 		 * MCG entry.  That resource is no longer needed by any
1577 		 * MCG entry which is first on a hash chain (like the "next"
1578 		 * entry has just become).
1579 		 */
1580 		hermon_rsrc_free(state, &curr_mcg->mcg_rsrcp);
1581 
1582 		return (DDI_SUCCESS);
1583 	}
1584 
1585 	/*
1586 	 * Else if this is the last entry on the hash chain (or a middle
1587 	 * entry, then we update the previous entry's "next_gid_index" field
1588 	 * to make it point instead to the next entry on the chain.  By
1589 	 * skipping over the removed entry in this way, we can then free up
1590 	 * any resources associated with the current entry.  Note:  We don't
1591 	 * need to invalidate the "skipped over" hardware entry because it
1592 	 * will no be longer connected to any hash chains, and if/when it is
1593 	 * finally re-used, it will be written with entirely new values.
1594 	 */
1595 
1596 	/*
1597 	 * Read the next MCG entry into the temporary MCG.  Note:  In general,
1598 	 * this operation shouldn't fail.  If it does, then it is an
1599 	 * indication that something (probably in HW, but maybe in SW) has
1600 	 * gone seriously wrong.
1601 	 */
1602 	status = hermon_read_mgm_cmd_post(state, mcg_entry, prev_indx,
1603 	    HERMON_CMD_NOSLEEP_SPIN);
1604 	if (status != HERMON_CMD_SUCCESS) {
1605 		HERMON_WARNING(state, "failed to read MCG entry");
1606 		cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
1607 		    status);
1608 		if (status == HERMON_CMD_INVALID_STATUS) {
1609 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1610 		}
1611 		return (ibc_get_ci_failure(0));
1612 	}
1613 
1614 	/*
1615 	 * Finally, we update the "next_gid_indx" field in the temporary MCG
1616 	 * and attempt to write the entry back into the Hermon MCG table.  If
1617 	 * this succeeds, then we update the "shadow" list to reflect the
1618 	 * change, free up the Hermon MCG entry resource that was associated
1619 	 * with the current entry, and return success.  Note:  In general,
1620 	 * this operation shouldn't fail.  If it does, then it is an indication
1621 	 * that something (probably in HW, but maybe in SW) has gone seriously
1622 	 * wrong.
1623 	 */
1624 	mcg_entry->next_gid_indx = curr_mcg->mcg_next_indx;
1625 	status = hermon_write_mgm_cmd_post(state, mcg_entry, prev_indx,
1626 	    HERMON_CMD_NOSLEEP_SPIN);
1627 	if (status != HERMON_CMD_SUCCESS) {
1628 		HERMON_WARNING(state, "failed to write MCG entry");
1629 		cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
1630 		    status);
1631 		if (status == HERMON_CMD_INVALID_STATUS) {
1632 			hermon_fm_ereport(state, HCA_SYS_ERR,
1633 			    HCA_ERR_SRV_LOST);
1634 		}
1635 		return (ibc_get_ci_failure(0));
1636 	}
1637 
1638 	/*
1639 	 * Get the pointer to the "shadow" MCG list entry for the previous
1640 	 * MCG.  Update its "mcg_next_indx" to point to the next entry
1641 	 * the one after the current entry. Note:  This next index may be
1642 	 * zero, indicating the end of the list.
1643 	 */
1644 	prev_mcg = &state->hs_mcghdl[prev_indx];
1645 	prev_mcg->mcg_next_indx = curr_mcg->mcg_next_indx;
1646 
1647 	/*
1648 	 * Free up the Hermon MCG entry resource used by the current entry.
1649 	 * This resource is no longer needed because the chain now skips over
1650 	 * the current entry.  Then invalidate (zero out) the current "shadow"
1651 	 * list entry.
1652 	 */
1653 	hermon_rsrc_free(state, &curr_mcg->mcg_rsrcp);
1654 	bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
1655 
1656 	return (DDI_SUCCESS);
1657 }
1658 
1659 
1660 /*
1661  * hermon_mcg_entry_invalidate()
1662  *    Context: Can be called only from user or kernel context.
1663  */
1664 static int
1665 hermon_mcg_entry_invalidate(hermon_state_t *state, hermon_hw_mcg_t *mcg_entry,
1666     uint_t indx)
1667 {
1668 	int		status;
1669 
1670 	/*
1671 	 * Invalidate the hardware MCG entry by zeroing out this temporary
1672 	 * MCG and writing it the the hardware.  Note: In general, this
1673 	 * operation shouldn't fail.  If it does, then it is an indication
1674 	 * that something (probably in HW, but maybe in SW) has gone seriously
1675 	 * wrong.
1676 	 */
1677 	bzero(mcg_entry, HERMON_MCGMEM_SZ(state));
1678 	status = hermon_write_mgm_cmd_post(state, mcg_entry, indx,
1679 	    HERMON_CMD_NOSLEEP_SPIN);
1680 	if (status != HERMON_CMD_SUCCESS) {
1681 		HERMON_WARNING(state, "failed to write MCG entry");
1682 		cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
1683 		    status);
1684 		if (status == HERMON_CMD_INVALID_STATUS) {
1685 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1686 		}
1687 		return (ibc_get_ci_failure(0));
1688 	}
1689 
1690 	return (DDI_SUCCESS);
1691 }
1692 
1693 
1694 /*
1695  * hermon_mgid_is_valid()
1696  *    Context: Can be called from interrupt or base context.
1697  */
1698 static int
1699 hermon_mgid_is_valid(ib_gid_t gid)
1700 {
1701 	uint_t		topbits, flags, scope;
1702 
1703 	/*
1704 	 * According to IBA 1.1 specification (section 4.1.1) a valid
1705 	 * "multicast GID" must have its top eight bits set to all ones
1706 	 */
1707 	topbits = (gid.gid_prefix >> HERMON_MCG_TOPBITS_SHIFT) &
1708 	    HERMON_MCG_TOPBITS_MASK;
1709 	if (topbits != HERMON_MCG_TOPBITS) {
1710 		return (0);
1711 	}
1712 
1713 	/*
1714 	 * The next 4 bits are the "flag" bits.  These are valid only
1715 	 * if they are "0" (which correspond to permanently assigned/
1716 	 * "well-known" multicast GIDs) or "1" (for so-called "transient"
1717 	 * multicast GIDs).  All other values are reserved.
1718 	 */
1719 	flags = (gid.gid_prefix >> HERMON_MCG_FLAGS_SHIFT) &
1720 	    HERMON_MCG_FLAGS_MASK;
1721 	if (!((flags == HERMON_MCG_FLAGS_PERM) ||
1722 	    (flags == HERMON_MCG_FLAGS_NONPERM))) {
1723 		return (0);
1724 	}
1725 
1726 	/*
1727 	 * The next 4 bits are the "scope" bits.  These are valid only
1728 	 * if they are "2" (Link-local), "5" (Site-local), "8"
1729 	 * (Organization-local) or "E" (Global).  All other values
1730 	 * are reserved (or currently unassigned).
1731 	 */
1732 	scope = (gid.gid_prefix >> HERMON_MCG_SCOPE_SHIFT) &
1733 	    HERMON_MCG_SCOPE_MASK;
1734 	if (!((scope == HERMON_MCG_SCOPE_LINKLOC) ||
1735 	    (scope == HERMON_MCG_SCOPE_SITELOC)	 ||
1736 	    (scope == HERMON_MCG_SCOPE_ORGLOC)	 ||
1737 	    (scope == HERMON_MCG_SCOPE_GLOBAL))) {
1738 		return (0);
1739 	}
1740 
1741 	/*
1742 	 * If it passes all of the above checks, then we will consider it
1743 	 * a valid multicast GID.
1744 	 */
1745 	return (1);
1746 }
1747 
1748 
1749 /*
1750  * hermon_mlid_is_valid()
1751  *    Context: Can be called from interrupt or base context.
1752  */
1753 static int
1754 hermon_mlid_is_valid(ib_lid_t lid)
1755 {
1756 	/*
1757 	 * According to IBA 1.1 specification (section 4.1.1) a valid
1758 	 * "multicast DLID" must be between 0xC000 and 0xFFFE.
1759 	 */
1760 	if ((lid < IB_LID_MC_FIRST) || (lid > IB_LID_MC_LAST)) {
1761 		return (0);
1762 	}
1763 
1764 	return (1);
1765 }
1766 
1767 
1768 /*
1769  * hermon_pd_alloc()
1770  *    Context: Can be called only from user or kernel context.
1771  */
1772 int
1773 hermon_pd_alloc(hermon_state_t *state, hermon_pdhdl_t *pdhdl, uint_t sleepflag)
1774 {
1775 	hermon_rsrc_t	*rsrc;
1776 	hermon_pdhdl_t	pd;
1777 	int		status;
1778 
1779 	/*
1780 	 * Allocate the software structure for tracking the protection domain
1781 	 * (i.e. the Hermon Protection Domain handle).  By default each PD
1782 	 * structure will have a unique PD number assigned to it.  All that
1783 	 * is necessary is for software to initialize the PD reference count
1784 	 * (to zero) and return success.
1785 	 */
1786 	status = hermon_rsrc_alloc(state, HERMON_PDHDL, 1, sleepflag, &rsrc);
1787 	if (status != DDI_SUCCESS) {
1788 		return (IBT_INSUFF_RESOURCE);
1789 	}
1790 	pd = (hermon_pdhdl_t)rsrc->hr_addr;
1791 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd))
1792 
1793 	pd->pd_refcnt = 0;
1794 	*pdhdl = pd;
1795 
1796 	return (DDI_SUCCESS);
1797 }
1798 
1799 
1800 /*
1801  * hermon_pd_free()
1802  *    Context: Can be called only from user or kernel context.
1803  */
1804 int
1805 hermon_pd_free(hermon_state_t *state, hermon_pdhdl_t *pdhdl)
1806 {
1807 	hermon_rsrc_t	*rsrc;
1808 	hermon_pdhdl_t	pd;
1809 
1810 	/*
1811 	 * Pull all the necessary information from the Hermon Protection Domain
1812 	 * handle.  This is necessary here because the resource for the
1813 	 * PD is going to be freed up as part of this operation.
1814 	 */
1815 	pd   = *pdhdl;
1816 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd))
1817 	rsrc = pd->pd_rsrcp;
1818 
1819 	/*
1820 	 * Check the PD reference count.  If the reference count is non-zero,
1821 	 * then it means that this protection domain is still referenced by
1822 	 * some memory region, queue pair, address handle, or other IB object
1823 	 * If it is non-zero, then return an error.  Otherwise, free the
1824 	 * Hermon resource and return success.
1825 	 */
1826 	if (pd->pd_refcnt != 0) {
1827 		return (IBT_PD_IN_USE);
1828 	}
1829 
1830 	/* Free the Hermon Protection Domain handle */
1831 	hermon_rsrc_free(state, &rsrc);
1832 
1833 	/* Set the pdhdl pointer to NULL and return success */
1834 	*pdhdl = (hermon_pdhdl_t)NULL;
1835 
1836 	return (DDI_SUCCESS);
1837 }
1838 
1839 
1840 /*
1841  * hermon_pd_refcnt_inc()
1842  *    Context: Can be called from interrupt or base context.
1843  */
1844 void
1845 hermon_pd_refcnt_inc(hermon_pdhdl_t pd)
1846 {
1847 	/* Increment the protection domain's reference count */
1848 	atomic_inc_32(&pd->pd_refcnt);
1849 }
1850 
1851 
1852 /*
1853  * hermon_pd_refcnt_dec()
1854  *    Context: Can be called from interrupt or base context.
1855  */
1856 void
1857 hermon_pd_refcnt_dec(hermon_pdhdl_t pd)
1858 {
1859 	/* Decrement the protection domain's reference count */
1860 	atomic_dec_32(&pd->pd_refcnt);
1861 }
1862 
1863 
1864 /*
1865  * hermon_port_query()
1866  *    Context: Can be called only from user or kernel context.
1867  */
1868 int
1869 hermon_port_query(hermon_state_t *state, uint_t port, ibt_hca_portinfo_t *pi)
1870 {
1871 	sm_portinfo_t		portinfo;
1872 	sm_guidinfo_t		guidinfo;
1873 	sm_pkey_table_t		pkeytable;
1874 	ib_gid_t		*sgid;
1875 	uint_t			sgid_max, pkey_max, tbl_size;
1876 	int			i, j, indx, status;
1877 	ib_pkey_t		*pkeyp;
1878 	ib_guid_t		*guidp;
1879 
1880 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pi))
1881 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*state))
1882 
1883 	/* Validate that specified port number is legal */
1884 	if (!hermon_portnum_is_valid(state, port)) {
1885 		return (IBT_HCA_PORT_INVALID);
1886 	}
1887 	pkeyp = state->hs_pkey[port - 1];
1888 	guidp = state->hs_guid[port - 1];
1889 
1890 	/*
1891 	 * We use the Hermon MAD_IFC command to post a GetPortInfo MAD
1892 	 * to the firmware (for the specified port number).  This returns
1893 	 * a full PortInfo MAD (in "portinfo") which we subsequently
1894 	 * parse to fill in the "ibt_hca_portinfo_t" structure returned
1895 	 * to the IBTF.
1896 	 */
1897 	status = hermon_getportinfo_cmd_post(state, port,
1898 	    HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
1899 	if (status != HERMON_CMD_SUCCESS) {
1900 		cmn_err(CE_CONT, "Hermon: GetPortInfo (port %02d) command "
1901 		    "failed: %08x\n", port, status);
1902 		if (status == HERMON_CMD_INVALID_STATUS) {
1903 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1904 		}
1905 		return (ibc_get_ci_failure(0));
1906 	}
1907 
1908 	/*
1909 	 * Parse the PortInfo MAD and fill in the IBTF structure
1910 	 */
1911 	pi->p_base_lid		= portinfo.LID;
1912 	pi->p_qkey_violations	= portinfo.Q_KeyViolations;
1913 	pi->p_pkey_violations	= portinfo.P_KeyViolations;
1914 	pi->p_sm_sl		= portinfo.MasterSMSL;
1915 	pi->p_sm_lid		= portinfo.MasterSMLID;
1916 	pi->p_linkstate		= portinfo.PortState;
1917 	pi->p_port_num		= portinfo.LocalPortNum;
1918 	pi->p_phys_state	= portinfo.PortPhysicalState;
1919 	pi->p_width_supported	= portinfo.LinkWidthSupported;
1920 	pi->p_width_enabled	= portinfo.LinkWidthEnabled;
1921 	pi->p_width_active	= portinfo.LinkWidthActive;
1922 	pi->p_speed_supported	= portinfo.LinkSpeedSupported;
1923 	pi->p_speed_enabled	= portinfo.LinkSpeedEnabled;
1924 	pi->p_speed_active	= portinfo.LinkSpeedActive;
1925 	pi->p_mtu		= portinfo.MTUCap;
1926 	pi->p_lmc		= portinfo.LMC;
1927 	pi->p_max_vl		= portinfo.VLCap;
1928 	pi->p_subnet_timeout	= portinfo.SubnetTimeOut;
1929 	pi->p_msg_sz		= ((uint32_t)1 << HERMON_QP_LOG_MAX_MSGSZ);
1930 	tbl_size = state->hs_cfg_profile->cp_log_max_gidtbl;
1931 	pi->p_sgid_tbl_sz	= (1 << tbl_size);
1932 	tbl_size = state->hs_cfg_profile->cp_log_max_pkeytbl;
1933 	pi->p_pkey_tbl_sz	= (1 << tbl_size);
1934 	state->hs_sn_prefix[port - 1] = portinfo.GidPrefix;
1935 
1936 	/*
1937 	 * Convert InfiniBand-defined port capability flags to the format
1938 	 * specified by the IBTF
1939 	 */
1940 	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM)
1941 		pi->p_capabilities |= IBT_PORT_CAP_SM;
1942 	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM_DISABLED)
1943 		pi->p_capabilities |= IBT_PORT_CAP_SM_DISABLED;
1944 	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SNMP_SUPPD)
1945 		pi->p_capabilities |= IBT_PORT_CAP_SNMP_TUNNEL;
1946 	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_DM_SUPPD)
1947 		pi->p_capabilities |= IBT_PORT_CAP_DM;
1948 	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_VM_SUPPD)
1949 		pi->p_capabilities |= IBT_PORT_CAP_VENDOR;
1950 	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_CLNT_REREG_SUPPD)
1951 		pi->p_capabilities |= IBT_PORT_CAP_CLNT_REREG;
1952 
1953 	/*
1954 	 * Fill in the SGID table.  Since the only access to the Hermon
1955 	 * GID tables is through the firmware's MAD_IFC interface, we
1956 	 * post as many GetGUIDInfo MADs as necessary to read in the entire
1957 	 * contents of the SGID table (for the specified port).  Note:  The
1958 	 * GetGUIDInfo command only gets eight GUIDs per operation.  These
1959 	 * GUIDs are then appended to the GID prefix for the port (from the
1960 	 * GetPortInfo above) to form the entire SGID table.
1961 	 */
1962 	for (i = 0; i < pi->p_sgid_tbl_sz; i += 8) {
1963 		status = hermon_getguidinfo_cmd_post(state, port, i >> 3,
1964 		    HERMON_SLEEPFLAG_FOR_CONTEXT(), &guidinfo);
1965 		if (status != HERMON_CMD_SUCCESS) {
1966 			cmn_err(CE_CONT, "Hermon: GetGUIDInfo (port %02d) "
1967 			    "command failed: %08x\n", port, status);
1968 			if (status == HERMON_CMD_INVALID_STATUS) {
1969 				hermon_fm_ereport(state, HCA_SYS_ERR,
1970 				    HCA_ERR_SRV_LOST);
1971 			}
1972 			return (ibc_get_ci_failure(0));
1973 		}
1974 
1975 		/* Figure out how many of the entries are valid */
1976 		sgid_max = min((pi->p_sgid_tbl_sz - i), 8);
1977 		for (j = 0; j < sgid_max; j++) {
1978 			indx = (i + j);
1979 			sgid = &pi->p_sgid_tbl[indx];
1980 			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sgid))
1981 			sgid->gid_prefix = portinfo.GidPrefix;
1982 			guidp[indx] = sgid->gid_guid =
1983 			    guidinfo.GUIDBlocks[j];
1984 		}
1985 	}
1986 
1987 	/*
1988 	 * Fill in the PKey table.  Just as for the GID tables above, the
1989 	 * only access to the Hermon PKey tables is through the firmware's
1990 	 * MAD_IFC interface.  We post as many GetPKeyTable MADs as necessary
1991 	 * to read in the entire contents of the PKey table (for the specified
1992 	 * port).  Note:  The GetPKeyTable command only gets 32 PKeys per
1993 	 * operation.
1994 	 */
1995 	for (i = 0; i < pi->p_pkey_tbl_sz; i += 32) {
1996 		status = hermon_getpkeytable_cmd_post(state, port, i,
1997 		    HERMON_SLEEPFLAG_FOR_CONTEXT(), &pkeytable);
1998 		if (status != HERMON_CMD_SUCCESS) {
1999 			cmn_err(CE_CONT, "Hermon: GetPKeyTable (port %02d) "
2000 			    "command failed: %08x\n", port, status);
2001 			if (status == HERMON_CMD_INVALID_STATUS) {
2002 				hermon_fm_ereport(state, HCA_SYS_ERR,
2003 				    HCA_ERR_SRV_LOST);
2004 			}
2005 			return (ibc_get_ci_failure(0));
2006 		}
2007 
2008 		/* Figure out how many of the entries are valid */
2009 		pkey_max = min((pi->p_pkey_tbl_sz - i), 32);
2010 		for (j = 0; j < pkey_max; j++) {
2011 			indx = (i + j);
2012 			pkeyp[indx] = pi->p_pkey_tbl[indx] =
2013 			    pkeytable.P_KeyTableBlocks[j];
2014 		}
2015 	}
2016 
2017 	return (DDI_SUCCESS);
2018 }
2019 
2020 
2021 /*
2022  * hermon_port_modify()
2023  *    Context: Can be called only from user or kernel context.
2024  */
2025 /* ARGSUSED */
2026 int
2027 hermon_port_modify(hermon_state_t *state, uint8_t port,
2028     ibt_port_modify_flags_t flags, uint8_t init_type)
2029 {
2030 	sm_portinfo_t		portinfo;
2031 	uint32_t		capmask;
2032 	int			status;
2033 	hermon_hw_set_port_t	set_port;
2034 
2035 	/*
2036 	 * Return an error if either of the unsupported flags are set
2037 	 */
2038 	if ((flags & IBT_PORT_SHUTDOWN) ||
2039 	    (flags & IBT_PORT_SET_INIT_TYPE)) {
2040 		return (IBT_NOT_SUPPORTED);
2041 	}
2042 
2043 	bzero(&set_port, sizeof (set_port));
2044 
2045 	/*
2046 	 * Determine whether we are trying to reset the QKey counter
2047 	 */
2048 	if (flags & IBT_PORT_RESET_QKEY)
2049 		set_port.rqk = 1;
2050 
2051 	/* Validate that specified port number is legal */
2052 	if (!hermon_portnum_is_valid(state, port)) {
2053 		return (IBT_HCA_PORT_INVALID);
2054 	}
2055 
2056 	/*
2057 	 * Use the Hermon MAD_IFC command to post a GetPortInfo MAD to the
2058 	 * firmware (for the specified port number).  This returns a full
2059 	 * PortInfo MAD (in "portinfo") from which we pull the current
2060 	 * capability mask.  We then modify the capability mask as directed
2061 	 * by the "pmod_flags" field, and write the updated capability mask
2062 	 * using the Hermon SET_IB command (below).
2063 	 */
2064 	status = hermon_getportinfo_cmd_post(state, port,
2065 	    HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
2066 	if (status != HERMON_CMD_SUCCESS) {
2067 		if (status == HERMON_CMD_INVALID_STATUS) {
2068 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2069 		}
2070 		return (ibc_get_ci_failure(0));
2071 	}
2072 
2073 	/*
2074 	 * Convert InfiniBand-defined port capability flags to the format
2075 	 * specified by the IBTF.  Specifically, we modify the capability
2076 	 * mask based on the specified values.
2077 	 */
2078 	capmask = portinfo.CapabilityMask;
2079 
2080 	if (flags & IBT_PORT_RESET_SM)
2081 		capmask &= ~SM_CAP_MASK_IS_SM;
2082 	else if (flags & IBT_PORT_SET_SM)
2083 		capmask |= SM_CAP_MASK_IS_SM;
2084 
2085 	if (flags & IBT_PORT_RESET_SNMP)
2086 		capmask &= ~SM_CAP_MASK_IS_SNMP_SUPPD;
2087 	else if (flags & IBT_PORT_SET_SNMP)
2088 		capmask |= SM_CAP_MASK_IS_SNMP_SUPPD;
2089 
2090 	if (flags & IBT_PORT_RESET_DEVMGT)
2091 		capmask &= ~SM_CAP_MASK_IS_DM_SUPPD;
2092 	else if (flags & IBT_PORT_SET_DEVMGT)
2093 		capmask |= SM_CAP_MASK_IS_DM_SUPPD;
2094 
2095 	if (flags & IBT_PORT_RESET_VENDOR)
2096 		capmask &= ~SM_CAP_MASK_IS_VM_SUPPD;
2097 	else if (flags & IBT_PORT_SET_VENDOR)
2098 		capmask |= SM_CAP_MASK_IS_VM_SUPPD;
2099 
2100 	set_port.cap_mask = capmask;
2101 
2102 	/*
2103 	 * Use the Hermon SET_PORT command to update the capability mask and
2104 	 * (possibly) reset the QKey violation counter for the specified port.
2105 	 * Note: In general, this operation shouldn't fail.  If it does, then
2106 	 * it is an indication that something (probably in HW, but maybe in
2107 	 * SW) has gone seriously wrong.
2108 	 */
2109 	status = hermon_set_port_cmd_post(state, &set_port, port,
2110 	    HERMON_SLEEPFLAG_FOR_CONTEXT());
2111 	if (status != HERMON_CMD_SUCCESS) {
2112 		HERMON_WARNING(state, "failed to modify port capabilities");
2113 		cmn_err(CE_CONT, "Hermon: SET_IB (port %02d) command failed: "
2114 		    "%08x\n", port, status);
2115 		if (status == HERMON_CMD_INVALID_STATUS) {
2116 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2117 		}
2118 		return (ibc_get_ci_failure(0));
2119 	}
2120 
2121 	return (DDI_SUCCESS);
2122 }
2123 
2124 
2125 /*
2126  * hermon_set_addr_path()
2127  *    Context: Can be called from interrupt or base context.
2128  *
2129  * Note: This routine is used for two purposes.  It is used to fill in the
2130  * Hermon UDAV fields, and it is used to fill in the address path information
2131  * for QPs.  Because the two Hermon structures are similar, common fields can
2132  * be filled in here.  Because they are different, however, we pass
2133  * an additional flag to indicate which type is being filled and do each one
2134  * uniquely
2135  */
2136 
2137 int hermon_srate_override = -1;	/* allows ease of testing */
2138 
2139 int
2140 hermon_set_addr_path(hermon_state_t *state, ibt_adds_vect_t *av,
2141     hermon_hw_addr_path_t *path, uint_t type)
2142 {
2143 	uint_t		gidtbl_sz;
2144 	hermon_hw_udav_t *udav;
2145 
2146 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av))
2147 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path))
2148 
2149 	udav = (hermon_hw_udav_t *)(void *)path;
2150 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*udav))
2151 	path->mlid	= av->av_src_path;
2152 	path->rlid	= av->av_dlid;
2153 
2154 	switch (av->av_srate) {
2155 	case IBT_SRATE_2:	/* 1xSDR-2.5Gb/s injection rate */
2156 		path->max_stat_rate = 7; break;
2157 	case IBT_SRATE_10:	/* 4xSDR-10.0Gb/s injection rate */
2158 		path->max_stat_rate = 8; break;
2159 	case IBT_SRATE_30:	/* 12xSDR-30Gb/s injection rate */
2160 		path->max_stat_rate = 9; break;
2161 	case IBT_SRATE_5:	/* 1xDDR-5Gb/s injection rate */
2162 		path->max_stat_rate = 10; break;
2163 	case IBT_SRATE_20:	/* 4xDDR-20Gb/s injection rate */
2164 		path->max_stat_rate = 11; break;
2165 	case IBT_SRATE_40:	/* 4xQDR-40Gb/s injection rate */
2166 		path->max_stat_rate = 12; break;
2167 	case IBT_SRATE_60:	/* 12xDDR-60Gb/s injection rate */
2168 		path->max_stat_rate = 13; break;
2169 	case IBT_SRATE_80:	/* 8xQDR-80Gb/s injection rate */
2170 		path->max_stat_rate = 14; break;
2171 	case IBT_SRATE_120:	/* 12xQDR-120Gb/s injection rate */
2172 		path->max_stat_rate = 15; break;
2173 	case IBT_SRATE_NOT_SPECIFIED:	/* Max */
2174 		path->max_stat_rate = 0; break;
2175 	default:
2176 		return (IBT_STATIC_RATE_INVALID);
2177 	}
2178 	if (hermon_srate_override != -1) /* for evaluating HCA firmware */
2179 		path->max_stat_rate = hermon_srate_override;
2180 
2181 	/* If "grh" flag is set, then check for valid SGID index too */
2182 	gidtbl_sz = (1 << state->hs_queryport.log_max_gid);
2183 	if ((av->av_send_grh) && (av->av_sgid_ix > gidtbl_sz)) {
2184 		return (IBT_SGID_INVALID);
2185 	}
2186 
2187 	/*
2188 	 * Fill in all "global" values regardless of the value in the GRH
2189 	 * flag.  Because "grh" is not set unless "av_send_grh" is set, the
2190 	 * hardware will ignore the other "global" values as necessary.  Note:
2191 	 * SW does this here to enable later query operations to return
2192 	 * exactly the same params that were passed when the addr path was
2193 	 * last written.
2194 	 */
2195 	path->grh = av->av_send_grh;
2196 	if (type == HERMON_ADDRPATH_QP) {
2197 		path->mgid_index = av->av_sgid_ix;
2198 	} else {
2199 		/*
2200 		 * For Hermon UDAV, the "mgid_index" field is the index into
2201 		 * a combined table (not a per-port table), but having sections
2202 		 * for each port. So some extra calculations are necessary.
2203 		 */
2204 
2205 		path->mgid_index = ((av->av_port_num - 1) * gidtbl_sz) +
2206 		    av->av_sgid_ix;
2207 
2208 		udav->portnum = av->av_port_num;
2209 	}
2210 
2211 	/*
2212 	 * According to Hermon PRM, the (31:0) part of rgid_l must be set to
2213 	 * "0x2" if the 'grh' or 'g' bit is cleared.  It also says that we
2214 	 * only need to do it for UDAV's.  So we enforce that here.
2215 	 *
2216 	 * NOTE: The entire 64 bits worth of GUID info is actually being
2217 	 * preserved (for UDAVs) by the callers of this function
2218 	 * (hermon_ah_alloc() and hermon_ah_modify()) and as long as the
2219 	 * 'grh' bit is not set, the upper 32 bits (63:32) of rgid_l are
2220 	 * "don't care".
2221 	 */
2222 	if ((path->grh) || (type == HERMON_ADDRPATH_QP)) {
2223 		path->flow_label = av->av_flow;
2224 		path->tclass	 = av->av_tclass;
2225 		path->hop_limit	 = av->av_hop;
2226 		bcopy(&(av->av_dgid.gid_prefix), &(path->rgid_h),
2227 		    sizeof (uint64_t));
2228 		bcopy(&(av->av_dgid.gid_guid), &(path->rgid_l),
2229 		    sizeof (uint64_t));
2230 	} else {
2231 		path->rgid_l	 = 0x2;
2232 		path->flow_label = 0;
2233 		path->tclass	 = 0;
2234 		path->hop_limit	 = 0;
2235 		path->rgid_h	 = 0;
2236 	}
2237 	/* extract the default service level */
2238 	udav->sl = (HERMON_DEF_SCHED_SELECTION & 0x3C) >> 2;
2239 
2240 	return (DDI_SUCCESS);
2241 }
2242 
2243 
2244 /*
2245  * hermon_get_addr_path()
2246  *    Context: Can be called from interrupt or base context.
2247  *
2248  * Note: Just like hermon_set_addr_path() above, this routine is used for two
2249  * purposes.  It is used to read in the Hermon UDAV fields, and it is used to
2250  * read in the address path information for QPs.  Because the two Hermon
2251  * structures are similar, common fields can be read in here.  But because
2252  * they are slightly different, we pass an additional flag to indicate which
2253  * type is being read.
2254  */
2255 void
2256 hermon_get_addr_path(hermon_state_t *state, hermon_hw_addr_path_t *path,
2257     ibt_adds_vect_t *av, uint_t type)
2258 {
2259 	uint_t		gidtbl_sz;
2260 
2261 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path))
2262 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av))
2263 
2264 	av->av_src_path	= path->mlid;
2265 	av->av_dlid	= path->rlid;
2266 
2267 	/* Set "av_ipd" value from max_stat_rate */
2268 	switch (path->max_stat_rate) {
2269 	case 7:				/* 1xSDR-2.5Gb/s injection rate */
2270 		av->av_srate = IBT_SRATE_2; break;
2271 	case 8:				/* 4xSDR-10.0Gb/s injection rate */
2272 		av->av_srate = IBT_SRATE_10; break;
2273 	case 9:				/* 12xSDR-30Gb/s injection rate */
2274 		av->av_srate = IBT_SRATE_30; break;
2275 	case 10:			/* 1xDDR-5Gb/s injection rate */
2276 		av->av_srate = IBT_SRATE_5; break;
2277 	case 11:			/* 4xDDR-20Gb/s injection rate */
2278 		av->av_srate = IBT_SRATE_20; break;
2279 	case 12:			/* xQDR-40Gb/s injection rate */
2280 		av->av_srate = IBT_SRATE_40; break;
2281 	case 13:			/* 12xDDR-60Gb/s injection rate */
2282 		av->av_srate = IBT_SRATE_60; break;
2283 	case 14:			/* 8xQDR-80Gb/s injection rate */
2284 		av->av_srate = IBT_SRATE_80; break;
2285 	case 15:			/* 12xQDR-120Gb/s injection rate */
2286 		av->av_srate = IBT_SRATE_120; break;
2287 	case 0:				/* max */
2288 		av->av_srate = IBT_SRATE_NOT_SPECIFIED; break;
2289 	default:			/* 1x injection rate */
2290 		av->av_srate = IBT_SRATE_1X;
2291 	}
2292 
2293 	/*
2294 	 * Extract all "global" values regardless of the value in the GRH
2295 	 * flag.  Because "av_send_grh" is set only if "grh" is set, software
2296 	 * knows to ignore the other "global" values as necessary.  Note: SW
2297 	 * does it this way to enable these query operations to return exactly
2298 	 * the same params that were passed when the addr path was last written.
2299 	 */
2300 	av->av_send_grh		= path->grh;
2301 	if (type == HERMON_ADDRPATH_QP) {
2302 		av->av_sgid_ix  = path->mgid_index;
2303 	} else {
2304 		/*
2305 		 * For Hermon UDAV, the "mgid_index" field is the index into
2306 		 * a combined table (not a per-port table).
2307 		 */
2308 		gidtbl_sz = (1 << state->hs_queryport.log_max_gid);
2309 		av->av_sgid_ix = path->mgid_index - ((av->av_port_num - 1) *
2310 		    gidtbl_sz);
2311 
2312 		av->av_port_num = ((hermon_hw_udav_t *)(void *)path)->portnum;
2313 	}
2314 	av->av_flow		= path->flow_label;
2315 	av->av_tclass		= path->tclass;
2316 	av->av_hop		= path->hop_limit;
2317 	/* this is for alignment issue w/ the addr path struct in Hermon */
2318 	bcopy(&(path->rgid_h), &(av->av_dgid.gid_prefix), sizeof (uint64_t));
2319 	bcopy(&(path->rgid_l), &(av->av_dgid.gid_guid), sizeof (uint64_t));
2320 }
2321 
2322 
2323 /*
2324  * hermon_portnum_is_valid()
2325  *    Context: Can be called from interrupt or base context.
2326  */
2327 int
2328 hermon_portnum_is_valid(hermon_state_t *state, uint_t portnum)
2329 {
2330 	uint_t	max_port;
2331 
2332 	max_port = state->hs_cfg_profile->cp_num_ports;
2333 	if ((portnum <= max_port) && (portnum != 0)) {
2334 		return (1);
2335 	} else {
2336 		return (0);
2337 	}
2338 }
2339 
2340 
2341 /*
2342  * hermon_pkeyindex_is_valid()
2343  *    Context: Can be called from interrupt or base context.
2344  */
2345 int
2346 hermon_pkeyindex_is_valid(hermon_state_t *state, uint_t pkeyindx)
2347 {
2348 	uint_t	max_pkeyindx;
2349 
2350 	max_pkeyindx = 1 << state->hs_cfg_profile->cp_log_max_pkeytbl;
2351 	if (pkeyindx < max_pkeyindx) {
2352 		return (1);
2353 	} else {
2354 		return (0);
2355 	}
2356 }
2357 
2358 
2359 /*
2360  * hermon_queue_alloc()
2361  *    Context: Can be called from interrupt or base context.
2362  */
2363 int
2364 hermon_queue_alloc(hermon_state_t *state, hermon_qalloc_info_t *qa_info,
2365     uint_t sleepflag)
2366 {
2367 	ddi_dma_attr_t		dma_attr;
2368 	int			(*callback)(caddr_t);
2369 	uint64_t		realsize, alloc_mask;
2370 	uint_t			type;
2371 	int			flag, status;
2372 
2373 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info))
2374 
2375 	/* Set the callback flag appropriately */
2376 	callback = (sleepflag == HERMON_SLEEP) ? DDI_DMA_SLEEP :
2377 	    DDI_DMA_DONTWAIT;
2378 
2379 	/*
2380 	 * Initialize many of the default DMA attributes.  Then set additional
2381 	 * alignment restrictions as necessary for the queue memory.  Also
2382 	 * respect the configured value for IOMMU bypass
2383 	 */
2384 	hermon_dma_attr_init(state, &dma_attr);
2385 	dma_attr.dma_attr_align = qa_info->qa_bind_align;
2386 	type = state->hs_cfg_profile->cp_iommu_bypass;
2387 	if (type == HERMON_BINDMEM_BYPASS) {
2388 		dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
2389 	}
2390 
2391 	/* Allocate a DMA handle */
2392 	status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, callback, NULL,
2393 	    &qa_info->qa_dmahdl);
2394 	if (status != DDI_SUCCESS) {
2395 		return (DDI_FAILURE);
2396 	}
2397 
2398 	/*
2399 	 * Determine the amount of memory to allocate, depending on the values
2400 	 * in "qa_bind_align" and "qa_alloc_align".  The problem we are trying
2401 	 * to solve here is that allocating a DMA handle with IOMMU bypass
2402 	 * (DDI_DMA_FORCE_PHYSICAL) constrains us to only requesting alignments
2403 	 * that are less restrictive than the page size.  Since we may need
2404 	 * stricter alignments on the memory allocated by ddi_dma_mem_alloc()
2405 	 * (e.g. in Hermon QP work queue memory allocation), we use the
2406 	 * following method to calculate how much additional memory to request,
2407 	 * and we enforce our own alignment on the allocated result.
2408 	 */
2409 	alloc_mask = qa_info->qa_alloc_align - 1;
2410 	if (qa_info->qa_bind_align == qa_info->qa_alloc_align) {
2411 		realsize = qa_info->qa_size;
2412 	} else {
2413 		realsize = qa_info->qa_size + alloc_mask;
2414 	}
2415 
2416 	/*
2417 	 * If we are to allocate the queue from system memory, then use
2418 	 * ddi_dma_mem_alloc() to find the space.  Otherwise, this is a
2419 	 * host memory allocation, use ddi_umem_alloc(). In either case,
2420 	 * return a pointer to the memory range allocated (including any
2421 	 * necessary alignment adjustments), the "real" memory pointer,
2422 	 * the "real" size, and a ddi_acc_handle_t to use when reading
2423 	 * from/writing to the memory.
2424 	 */
2425 	if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) {
2426 		/* Allocate system memory for the queue */
2427 		status = ddi_dma_mem_alloc(qa_info->qa_dmahdl, realsize,
2428 		    &state->hs_reg_accattr, DDI_DMA_CONSISTENT, callback, NULL,
2429 		    (caddr_t *)&qa_info->qa_buf_real,
2430 		    (size_t *)&qa_info->qa_buf_realsz, &qa_info->qa_acchdl);
2431 		if (status != DDI_SUCCESS) {
2432 			ddi_dma_free_handle(&qa_info->qa_dmahdl);
2433 			return (DDI_FAILURE);
2434 		}
2435 
2436 		/*
2437 		 * Save temporary copy of the real pointer.  (This may be
2438 		 * modified in the last step below).
2439 		 */
2440 		qa_info->qa_buf_aligned = qa_info->qa_buf_real;
2441 
2442 		bzero(qa_info->qa_buf_real, qa_info->qa_buf_realsz);
2443 
2444 	} else { /* HERMON_QUEUE_LOCATION_USERLAND */
2445 
2446 		/* Allocate userland mappable memory for the queue */
2447 		flag = (sleepflag == HERMON_SLEEP) ? DDI_UMEM_SLEEP :
2448 		    DDI_UMEM_NOSLEEP;
2449 		qa_info->qa_buf_real = ddi_umem_alloc(realsize, flag,
2450 		    &qa_info->qa_umemcookie);
2451 		if (qa_info->qa_buf_real == NULL) {
2452 			ddi_dma_free_handle(&qa_info->qa_dmahdl);
2453 			return (DDI_FAILURE);
2454 		}
2455 
2456 		/*
2457 		 * Save temporary copy of the real pointer.  (This may be
2458 		 * modified in the last step below).
2459 		 */
2460 		qa_info->qa_buf_aligned = qa_info->qa_buf_real;
2461 
2462 	}
2463 
2464 	/*
2465 	 * The next to last step is to ensure that the final address
2466 	 * ("qa_buf_aligned") has the appropriate "alloc" alignment
2467 	 * restriction applied to it (if necessary).
2468 	 */
2469 	if (qa_info->qa_bind_align != qa_info->qa_alloc_align) {
2470 		qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t)
2471 		    qa_info->qa_buf_aligned + alloc_mask) & ~alloc_mask);
2472 	}
2473 	/*
2474 	 * The last step is to figure out the offset of the start relative
2475 	 * to the first page of the region - will be used in the eqc/cqc
2476 	 * passed to the HW
2477 	 */
2478 	qa_info->qa_pgoffs = (uint_t)((uintptr_t)
2479 	    qa_info->qa_buf_aligned & HERMON_PAGEMASK);
2480 
2481 	return (DDI_SUCCESS);
2482 }
2483 
2484 
2485 /*
2486  * hermon_queue_free()
2487  *    Context: Can be called from interrupt or base context.
2488  */
2489 void
2490 hermon_queue_free(hermon_qalloc_info_t *qa_info)
2491 {
2492 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info))
2493 
2494 	/*
2495 	 * Depending on how (i.e. from where) we allocated the memory for
2496 	 * this queue, we choose the appropriate method for releasing the
2497 	 * resources.
2498 	 */
2499 	if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) {
2500 
2501 		ddi_dma_mem_free(&qa_info->qa_acchdl);
2502 
2503 	} else if (qa_info->qa_location == HERMON_QUEUE_LOCATION_USERLAND) {
2504 
2505 		ddi_umem_free(qa_info->qa_umemcookie);
2506 
2507 	}
2508 
2509 	/* Always free the dma handle */
2510 	ddi_dma_free_handle(&qa_info->qa_dmahdl);
2511 }
2512 
2513 /*
2514  * hermon_destroy_fmr_pool()
2515  * Create a pool of FMRs.
2516  *     Context: Can be called from kernel context only.
2517  */
2518 int
2519 hermon_create_fmr_pool(hermon_state_t *state, hermon_pdhdl_t pd,
2520     ibt_fmr_pool_attr_t *fmr_attr, hermon_fmrhdl_t *fmrpoolp)
2521 {
2522 	hermon_fmrhdl_t	fmrpool;
2523 	hermon_fmr_list_t *fmr, *fmr_next;
2524 	hermon_mrhdl_t   mr;
2525 	char		taskqname[48];
2526 	int		status;
2527 	int		sleep;
2528 	int		i;
2529 
2530 	sleep = (fmr_attr->fmr_flags & IBT_MR_SLEEP) ? HERMON_SLEEP :
2531 	    HERMON_NOSLEEP;
2532 	if ((sleep == HERMON_SLEEP) &&
2533 	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
2534 		return (IBT_INVALID_PARAM);
2535 	}
2536 
2537 	fmrpool = (hermon_fmrhdl_t)kmem_zalloc(sizeof (*fmrpool), sleep);
2538 	if (fmrpool == NULL) {
2539 		status = IBT_INSUFF_RESOURCE;
2540 		goto fail;
2541 	}
2542 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmrpool))
2543 
2544 	mutex_init(&fmrpool->fmr_lock, NULL, MUTEX_DRIVER,
2545 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
2546 
2547 	fmrpool->fmr_state	    = state;
2548 	fmrpool->fmr_flush_function = fmr_attr->fmr_func_hdlr;
2549 	fmrpool->fmr_flush_arg	    = fmr_attr->fmr_func_arg;
2550 	fmrpool->fmr_pool_size	    = 0;
2551 	fmrpool->fmr_cache	    = 0;
2552 	fmrpool->fmr_max_pages	    = fmr_attr->fmr_max_pages_per_fmr;
2553 	fmrpool->fmr_page_sz	    = fmr_attr->fmr_page_sz;
2554 	fmrpool->fmr_dirty_watermark = fmr_attr->fmr_dirty_watermark;
2555 	fmrpool->fmr_dirty_len	    = 0;
2556 	fmrpool->fmr_flags	    = fmr_attr->fmr_flags;
2557 
2558 	/* Create taskq to handle cleanup and flush processing */
2559 	(void) snprintf(taskqname, 50, "fmrpool/%d/%d @ 0x%" PRIx64,
2560 	    fmr_attr->fmr_pool_size, hermon_debug_fmrpool_cnt,
2561 	    (uint64_t)(uintptr_t)fmrpool);
2562 	fmrpool->fmr_taskq = ddi_taskq_create(state->hs_dip, taskqname,
2563 	    HERMON_TASKQ_NTHREADS, TASKQ_DEFAULTPRI, 0);
2564 	if (fmrpool->fmr_taskq == NULL) {
2565 		status = IBT_INSUFF_RESOURCE;
2566 		goto fail1;
2567 	}
2568 
2569 	fmrpool->fmr_free_list = NULL;
2570 	fmrpool->fmr_dirty_list = NULL;
2571 
2572 	if (fmr_attr->fmr_cache) {
2573 		hermon_fmr_cache_init(fmrpool);
2574 	}
2575 
2576 	for (i = 0; i < fmr_attr->fmr_pool_size; i++) {
2577 		status = hermon_mr_alloc_fmr(state, pd, fmrpool, &mr);
2578 		if (status != DDI_SUCCESS) {
2579 			goto fail2;
2580 		}
2581 
2582 		fmr = (hermon_fmr_list_t *)kmem_zalloc(
2583 		    sizeof (hermon_fmr_list_t), sleep);
2584 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr))
2585 
2586 		fmr->fmr = mr;
2587 		fmr->fmr_refcnt = 0;
2588 		fmr->fmr_remaps = 0;
2589 		fmr->fmr_pool = fmrpool;
2590 		fmr->fmr_in_cache = 0;
2591 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
2592 		mr->mr_fmr = fmr;
2593 
2594 		fmr->fmr_next = fmrpool->fmr_free_list;
2595 		fmrpool->fmr_free_list = fmr;
2596 		fmrpool->fmr_pool_size++;
2597 	}
2598 
2599 	/* Set to return pool */
2600 	*fmrpoolp = fmrpool;
2601 
2602 	return (IBT_SUCCESS);
2603 fail2:
2604 	hermon_fmr_cache_fini(fmrpool);
2605 	for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) {
2606 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr))
2607 		fmr_next = fmr->fmr_next;
2608 		(void) hermon_mr_dealloc_fmr(state, &fmr->fmr);
2609 		kmem_free(fmr, sizeof (hermon_fmr_list_t));
2610 	}
2611 	ddi_taskq_destroy(fmrpool->fmr_taskq);
2612 fail1:
2613 	kmem_free(fmrpool, sizeof (*fmrpool));
2614 fail:
2615 	if (status == DDI_FAILURE) {
2616 		return (ibc_get_ci_failure(0));
2617 	} else {
2618 		return (status);
2619 	}
2620 }
2621 
2622 /*
2623  * hermon_destroy_fmr_pool()
2624  * Destroy an FMR pool and free all associated resources.
2625  *     Context: Can be called from kernel context only.
2626  */
2627 int
2628 hermon_destroy_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool)
2629 {
2630 	hermon_fmr_list_t	*fmr, *fmr_next;
2631 	int			status;
2632 
2633 	mutex_enter(&fmrpool->fmr_lock);
2634 	status = hermon_fmr_cleanup(state, fmrpool);
2635 	if (status != DDI_SUCCESS) {
2636 		mutex_exit(&fmrpool->fmr_lock);
2637 		return (status);
2638 	}
2639 
2640 	if (fmrpool->fmr_cache) {
2641 		hermon_fmr_cache_fini(fmrpool);
2642 	}
2643 
2644 	for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) {
2645 		fmr_next = fmr->fmr_next;
2646 
2647 		(void) hermon_mr_dealloc_fmr(state, &fmr->fmr);
2648 		kmem_free(fmr, sizeof (hermon_fmr_list_t));
2649 	}
2650 	mutex_exit(&fmrpool->fmr_lock);
2651 
2652 	ddi_taskq_destroy(fmrpool->fmr_taskq);
2653 	mutex_destroy(&fmrpool->fmr_lock);
2654 
2655 	kmem_free(fmrpool, sizeof (*fmrpool));
2656 	return (DDI_SUCCESS);
2657 }
2658 
2659 /*
2660  * hermon_flush_fmr_pool()
2661  * Ensure that all unmapped FMRs are fully invalidated.
2662  *     Context: Can be called from kernel context only.
2663  */
2664 int
2665 hermon_flush_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool)
2666 {
2667 	int		status;
2668 
2669 	/*
2670 	 * Force the unmapping of all entries on the dirty list, regardless of
2671 	 * whether the watermark has been hit yet.
2672 	 */
2673 	/* grab the pool lock */
2674 	mutex_enter(&fmrpool->fmr_lock);
2675 	status = hermon_fmr_cleanup(state, fmrpool);
2676 	mutex_exit(&fmrpool->fmr_lock);
2677 	return (status);
2678 }
2679 
2680 /*
2681  * hermon_deregister_fmr()
2682  * Map memory into FMR
2683  *    Context: Can be called from interrupt or base context.
2684  */
2685 int
2686 hermon_register_physical_fmr(hermon_state_t *state, hermon_fmrhdl_t fmrpool,
2687     ibt_pmr_attr_t *mem_pattr, hermon_mrhdl_t *mr,
2688     ibt_pmr_desc_t *mem_desc_p)
2689 {
2690 	hermon_fmr_list_t	*fmr;
2691 	hermon_fmr_list_t	query;
2692 	avl_index_t		where;
2693 	int			status;
2694 
2695 	/* Check length */
2696 	mutex_enter(&fmrpool->fmr_lock);
2697 	if (mem_pattr->pmr_len < 1 || (mem_pattr->pmr_num_buf >
2698 	    fmrpool->fmr_max_pages)) {
2699 		mutex_exit(&fmrpool->fmr_lock);
2700 		return (IBT_MR_LEN_INVALID);
2701 	}
2702 
2703 	mutex_enter(&fmrpool->fmr_cachelock);
2704 	/* lookup in fmr cache */
2705 	/* if exists, grab it, and return it */
2706 	if (fmrpool->fmr_cache) {
2707 		query.fmr_desc.pmd_iova = mem_pattr->pmr_iova;
2708 		query.fmr_desc.pmd_phys_buf_list_sz = mem_pattr->pmr_len;
2709 		fmr = (hermon_fmr_list_t *)avl_find(&fmrpool->fmr_cache_avl,
2710 		    &query, &where);
2711 
2712 		/*
2713 		 * If valid FMR was found in cache, return that fmr info
2714 		 */
2715 		if (fmr != NULL) {
2716 			fmr->fmr_refcnt++;
2717 			/* Store pmr desc for use in cache */
2718 			(void) memcpy(mem_desc_p, &fmr->fmr_desc,
2719 			    sizeof (ibt_pmr_desc_t));
2720 			*mr = (hermon_mrhdl_t)fmr->fmr;
2721 			mutex_exit(&fmrpool->fmr_cachelock);
2722 			mutex_exit(&fmrpool->fmr_lock);
2723 			return (DDI_SUCCESS);
2724 		}
2725 	}
2726 
2727 	/* FMR does not exist in cache, proceed with registration */
2728 
2729 	/* grab next free entry */
2730 	fmr = fmrpool->fmr_free_list;
2731 	if (fmr == NULL) {
2732 		mutex_exit(&fmrpool->fmr_cachelock);
2733 		mutex_exit(&fmrpool->fmr_lock);
2734 		return (IBT_INSUFF_RESOURCE);
2735 	}
2736 
2737 	fmrpool->fmr_free_list = fmrpool->fmr_free_list->fmr_next;
2738 	fmr->fmr_next = NULL;
2739 
2740 	status = hermon_mr_register_physical_fmr(state, mem_pattr, fmr->fmr,
2741 	    mem_desc_p);
2742 	if (status != DDI_SUCCESS) {
2743 		mutex_exit(&fmrpool->fmr_cachelock);
2744 		mutex_exit(&fmrpool->fmr_lock);
2745 		return (status);
2746 	}
2747 
2748 	fmr->fmr_refcnt = 1;
2749 	fmr->fmr_remaps++;
2750 
2751 	/* Store pmr desc for use in cache */
2752 	(void) memcpy(&fmr->fmr_desc, mem_desc_p, sizeof (ibt_pmr_desc_t));
2753 	*mr = (hermon_mrhdl_t)fmr->fmr;
2754 
2755 	/* Store in cache */
2756 	if (fmrpool->fmr_cache) {
2757 		if (!fmr->fmr_in_cache) {
2758 			avl_insert(&fmrpool->fmr_cache_avl, fmr, where);
2759 			fmr->fmr_in_cache = 1;
2760 		}
2761 	}
2762 
2763 	mutex_exit(&fmrpool->fmr_cachelock);
2764 	mutex_exit(&fmrpool->fmr_lock);
2765 	return (DDI_SUCCESS);
2766 }
2767 
2768 /*
2769  * hermon_deregister_fmr()
2770  * Unmap FMR
2771  *    Context: Can be called from kernel context only.
2772  */
2773 int
2774 hermon_deregister_fmr(hermon_state_t *state, hermon_mrhdl_t mr)
2775 {
2776 	hermon_fmr_list_t	*fmr;
2777 	hermon_fmrhdl_t		fmrpool;
2778 	int			status;
2779 
2780 	fmr = mr->mr_fmr;
2781 	fmrpool = fmr->fmr_pool;
2782 
2783 	/* Grab pool lock */
2784 	mutex_enter(&fmrpool->fmr_lock);
2785 	fmr->fmr_refcnt--;
2786 
2787 	if (fmr->fmr_refcnt == 0) {
2788 		/*
2789 		 * First, do some bit of invalidation, reducing our exposure to
2790 		 * having this region still registered in hardware.
2791 		 */
2792 		(void) hermon_mr_invalidate_fmr(state, mr);
2793 
2794 		/*
2795 		 * If we've exhausted our remaps then add the FMR to the dirty
2796 		 * list, not allowing it to be re-used until we have done a
2797 		 * flush.  Otherwise, simply add it back to the free list for
2798 		 * re-mapping.
2799 		 */
2800 		if (fmr->fmr_remaps <
2801 		    state->hs_cfg_profile->cp_fmr_max_remaps) {
2802 			/* add to free list */
2803 			fmr->fmr_next = fmrpool->fmr_free_list;
2804 			fmrpool->fmr_free_list = fmr;
2805 		} else {
2806 			/* add to dirty list */
2807 			fmr->fmr_next = fmrpool->fmr_dirty_list;
2808 			fmrpool->fmr_dirty_list = fmr;
2809 			fmrpool->fmr_dirty_len++;
2810 
2811 			status = ddi_taskq_dispatch(fmrpool->fmr_taskq,
2812 			    hermon_fmr_processing, fmrpool, DDI_NOSLEEP);
2813 			if (status == DDI_FAILURE) {
2814 				mutex_exit(&fmrpool->fmr_lock);
2815 				return (IBT_INSUFF_RESOURCE);
2816 			}
2817 		}
2818 	}
2819 	/* Release pool lock */
2820 	mutex_exit(&fmrpool->fmr_lock);
2821 
2822 	return (DDI_SUCCESS);
2823 }
2824 
2825 
2826 /*
2827  * hermon_fmr_processing()
2828  * If required, perform cleanup.
2829  *     Context: Called from taskq context only.
2830  */
2831 static void
2832 hermon_fmr_processing(void *fmr_args)
2833 {
2834 	hermon_fmrhdl_t		fmrpool;
2835 	int			status;
2836 
2837 	ASSERT(fmr_args != NULL);
2838 
2839 	fmrpool = (hermon_fmrhdl_t)fmr_args;
2840 
2841 	/* grab pool lock */
2842 	mutex_enter(&fmrpool->fmr_lock);
2843 	if (fmrpool->fmr_dirty_len >= fmrpool->fmr_dirty_watermark) {
2844 		status = hermon_fmr_cleanup(fmrpool->fmr_state, fmrpool);
2845 		if (status != DDI_SUCCESS) {
2846 			mutex_exit(&fmrpool->fmr_lock);
2847 			return;
2848 		}
2849 
2850 		if (fmrpool->fmr_flush_function != NULL) {
2851 			(void) fmrpool->fmr_flush_function(
2852 			    (ibc_fmr_pool_hdl_t)fmrpool,
2853 			    fmrpool->fmr_flush_arg);
2854 		}
2855 	}
2856 
2857 	/* let pool lock go */
2858 	mutex_exit(&fmrpool->fmr_lock);
2859 }
2860 
2861 /*
2862  * hermon_fmr_cleanup()
2863  * Perform cleaning processing, walking the list and performing the MTT sync
2864  * operation if required.
2865  *    Context: can be called from taskq or base context.
2866  */
2867 static int
2868 hermon_fmr_cleanup(hermon_state_t *state, hermon_fmrhdl_t fmrpool)
2869 {
2870 	hermon_fmr_list_t	*fmr;
2871 	hermon_fmr_list_t	*fmr_next;
2872 	int			sync_needed;
2873 	int			status;
2874 
2875 	ASSERT(MUTEX_HELD(&fmrpool->fmr_lock));
2876 
2877 	sync_needed = 0;
2878 	for (fmr = fmrpool->fmr_dirty_list; fmr; fmr = fmr_next) {
2879 		fmr_next = fmr->fmr_next;
2880 		fmr->fmr_remaps = 0;
2881 
2882 		(void) hermon_mr_deregister_fmr(state, fmr->fmr);
2883 
2884 		/*
2885 		 * Update lists.
2886 		 * - add fmr back to free list
2887 		 * - remove fmr from dirty list
2888 		 */
2889 		fmr->fmr_next = fmrpool->fmr_free_list;
2890 		fmrpool->fmr_free_list = fmr;
2891 
2892 
2893 		/*
2894 		 * Because we have updated the dirty list, and deregistered the
2895 		 * FMR entry, we do need to sync the TPT, so we set the
2896 		 * 'sync_needed' flag here so we sync once we finish dirty_list
2897 		 * processing.
2898 		 */
2899 		sync_needed = 1;
2900 	}
2901 
2902 	fmrpool->fmr_dirty_list = NULL;
2903 	fmrpool->fmr_dirty_len = 0;
2904 
2905 	if (sync_needed) {
2906 		status = hermon_sync_tpt_cmd_post(state,
2907 		    HERMON_CMD_NOSLEEP_SPIN);
2908 		if (status != HERMON_CMD_SUCCESS) {
2909 			return (status);
2910 		}
2911 	}
2912 
2913 	return (DDI_SUCCESS);
2914 }
2915 
2916 /*
2917  * hermon_fmr_avl_compare()
2918  *    Context: Can be called from user or kernel context.
2919  */
2920 static int
2921 hermon_fmr_avl_compare(const void *q, const void *e)
2922 {
2923 	hermon_fmr_list_t *entry, *query;
2924 
2925 	entry = (hermon_fmr_list_t *)e;
2926 	query = (hermon_fmr_list_t *)q;
2927 
2928 	if (query->fmr_desc.pmd_iova < entry->fmr_desc.pmd_iova) {
2929 		return (-1);
2930 	} else if (query->fmr_desc.pmd_iova > entry->fmr_desc.pmd_iova) {
2931 		return (+1);
2932 	} else {
2933 		return (0);
2934 	}
2935 }
2936 
2937 
2938 /*
2939  * hermon_fmr_cache_init()
2940  *    Context: Can be called from user or kernel context.
2941  */
2942 static void
2943 hermon_fmr_cache_init(hermon_fmrhdl_t fmr)
2944 {
2945 	/* Initialize the lock used for FMR cache AVL tree access */
2946 	mutex_init(&fmr->fmr_cachelock, NULL, MUTEX_DRIVER,
2947 	    DDI_INTR_PRI(fmr->fmr_state->hs_intrmsi_pri));
2948 
2949 	/* Initialize the AVL tree for the FMR cache */
2950 	avl_create(&fmr->fmr_cache_avl, hermon_fmr_avl_compare,
2951 	    sizeof (hermon_fmr_list_t),
2952 	    offsetof(hermon_fmr_list_t, fmr_avlnode));
2953 
2954 	fmr->fmr_cache = 1;
2955 }
2956 
2957 
2958 /*
2959  * hermon_fmr_cache_fini()
2960  *    Context: Can be called from user or kernel context.
2961  */
2962 static void
2963 hermon_fmr_cache_fini(hermon_fmrhdl_t fmr)
2964 {
2965 	void			*cookie;
2966 
2967 	/*
2968 	 * Empty all entries (if necessary) and destroy the AVL tree.
2969 	 * The FMRs themselves are freed as part of destroy_pool()
2970 	 */
2971 	cookie = NULL;
2972 	while (((void *)(hermon_fmr_list_t *)avl_destroy_nodes(
2973 	    &fmr->fmr_cache_avl, &cookie)) != NULL) {
2974 		/* loop through */
2975 	}
2976 	avl_destroy(&fmr->fmr_cache_avl);
2977 
2978 	/* Destroy the lock used for FMR cache */
2979 	mutex_destroy(&fmr->fmr_cachelock);
2980 }
2981 
2982 /*
2983  * hermon_get_dma_cookies()
2984  * Return DMA cookies in the pre-allocated paddr_list_p based on the length
2985  * needed.
2986  *    Context: Can be called from interrupt or base context.
2987  */
2988 int
2989 hermon_get_dma_cookies(hermon_state_t *state, ibt_phys_buf_t *paddr_list_p,
2990     ibt_va_attr_t *va_attrs, uint_t list_len, uint_t *cookiecnt,
2991     ibc_ma_hdl_t *ibc_ma_hdl_p)
2992 {
2993 	ddi_dma_handle_t	dma_hdl;
2994 	ddi_dma_attr_t		dma_attr;
2995 	ddi_dma_cookie_t	dmacookie;
2996 	int			(*callback)(caddr_t);
2997 	int			status;
2998 	int			i;
2999 
3000 	/* Set the callback flag appropriately */
3001 	callback = (va_attrs->va_flags & IBT_VA_NOSLEEP) ? DDI_DMA_DONTWAIT :
3002 	    DDI_DMA_SLEEP;
3003 	if ((callback == DDI_DMA_SLEEP) &&
3004 	    (HERMON_SLEEP != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
3005 		return (IBT_INVALID_PARAM);
3006 	}
3007 
3008 	/*
3009 	 * Initialize many of the default DMA attributes and allocate the DMA
3010 	 * handle.  Then, if we're bypassing the IOMMU, set the
3011 	 * DDI_DMA_FORCE_PHYSICAL flag.
3012 	 */
3013 	hermon_dma_attr_init(state, &dma_attr);
3014 
3015 #ifdef __x86
3016 	/*
3017 	 * On x86 we can specify a maximum segment length for our returned
3018 	 * cookies.
3019 	 */
3020 	if (va_attrs->va_flags & IBT_VA_FMR) {
3021 		dma_attr.dma_attr_seg = PAGESIZE - 1;
3022 	}
3023 #endif
3024 
3025 	/*
3026 	 * Check to see if the RO flag is set, and if so,
3027 	 * set that bit in the attr structure as well.
3028 	 *
3029 	 * NOTE 1:  This function is ONLY called by consumers, and only for
3030 	 *	    data buffers
3031 	 */
3032 	if (hermon_kernel_data_ro == HERMON_RO_ENABLED) {
3033 		dma_attr.dma_attr_flags |= DDI_DMA_RELAXED_ORDERING;
3034 	}
3035 
3036 	status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
3037 	    callback, NULL, &dma_hdl);
3038 	if (status != DDI_SUCCESS) {
3039 		switch (status) {
3040 		case DDI_DMA_NORESOURCES:
3041 			return (IBT_INSUFF_RESOURCE);
3042 		case DDI_DMA_BADATTR:
3043 		default:
3044 			return (ibc_get_ci_failure(0));
3045 		}
3046 	}
3047 
3048 	/*
3049 	 * Now bind the handle with the correct DMA attributes.
3050 	 */
3051 	if (va_attrs->va_flags & IBT_VA_BUF) {
3052 		status = ddi_dma_buf_bind_handle(dma_hdl, va_attrs->va_buf,
3053 		    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_DONTWAIT,
3054 		    NULL, &dmacookie, cookiecnt);
3055 	} else {
3056 		status = ddi_dma_addr_bind_handle(dma_hdl, NULL,
3057 		    (caddr_t)(uintptr_t)va_attrs->va_vaddr, va_attrs->va_len,
3058 		    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_DONTWAIT,
3059 		    NULL, &dmacookie, cookiecnt);
3060 	}
3061 	if (status != DDI_SUCCESS) {
3062 		ddi_dma_free_handle(&dma_hdl);
3063 
3064 		switch (status) {
3065 		case DDI_DMA_NORESOURCES:
3066 			return (IBT_INSUFF_RESOURCE);
3067 		case DDI_DMA_TOOBIG:
3068 			return (IBT_INVALID_PARAM);
3069 		case DDI_DMA_PARTIAL_MAP:
3070 		case DDI_DMA_INUSE:
3071 		case DDI_DMA_NOMAPPING:
3072 		default:
3073 			return (ibc_get_ci_failure(0));
3074 		}
3075 	}
3076 
3077 	/*
3078 	 * Verify our physical buffer list (PBL) is large enough to handle the
3079 	 * number of cookies that were returned.
3080 	 */
3081 	if (*cookiecnt > list_len) {
3082 		(void) ddi_dma_unbind_handle(dma_hdl);
3083 		ddi_dma_free_handle(&dma_hdl);
3084 		return (IBT_PBL_TOO_SMALL);
3085 	}
3086 
3087 	/*
3088 	 * We store the cookies returned by the DDI into our own PBL.  This
3089 	 * sets the cookies up for later processing (for example, if we want to
3090 	 * split up the cookies into smaller chunks).  We use the laddr and
3091 	 * size fields in each cookie to create each individual entry (PBE).
3092 	 */
3093 
3094 	/*
3095 	 * Store first cookie info first
3096 	 */
3097 	paddr_list_p[0].p_laddr = dmacookie.dmac_laddress;
3098 	paddr_list_p[0].p_size = dmacookie.dmac_size;
3099 
3100 	/*
3101 	 * Loop through each cookie, storing each cookie into our physical
3102 	 * buffer list.
3103 	 */
3104 	for (i = 1; i < *cookiecnt; i++) {
3105 		ddi_dma_nextcookie(dma_hdl, &dmacookie);
3106 
3107 		paddr_list_p[i].p_laddr = dmacookie.dmac_laddress;
3108 		paddr_list_p[i].p_size  = dmacookie.dmac_size;
3109 	}
3110 
3111 	/* return handle */
3112 	*ibc_ma_hdl_p = (ibc_ma_hdl_t)dma_hdl;
3113 	return (DDI_SUCCESS);
3114 }
3115 
3116 /*
3117  * hermon_split_dma_cookies()
3118  * Split up cookies passed in from paddr_list_p, returning the new list in the
3119  * same buffers, based on the pagesize to split the cookies into.
3120  *    Context: Can be called from interrupt or base context.
3121  */
3122 /* ARGSUSED */
3123 int
3124 hermon_split_dma_cookies(hermon_state_t *state, ibt_phys_buf_t *paddr_list,
3125     ib_memlen_t *paddr_offset, uint_t list_len, uint_t *cookiecnt,
3126     uint_t pagesize)
3127 {
3128 	uint64_t	pageoffset;
3129 	uint64_t	pagemask;
3130 	uint_t		pageshift;
3131 	uint_t		current_cookiecnt;
3132 	uint_t		cookies_needed;
3133 	uint64_t	last_size, extra_cookie;
3134 	int		i_increment;
3135 	int		i, k;
3136 	int		status;
3137 
3138 	/* Setup pagesize calculations */
3139 	pageoffset = pagesize - 1;
3140 	pagemask = (~pageoffset);
3141 	pageshift = highbit(pagesize) - 1;
3142 
3143 	/*
3144 	 * Setup first cookie offset based on pagesize requested.
3145 	 */
3146 	*paddr_offset = paddr_list[0].p_laddr & pageoffset;
3147 	paddr_list[0].p_laddr &= pagemask;
3148 
3149 	/* Save away the current number of cookies that are passed in */
3150 	current_cookiecnt = *cookiecnt;
3151 
3152 	/* Perform splitting up of current cookies into pagesize blocks */
3153 	for (i = 0; i < current_cookiecnt; i += i_increment) {
3154 		/*
3155 		 * If the cookie is smaller than pagesize, or already is
3156 		 * pagesize, then we are already within our limits, so we skip
3157 		 * it.
3158 		 */
3159 		if (paddr_list[i].p_size <= pagesize) {
3160 			i_increment = 1;
3161 			continue;
3162 		}
3163 
3164 		/*
3165 		 * If this is our first cookie, then we have to deal with the
3166 		 * offset that may be present in the first address.  So add
3167 		 * that to our size, to calculate potential change to the last
3168 		 * cookie's size.
3169 		 *
3170 		 * Also, calculate the number of cookies that we'll need to
3171 		 * split up this block into.
3172 		 */
3173 		if (i == 0) {
3174 			last_size = (paddr_list[i].p_size + *paddr_offset) &
3175 			    pageoffset;
3176 			cookies_needed = (paddr_list[i].p_size +
3177 			    *paddr_offset) >> pageshift;
3178 		} else {
3179 			last_size = 0;
3180 			cookies_needed = paddr_list[i].p_size >> pageshift;
3181 		}
3182 
3183 		/*
3184 		 * If our size is not a multiple of pagesize, we need one more
3185 		 * cookie.
3186 		 */
3187 		if (last_size) {
3188 			extra_cookie = 1;
3189 		} else {
3190 			extra_cookie = 0;
3191 		}
3192 
3193 		/*
3194 		 * Split cookie into pagesize chunks, shifting list of cookies
3195 		 * down, using more cookie slots in the PBL if necessary.
3196 		 */
3197 		status = hermon_dma_cookie_shift(paddr_list, i, list_len,
3198 		    current_cookiecnt - i, cookies_needed + extra_cookie);
3199 		if (status != 0) {
3200 			return (status);
3201 		}
3202 
3203 		/*
3204 		 * If the very first cookie, we must take possible offset into
3205 		 * account.
3206 		 */
3207 		if (i == 0) {
3208 			paddr_list[i].p_size = pagesize - *paddr_offset;
3209 		} else {
3210 			paddr_list[i].p_size = pagesize;
3211 		}
3212 
3213 		/*
3214 		 * We have shifted the existing cookies down the PBL, now fill
3215 		 * in the blank entries by splitting up our current block.
3216 		 */
3217 		for (k = 1; k < cookies_needed; k++) {
3218 			paddr_list[i + k].p_laddr =
3219 			    paddr_list[i + k - 1].p_laddr + pagesize;
3220 			paddr_list[i + k].p_size = pagesize;
3221 		}
3222 
3223 		/* If we have one extra cookie (of less than pagesize...) */
3224 		if (extra_cookie) {
3225 			paddr_list[i + k].p_laddr =
3226 			    paddr_list[i + k - 1].p_laddr + pagesize;
3227 			paddr_list[i + k].p_size = (size_t)last_size;
3228 		}
3229 
3230 		/* Increment cookiecnt appropriately based on cookies used */
3231 		i_increment = cookies_needed + extra_cookie;
3232 		current_cookiecnt += i_increment - 1;
3233 	}
3234 
3235 	/* Update to new cookie count */
3236 	*cookiecnt = current_cookiecnt;
3237 	return (DDI_SUCCESS);
3238 }
3239 
3240 /*
3241  * hermon_dma_cookie_shift()
3242  *    Context: Can be called from interrupt or base context.
3243  */
3244 int
3245 hermon_dma_cookie_shift(ibt_phys_buf_t *paddr_list, int start, int end,
3246     int cookiecnt, int num_shift)
3247 {
3248 	int shift_start;
3249 	int i;
3250 
3251 	/* Calculating starting point in the PBL list */
3252 	shift_start = start + cookiecnt - 1;
3253 
3254 	/* Check if we're at the end of our PBL list */
3255 	if ((shift_start + num_shift - 1) >= end) {
3256 		return (IBT_PBL_TOO_SMALL);
3257 	}
3258 
3259 	for (i = shift_start; i > start; i--) {
3260 		paddr_list[i + num_shift - 1] = paddr_list[i];
3261 	}
3262 
3263 	return (DDI_SUCCESS);
3264 }
3265 
3266 
3267 /*
3268  * hermon_free_dma_cookies()
3269  *    Context: Can be called from interrupt or base context.
3270  */
3271 int
3272 hermon_free_dma_cookies(ibc_ma_hdl_t ma_hdl)
3273 {
3274 	ddi_dma_handle_t	dma_hdl;
3275 	int			status;
3276 
3277 	dma_hdl = (ddi_dma_handle_t)ma_hdl;
3278 
3279 	status = ddi_dma_unbind_handle(dma_hdl);
3280 	if (status != DDI_SUCCESS) {
3281 		return (ibc_get_ci_failure(0));
3282 	}
3283 	ddi_dma_free_handle(&dma_hdl);
3284 
3285 	return (DDI_SUCCESS);
3286 }
3287