xref: /illumos-gate/usr/src/uts/common/io/ib/adapters/hermon/hermon_cq.c (revision 9e39c5ba00a55fa05777cc94b148296af305e135)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * hermon_cq.c
29  *    Hermon Completion Queue Processing Routines
30  *
31  *    Implements all the routines necessary for allocating, freeing, resizing,
32  *    and handling the completion type events that the Hermon hardware can
33  *    generate.
34  */
35 
36 #include <sys/types.h>
37 #include <sys/conf.h>
38 #include <sys/ddi.h>
39 #include <sys/sunddi.h>
40 #include <sys/modctl.h>
41 #include <sys/bitmap.h>
42 #include <sys/sysmacros.h>
43 
44 #include <sys/ib/adapters/hermon/hermon.h>
45 
46 int hermon_should_panic = 0;	/* debugging aid */
47 
48 #define	hermon_cq_update_ci_doorbell(cq)				\
49 	/* Build the doorbell record data (low 24 bits only) */		\
50 	HERMON_UAR_DB_RECORD_WRITE(cq->cq_arm_ci_vdbr,			\
51 	    cq->cq_consindx & 0x00FFFFFF)
52 
53 static int hermon_cq_arm_doorbell(hermon_state_t *state, hermon_cqhdl_t cq,
54     uint_t cmd);
55 #pragma inline(hermon_cq_arm_doorbell)
56 static void hermon_arm_cq_dbr_init(hermon_dbr_t *cq_arm_dbr);
57 #pragma inline(hermon_arm_cq_dbr_init)
58 static void hermon_cq_cqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
59     hermon_hw_cqe_t *cqe, ibt_wc_t *wc);
60 static void hermon_cq_errcqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
61     hermon_hw_cqe_t *cqe, ibt_wc_t *wc);
62 static void hermon_cqe_sync(hermon_cqhdl_t cq, hermon_hw_cqe_t *cqe,
63     uint_t flag);
64 
65 
66 /*
67  * hermon_cq_alloc()
68  *    Context: Can be called only from user or kernel context.
69  */
70 int
71 hermon_cq_alloc(hermon_state_t *state, ibt_cq_hdl_t ibt_cqhdl,
72     ibt_cq_attr_t *cq_attr, uint_t *actual_size, hermon_cqhdl_t *cqhdl,
73     uint_t sleepflag)
74 {
75 	hermon_rsrc_t		*cqc, *rsrc;
76 	hermon_umap_db_entry_t	*umapdb;
77 	hermon_hw_cqc_t		cqc_entry;
78 	hermon_cqhdl_t		cq;
79 	ibt_mr_attr_t		mr_attr;
80 	hermon_mr_options_t	op;
81 	hermon_pdhdl_t		pd;
82 	hermon_mrhdl_t		mr;
83 	hermon_hw_cqe_t		*buf;
84 	uint64_t		value;
85 	uint32_t		log_cq_size, uarpg;
86 	uint_t			cq_is_umap;
87 	uint32_t		status, flag;
88 
89 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq_attr))
90 
91 	/*
92 	 * Determine whether CQ is being allocated for userland access or
93 	 * whether it is being allocated for kernel access.  If the CQ is
94 	 * being allocated for userland access, then lookup the UAR
95 	 * page number for the current process.  Note:  If this is not found
96 	 * (e.g. if the process has not previously open()'d the Hermon driver),
97 	 * then an error is returned.
98 	 */
99 	cq_is_umap = (cq_attr->cq_flags & IBT_CQ_USER_MAP) ? 1 : 0;
100 	if (cq_is_umap) {
101 		status = hermon_umap_db_find(state->hs_instance, ddi_get_pid(),
102 		    MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
103 		if (status != DDI_SUCCESS) {
104 			status = IBT_INVALID_PARAM;
105 			goto cqalloc_fail;
106 		}
107 		uarpg = ((hermon_rsrc_t *)(uintptr_t)value)->hr_indx;
108 	} else {
109 		uarpg = state->hs_kernel_uar_index;
110 	}
111 
112 	/* Use the internal protection domain (PD) for setting up CQs */
113 	pd = state->hs_pdhdl_internal;
114 
115 	/* Increment the reference count on the protection domain (PD) */
116 	hermon_pd_refcnt_inc(pd);
117 
118 	/*
119 	 * Allocate an CQ context entry.  This will be filled in with all
120 	 * the necessary parameters to define the Completion Queue.  And then
121 	 * ownership will be passed to the hardware in the final step
122 	 * below.  If we fail here, we must undo the protection domain
123 	 * reference count.
124 	 */
125 	status = hermon_rsrc_alloc(state, HERMON_CQC, 1, sleepflag, &cqc);
126 	if (status != DDI_SUCCESS) {
127 		status = IBT_INSUFF_RESOURCE;
128 		goto cqalloc_fail1;
129 	}
130 
131 	/*
132 	 * Allocate the software structure for tracking the completion queue
133 	 * (i.e. the Hermon Completion Queue handle).  If we fail here, we must
134 	 * undo the protection domain reference count and the previous
135 	 * resource allocation.
136 	 */
137 	status = hermon_rsrc_alloc(state, HERMON_CQHDL, 1, sleepflag, &rsrc);
138 	if (status != DDI_SUCCESS) {
139 		status = IBT_INSUFF_RESOURCE;
140 		goto cqalloc_fail2;
141 	}
142 	cq = (hermon_cqhdl_t)rsrc->hr_addr;
143 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq))
144 	cq->cq_is_umap = cq_is_umap;
145 	cq->cq_cqnum = cqc->hr_indx;	/* just use index, implicit in Hermon */
146 
147 	/*
148 	 * If this will be a user-mappable CQ, then allocate an entry for
149 	 * the "userland resources database".  This will later be added to
150 	 * the database (after all further CQ operations are successful).
151 	 * If we fail here, we must undo the reference counts and the
152 	 * previous resource allocation.
153 	 */
154 	if (cq->cq_is_umap) {
155 		umapdb = hermon_umap_db_alloc(state->hs_instance, cq->cq_cqnum,
156 		    MLNX_UMAP_CQMEM_RSRC, (uint64_t)(uintptr_t)rsrc);
157 		if (umapdb == NULL) {
158 			status = IBT_INSUFF_RESOURCE;
159 			goto cqalloc_fail3;
160 		}
161 	}
162 
163 
164 	/*
165 	 * Allocate the doorbell record.  We'll need one for the CQ, handling
166 	 * both consumer index (SET CI) and the CQ state (CQ ARM).
167 	 */
168 
169 	status = hermon_dbr_alloc(state, uarpg, &cq->cq_arm_ci_dbr_acchdl,
170 	    &cq->cq_arm_ci_vdbr, &cq->cq_arm_ci_pdbr, &cq->cq_dbr_mapoffset);
171 	if (status != DDI_SUCCESS) {
172 		status = IBT_INSUFF_RESOURCE;
173 		goto cqalloc_fail4;
174 	}
175 
176 	/*
177 	 * Calculate the appropriate size for the completion queue.
178 	 * Note:  All Hermon CQs must be a power-of-2 minus 1 in size.  Also
179 	 * they may not be any smaller than HERMON_CQ_MIN_SIZE.  This step is
180 	 * to round the requested size up to the next highest power-of-2
181 	 */
182 	cq_attr->cq_size = max(cq_attr->cq_size, HERMON_CQ_MIN_SIZE);
183 	log_cq_size = highbit(cq_attr->cq_size);
184 
185 	/*
186 	 * Next we verify that the rounded-up size is valid (i.e. consistent
187 	 * with the device limits and/or software-configured limits)
188 	 */
189 	if (log_cq_size > state->hs_cfg_profile->cp_log_max_cq_sz) {
190 		status = IBT_HCA_CQ_EXCEEDED;
191 		goto cqalloc_fail4a;
192 	}
193 
194 	/*
195 	 * Allocate the memory for Completion Queue.
196 	 *
197 	 * Note: Although we use the common queue allocation routine, we
198 	 * always specify HERMON_QUEUE_LOCATION_NORMAL (i.e. CQ located in
199 	 * kernel system memory) for kernel CQs because it would be
200 	 * inefficient to have CQs located in DDR memory.  This is primarily
201 	 * because CQs are read from (by software) more than they are written
202 	 * to. (We always specify HERMON_QUEUE_LOCATION_USERLAND for all
203 	 * user-mappable CQs for a similar reason.)
204 	 * It is also worth noting that, unlike Hermon QP work queues,
205 	 * completion queues do not have the same strict alignment
206 	 * requirements.  It is sufficient for the CQ memory to be both
207 	 * aligned to and bound to addresses which are a multiple of CQE size.
208 	 */
209 	cq->cq_cqinfo.qa_size = (1 << log_cq_size) * sizeof (hermon_hw_cqe_t);
210 
211 	cq->cq_cqinfo.qa_alloc_align = PAGESIZE;
212 	cq->cq_cqinfo.qa_bind_align  = PAGESIZE;
213 	if (cq->cq_is_umap) {
214 		cq->cq_cqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
215 	} else {
216 		cq->cq_cqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
217 		hermon_arm_cq_dbr_init(cq->cq_arm_ci_vdbr);
218 	}
219 	status = hermon_queue_alloc(state, &cq->cq_cqinfo, sleepflag);
220 	if (status != DDI_SUCCESS) {
221 		status = IBT_INSUFF_RESOURCE;
222 		goto cqalloc_fail4;
223 	}
224 	buf = (hermon_hw_cqe_t *)cq->cq_cqinfo.qa_buf_aligned;
225 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))
226 
227 	/*
228 	 * The ownership bit of the CQE's is set by the HW during the process
229 	 * of transferrring ownership of the CQ (PRM 09.35c, 14.2.1, note D1
230 	 *
231 	 */
232 
233 	/*
234 	 * Register the memory for the CQ.  The memory for the CQ must
235 	 * be registered in the Hermon TPT tables.  This gives us the LKey
236 	 * to specify in the CQ context below.  Note: If this is a user-
237 	 * mappable CQ, then we will force DDI_DMA_CONSISTENT mapping.
238 	 */
239 	flag = (sleepflag == HERMON_SLEEP) ?  IBT_MR_SLEEP : IBT_MR_NOSLEEP;
240 	mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
241 	mr_attr.mr_len	 = cq->cq_cqinfo.qa_size;
242 	mr_attr.mr_as	 = NULL;
243 	mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
244 	op.mro_bind_type   = state->hs_cfg_profile->cp_iommu_bypass;
245 	op.mro_bind_dmahdl = cq->cq_cqinfo.qa_dmahdl;
246 	op.mro_bind_override_addr = 0;
247 	status = hermon_mr_register(state, pd, &mr_attr, &mr, &op,
248 	    HERMON_CQ_CMPT);
249 	if (status != DDI_SUCCESS) {
250 		status = IBT_INSUFF_RESOURCE;
251 		goto cqalloc_fail5;
252 	}
253 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
254 
255 	/* Sync entire CQ for use by the hardware. */
256 	(void) ddi_dma_sync(mr->mr_bindinfo.bi_dmahdl, 0,
257 	    cq->cq_cqinfo.qa_size, DDI_DMA_SYNC_FORDEV);
258 
259 	/*
260 	 * Fill in the CQC entry.  This is the final step before passing
261 	 * ownership of the CQC entry to the Hermon hardware.  We use all of
262 	 * the information collected/calculated above to fill in the
263 	 * requisite portions of the CQC.  Note: If this CQ is going to be
264 	 * used for userland access, then we need to set the UAR page number
265 	 * appropriately (otherwise it's a "don't care")
266 	 */
267 	bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
268 
269 	cq->cq_eqnum		= HERMON_CQ_EQNUM_GET(state);
270 	cq->cq_erreqnum		= HERMON_CQ_ERREQNUM_GET(state);
271 
272 	cqc_entry.state		= HERMON_CQ_DISARMED;
273 	cqc_entry.pg_offs	= cq->cq_cqinfo.qa_pgoffs >> 5;
274 	cqc_entry.log_cq_sz	= log_cq_size;
275 	cqc_entry.usr_page	= uarpg;
276 	cqc_entry.c_eqn		= cq->cq_eqnum;
277 	cqc_entry.log2_pgsz	= mr->mr_log2_pgsz;
278 	cqc_entry.mtt_base_addh = (uint32_t)((mr->mr_mttaddr >> 32) & 0xFF);
279 	cqc_entry.mtt_base_addl = mr->mr_mttaddr >> 3;
280 	cqc_entry.dbr_addrh = (uint32_t)((uint64_t)cq->cq_arm_ci_pdbr >> 32);
281 	cqc_entry.dbr_addrl = (uint32_t)((uint64_t)cq->cq_arm_ci_pdbr >> 3);
282 
283 	/*
284 	 * Write the CQC entry to hardware - we pass ownership of
285 	 * the entry to the hardware (using the Hermon SW2HW_CQ firmware
286 	 * command).  Note: In general, this operation shouldn't fail.  But
287 	 * if it does, we have to undo everything we've done above before
288 	 * returning error.
289 	 */
290 	status = hermon_cmn_ownership_cmd_post(state, SW2HW_CQ, &cqc_entry,
291 	    sizeof (hermon_hw_cqc_t), cq->cq_cqnum, sleepflag);
292 	if (status != HERMON_CMD_SUCCESS) {
293 		cmn_err(CE_CONT, "Hermon: SW2HW_CQ command failed: %08x\n",
294 		    status);
295 		if (status == HERMON_CMD_INVALID_STATUS) {
296 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
297 		}
298 		status = ibc_get_ci_failure(0);
299 		goto cqalloc_fail6;
300 	}
301 
302 	/*
303 	 * Fill in the rest of the Hermon Completion Queue handle.  Having
304 	 * successfully transferred ownership of the CQC, we can update the
305 	 * following fields for use in further operations on the CQ.
306 	 */
307 	cq->cq_resize_hdl = 0;
308 	cq->cq_cqcrsrcp	  = cqc;
309 	cq->cq_rsrcp	  = rsrc;
310 	cq->cq_consindx	  = 0;
311 		/* least restrictive */
312 	cq->cq_buf	  = buf;
313 	cq->cq_bufsz	  = (1 << log_cq_size);
314 	cq->cq_log_cqsz	  = log_cq_size;
315 	cq->cq_mrhdl	  = mr;
316 	cq->cq_refcnt	  = 0;
317 	cq->cq_is_special = 0;
318 	cq->cq_uarpg	  = uarpg;
319 	cq->cq_umap_dhp	  = (devmap_cookie_t)NULL;
320 	avl_create(&cq->cq_wrid_wqhdr_avl_tree, hermon_wrid_workq_compare,
321 	    sizeof (struct hermon_workq_avl_s),
322 	    offsetof(struct hermon_workq_avl_s, wqa_link));
323 
324 	cq->cq_hdlrarg	  = (void *)ibt_cqhdl;
325 
326 	/*
327 	 * Put CQ handle in Hermon CQNum-to-CQHdl list.  Then fill in the
328 	 * "actual_size" and "cqhdl" and return success
329 	 */
330 	ASSERT(state->hs_cqhdl[cqc->hr_indx] == NULL);
331 	state->hs_cqhdl[cqc->hr_indx] = cq;
332 
333 	/*
334 	 * If this is a user-mappable CQ, then we need to insert the previously
335 	 * allocated entry into the "userland resources database".  This will
336 	 * allow for later lookup during devmap() (i.e. mmap()) calls.
337 	 */
338 	if (cq->cq_is_umap) {
339 		hermon_umap_db_add(umapdb);
340 	}
341 
342 	/*
343 	 * Fill in the return arguments (if necessary).  This includes the
344 	 * real completion queue size.
345 	 */
346 	if (actual_size != NULL) {
347 		*actual_size = (1 << log_cq_size) - 1;
348 	}
349 	*cqhdl = cq;
350 
351 	return (DDI_SUCCESS);
352 
353 /*
354  * The following is cleanup for all possible failure cases in this routine
355  */
356 cqalloc_fail6:
357 	if (hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
358 	    sleepflag) != DDI_SUCCESS) {
359 		HERMON_WARNING(state, "failed to deregister CQ memory");
360 	}
361 cqalloc_fail5:
362 	hermon_queue_free(&cq->cq_cqinfo);
363 cqalloc_fail4a:
364 	hermon_dbr_free(state, uarpg, cq->cq_arm_ci_vdbr);
365 cqalloc_fail4:
366 	if (cq_is_umap) {
367 		hermon_umap_db_free(umapdb);
368 	}
369 cqalloc_fail3:
370 	hermon_rsrc_free(state, &rsrc);
371 cqalloc_fail2:
372 	hermon_rsrc_free(state, &cqc);
373 cqalloc_fail1:
374 	hermon_pd_refcnt_dec(pd);
375 cqalloc_fail:
376 	return (status);
377 }
378 
379 
/*
 * hermon_cq_free()
 *    Context: Can be called only from user or kernel context.
 *
 *    Tears down a completion queue: removes any userland mapping,
 *    reclaims CQC ownership from the hardware (HW2SW_CQ), deregisters
 *    the CQ memory (including any in-progress resize buffer), and frees
 *    the doorbell record, handle, and CQC resources.  Fails with
 *    IBT_CQ_BUSY if work queues still reference the CQ.  On success,
 *    "*cqhdl" is set to NULL.
 */
/* ARGSUSED */
int
hermon_cq_free(hermon_state_t *state, hermon_cqhdl_t *cqhdl, uint_t sleepflag)
{
	hermon_rsrc_t		*cqc, *rsrc;
	hermon_umap_db_entry_t	*umapdb;
	hermon_hw_cqc_t		cqc_entry;
	hermon_pdhdl_t		pd;
	hermon_mrhdl_t		mr;
	hermon_cqhdl_t		cq, resize;
	uint32_t		cqnum;
	uint64_t		value;
	uint_t			maxprot;
	int			status;

	/*
	 * Pull all the necessary information from the Hermon Completion Queue
	 * handle.  This is necessary here because the resource for the
	 * CQ handle is going to be freed up as part of this operation.
	 */
	cq	= *cqhdl;
	mutex_enter(&cq->cq_lock);
	cqc	= cq->cq_cqcrsrcp;
	rsrc	= cq->cq_rsrcp;
	pd	= state->hs_pdhdl_internal;
	mr	= cq->cq_mrhdl;
	cqnum	= cq->cq_cqnum;

	resize = cq->cq_resize_hdl;		/* save the handle for later */

	/*
	 * If there are work queues still associated with the CQ, then return
	 * an error.  Otherwise, we will be holding the CQ lock.
	 */
	if (cq->cq_refcnt != 0) {
		mutex_exit(&cq->cq_lock);
		return (IBT_CQ_BUSY);
	}

	/*
	 * If this was a user-mappable CQ, then we need to remove its entry
	 * from the "userland resources database".  If it is also currently
	 * mmap()'d out to a user process, then we need to call
	 * devmap_devmem_remap() to remap the CQ memory to an invalid mapping.
	 * We also need to invalidate the CQ tracking information for the
	 * user mapping.
	 */
	if (cq->cq_is_umap) {
		status = hermon_umap_db_find(state->hs_instance, cqnum,
		    MLNX_UMAP_CQMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE,
		    &umapdb);
		if (status != DDI_SUCCESS) {
			mutex_exit(&cq->cq_lock);
			HERMON_WARNING(state, "failed to find in database");
			return (ibc_get_ci_failure(0));
		}
		hermon_umap_db_free(umapdb);
		if (cq->cq_umap_dhp != NULL) {
			maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
			status = devmap_devmem_remap(cq->cq_umap_dhp,
			    state->hs_dip, 0, 0, cq->cq_cqinfo.qa_size,
			    maxprot, DEVMAP_MAPPING_INVALID, NULL);
			if (status != DDI_SUCCESS) {
				mutex_exit(&cq->cq_lock);
				HERMON_WARNING(state, "failed in CQ memory "
				    "devmap_devmem_remap()");
				return (ibc_get_ci_failure(0));
			}
			cq->cq_umap_dhp = (devmap_cookie_t)NULL;
		}
	}

	/*
	 * Put NULL into the Hermon CQNum-to-CQHdl list.  This will allow any
	 * in-progress events to detect that the CQ corresponding to this
	 * number has been freed.
	 */
	state->hs_cqhdl[cqc->hr_indx] = NULL;

	mutex_exit(&cq->cq_lock);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq))

	/*
	 * Reclaim CQC entry from hardware (using the Hermon HW2SW_CQ
	 * firmware command).  If the ownership transfer fails for any reason,
	 * then it is an indication that something (either in HW or SW) has
	 * gone seriously wrong.
	 */
	status = hermon_cmn_ownership_cmd_post(state, HW2SW_CQ, &cqc_entry,
	    sizeof (hermon_hw_cqc_t), cqnum, sleepflag);
	if (status != HERMON_CMD_SUCCESS) {
		HERMON_WARNING(state, "failed to reclaim CQC ownership");
		cmn_err(CE_CONT, "Hermon: HW2SW_CQ command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * From here on, we start relinquishing resources - but check to see
	 * if a resize was in progress - if so, we need to relinquish those
	 * resources as well
	 */


	/*
	 * Deregister the memory for the Completion Queue.  If this fails
	 * for any reason, then it is an indication that something (either
	 * in HW or SW) has gone seriously wrong.  So we print a warning
	 * message and return.
	 */
	status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
	    sleepflag);
	if (status != DDI_SUCCESS) {
		HERMON_WARNING(state, "failed to deregister CQ memory");
		return (ibc_get_ci_failure(0));
	}

	if (resize)	{	/* there was a pointer to a handle */
		mr = resize->cq_mrhdl;	/* reuse the pointer to the region */
		status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
		    sleepflag);
		if (status != DDI_SUCCESS) {
			HERMON_WARNING(state, "failed to deregister resize CQ "
			    "memory");
			return (ibc_get_ci_failure(0));
		}
	}

	/* Free the memory for the CQ */
	hermon_queue_free(&cq->cq_cqinfo);
	if (resize)	{
		hermon_queue_free(&resize->cq_cqinfo);
		/* and the temporary handle */
		kmem_free(resize, sizeof (struct hermon_sw_cq_s));
	}

	/* everything else does not matter for the resize in progress */

	/* Free the dbr */
	hermon_dbr_free(state, cq->cq_uarpg, cq->cq_arm_ci_vdbr);

	/* Free the Hermon Completion Queue handle */
	hermon_rsrc_free(state, &rsrc);

	/* Free up the CQC entry resource */
	hermon_rsrc_free(state, &cqc);

	/* Decrement the reference count on the protection domain (PD) */
	hermon_pd_refcnt_dec(pd);

	/* Set the cqhdl pointer to NULL and return success */
	*cqhdl = NULL;

	return (DDI_SUCCESS);
}
542 
543 
/*
 * hermon_cq_resize()
 *    Context: Can be called only from user or kernel context.
 *
 *    Begins a CQ resize: allocates and registers a new CQE buffer, posts
 *    the RESIZE_CQ firmware command, and stashes the new buffer's state
 *    in a "resize handle" hung off the CQ.  The actual switch-over to the
 *    new buffer is deferred until polling encounters the special resize
 *    CQE (opcode 0x16); see hermon_cq_poll().  Returns IBT_CQ_BUSY if a
 *    resize is already in flight.
 */
int
hermon_cq_resize(hermon_state_t *state, hermon_cqhdl_t cq, uint_t req_size,
    uint_t *actual_size, uint_t sleepflag)
{
	hermon_hw_cqc_t		cqc_entry;
	hermon_cqhdl_t		resize_hdl;
	hermon_qalloc_info_t	new_cqinfo;
	ibt_mr_attr_t		mr_attr;
	hermon_mr_options_t	op;
	hermon_pdhdl_t		pd;
	hermon_mrhdl_t		mr;
	hermon_hw_cqe_t		*buf;
	uint32_t		new_prod_indx;
	uint_t			log_cq_size;
	int			status, flag;

	if (cq->cq_resize_hdl != 0) {	/* already in process */
		status = IBT_CQ_BUSY;
		goto cqresize_fail;
	}


	/* Use the internal protection domain (PD) for CQs */
	pd = state->hs_pdhdl_internal;

	/*
	 * Calculate the appropriate size for the new resized completion queue.
	 * Note:  All Hermon CQs must be a power-of-2 minus 1 in size.  Also
	 * they may not be any smaller than HERMON_CQ_MIN_SIZE.  This step is
	 * to round the requested size up to the next highest power-of-2
	 */
	req_size = max(req_size, HERMON_CQ_MIN_SIZE);
	log_cq_size = highbit(req_size);

	/*
	 * Next we verify that the rounded-up size is valid (i.e. consistent
	 * with the device limits and/or software-configured limits)
	 */
	if (log_cq_size > state->hs_cfg_profile->cp_log_max_cq_sz) {
		status = IBT_HCA_CQ_EXCEEDED;
		goto cqresize_fail;
	}

	/*
	 * Allocate the memory for newly resized Completion Queue.
	 *
	 * Note: Although we use the common queue allocation routine, we
	 * always specify HERMON_QUEUE_LOCATION_NORMAL (i.e. CQ located in
	 * kernel system memory) for kernel CQs because it would be
	 * inefficient to have CQs located in DDR memory.  This is the same
	 * as we do when we first allocate completion queues primarily
	 * because CQs are read from (by software) more than they are written
	 * to. (We always specify HERMON_QUEUE_LOCATION_USERLAND for all
	 * user-mappable CQs for a similar reason.)
	 * It is also worth noting that, unlike Hermon QP work queues,
	 * completion queues do not have the same strict alignment
	 * requirements.  It is sufficient for the CQ memory to be both
	 * aligned to and bound to addresses which are a multiple of CQE size.
	 */

	/* first, alloc the resize_handle */
	resize_hdl = kmem_zalloc(sizeof (struct hermon_sw_cq_s), KM_SLEEP);

	new_cqinfo.qa_size = (1 << log_cq_size) * sizeof (hermon_hw_cqe_t);
	new_cqinfo.qa_alloc_align = PAGESIZE;
	new_cqinfo.qa_bind_align  = PAGESIZE;
	if (cq->cq_is_umap) {
		new_cqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
	} else {
		new_cqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
	}
	status = hermon_queue_alloc(state, &new_cqinfo, sleepflag);
	if (status != DDI_SUCCESS) {
		/* free the resize handle */
		kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
		status = IBT_INSUFF_RESOURCE;
		goto cqresize_fail;
	}
	buf = (hermon_hw_cqe_t *)new_cqinfo.qa_buf_aligned;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))

	/*
	 * No initialization of the cq is needed - the command will do it
	 */

	/*
	 * Register the memory for the CQ.  The memory for the CQ must
	 * be registered in the Hermon TPT tables.  This gives us the LKey
	 * to specify in the CQ context below.
	 */
	flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP;
	mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
	mr_attr.mr_len	 = new_cqinfo.qa_size;
	mr_attr.mr_as	 = NULL;
	mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
	op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
	op.mro_bind_dmahdl = new_cqinfo.qa_dmahdl;
	op.mro_bind_override_addr = 0;
	status = hermon_mr_register(state, pd, &mr_attr, &mr, &op,
	    HERMON_CQ_CMPT);
	if (status != DDI_SUCCESS) {
		hermon_queue_free(&new_cqinfo);
		/* free the resize handle */
		kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
		status = IBT_INSUFF_RESOURCE;
		goto cqresize_fail;
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

	(void) ddi_dma_sync(mr->mr_bindinfo.bi_dmahdl, 0,
	    new_cqinfo.qa_size, DDI_DMA_SYNC_FORDEV);

	/*
	 * Now we grab the CQ lock.  Since we will be updating the actual
	 * CQ location and the producer/consumer indexes, we should hold
	 * the lock.
	 *
	 * We post the command with HERMON_CMD_NOSLEEP_SPIN below, though,
	 * because we are holding the "cq_lock" and if we got raised to
	 * interrupt level by priority inversion, we would not want to block
	 * in this routine waiting for success.
	 */
	mutex_enter(&cq->cq_lock);

	/*
	 * Fill in the CQC entry.  For the resize operation this is the
	 * final step before attempting the resize operation on the CQC entry.
	 * We use all of the information collected/calculated above to fill
	 * in the requisite portions of the CQC.
	 */
	bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
	cqc_entry.log_cq_sz	= log_cq_size;
	cqc_entry.pg_offs	= new_cqinfo.qa_pgoffs >> 5;
	cqc_entry.log2_pgsz	= mr->mr_log2_pgsz;
	cqc_entry.mtt_base_addh = (uint32_t)((mr->mr_mttaddr >> 32) & 0xFF);
	cqc_entry.mtt_base_addl = mr->mr_mttaddr >> 3;

	/*
	 * Write the CQC entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware (using the Hermon RESIZE_CQ firmware
	 * command).  Note: In general, this operation shouldn't fail.  But
	 * if it does, we have to undo everything we've done above before
	 * returning error.  Also note that the status returned may indicate
	 * the code to return to the IBTF.
	 */
	status = hermon_resize_cq_cmd_post(state, &cqc_entry, cq->cq_cqnum,
	    &new_prod_indx, HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		/* Resize attempt has failed, drop CQ lock and cleanup */
		mutex_exit(&cq->cq_lock);
		if (hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
		    sleepflag) != DDI_SUCCESS) {
			HERMON_WARNING(state, "failed to deregister CQ memory");
		}
		kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
		hermon_queue_free(&new_cqinfo);
		if (status == HERMON_CMD_BAD_SIZE) {
			return (IBT_CQ_SZ_INSUFFICIENT);
		} else {
			cmn_err(CE_CONT, "Hermon: RESIZE_CQ command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}
	}

	/*
	 * For Hermon, we've alloc'd another handle structure and save off the
	 * important things in it. Then, in polling we check to see if there's
	 * a "resizing handle" and if so we look for the "special CQE", opcode
	 * 0x16, that indicates the transition to the new buffer.
	 *
	 * At that point, we'll adjust everything - including dereg and
	 * freeing of the original buffer, updating all the necessary fields
	 * in the cq_hdl, and setting up for the next cqe polling
	 */

	resize_hdl->cq_buf 	= buf;
	resize_hdl->cq_bufsz	= (1 << log_cq_size);
	resize_hdl->cq_mrhdl	= mr;
	resize_hdl->cq_log_cqsz = log_cq_size;

	bcopy(&new_cqinfo, &(resize_hdl->cq_cqinfo),
	    sizeof (struct hermon_qalloc_info_s));

	/* sync the new buffer for use by the device */
	(void) ddi_dma_sync(mr->mr_bindinfo.bi_dmahdl, 0,
	    new_cqinfo.qa_size, DDI_DMA_SYNC_FORDEV);

	/* now, save the address in the cq_handle */
	cq->cq_resize_hdl = resize_hdl;

	/*
	 * Drop the CQ lock now.
	 */

	mutex_exit(&cq->cq_lock);
	/*
	 * Fill in the return arguments (if necessary).  This includes the
	 * real new completion queue size.
	 */
	if (actual_size != NULL) {
		*actual_size = (1 << log_cq_size) - 1;
	}

	return (DDI_SUCCESS);

cqresize_fail:
	return (status);
}
761 
762 
763 /*
764  * hermon_cq_modify()
765  *    Context: Can be called base context.
766  *
767  * XXX - still need to implement use of the 'hid' argument.
768  */
769 /* ARGSUSED */
770 int
771 hermon_cq_modify(hermon_state_t *state, hermon_cqhdl_t cq,
772     uint_t count, uint_t usec, ibt_cq_handler_id_t hid, uint_t sleepflag)
773 {
774 	int	status;
775 	hermon_hw_cqc_t		cqc_entry;
776 
777 	mutex_enter(&cq->cq_lock);
778 	if (count != cq->cq_intmod_count ||
779 	    usec != cq->cq_intmod_usec) {
780 		bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
781 		cqc_entry.cq_max_cnt = count;
782 		cqc_entry.cq_period = usec;
783 		status = hermon_modify_cq_cmd_post(state, &cqc_entry,
784 		    cq->cq_cqnum, MODIFY_MODERATION_CQ, sleepflag);
785 		if (status != HERMON_CMD_SUCCESS) {
786 			mutex_exit(&cq->cq_lock);
787 			cmn_err(CE_CONT, "Hermon: MODIFY_CQ command failed: "
788 			    "%08x\n", status);
789 			if (status == HERMON_CMD_INVALID_STATUS) {
790 				hermon_fm_ereport(state, HCA_SYS_ERR,
791 				    HCA_ERR_SRV_LOST);
792 			}
793 			return (ibc_get_ci_failure(0));
794 		}
795 		cq->cq_intmod_count = count;
796 		cq->cq_intmod_usec = usec;
797 	}
798 	mutex_exit(&cq->cq_lock);
799 	return (DDI_SUCCESS);
800 }
801 
802 /*
803  * hermon_cq_notify()
804  *    Context: Can be called from interrupt or base context.
805  */
806 int
807 hermon_cq_notify(hermon_state_t *state, hermon_cqhdl_t cq,
808     ibt_cq_notify_flags_t flags)
809 {
810 	uint_t	cmd;
811 	ibt_status_t status;
812 
813 	/* Validate IBT flags and call doorbell routine. */
814 	if (flags == IBT_NEXT_COMPLETION) {
815 		cmd = HERMON_CQDB_NOTIFY_CQ;
816 	} else if (flags == IBT_NEXT_SOLICITED) {
817 		cmd = HERMON_CQDB_NOTIFY_CQ_SOLICIT;
818 	} else {
819 		return (IBT_CQ_NOTIFY_TYPE_INVALID);
820 	}
821 
822 	status = hermon_cq_arm_doorbell(state, cq, cmd);
823 	return (status);
824 }
825 
826 
827 /*
828  * hermon_cq_poll()
829  *    Context: Can be called from interrupt or base context.
830  */
int
hermon_cq_poll(hermon_state_t *state, hermon_cqhdl_t cq, ibt_wc_t *wc_p,
    uint_t num_wc, uint_t *num_polled)
{
	hermon_hw_cqe_t	*cqe;
	uint_t		opcode;
	uint32_t	cons_indx, wrap_around_mask;
	uint32_t	polled_cnt, spec_op = 0;
	int		status;

	/*
	 * Check for user-mappable CQ memory.  Note:  We do not allow kernel
	 * clients to poll CQ memory that is accessible directly by the user.
	 * If the CQ memory is user accessible, then return an error.
	 */
	if (cq->cq_is_umap) {
		return (IBT_CQ_HDL_INVALID);
	}

	mutex_enter(&cq->cq_lock);

	/* Get the consumer index */
	cons_indx = cq->cq_consindx;

	/*
	 * Calculate the wrap around mask.  Note: This operation only works
	 * because all Hermon completion queues have power-of-2 sizes
	 */
	wrap_around_mask = (cq->cq_bufsz - 1);

	/* Calculate the pointer to the first CQ entry */
	cqe = &cq->cq_buf[cons_indx & wrap_around_mask];

	/* Sync the current CQE to read */
	hermon_cqe_sync(cq, cqe, DDI_DMA_SYNC_FORCPU);

	/*
	 * Keep pulling entries from the CQ until we find an entry owned by
	 * the hardware.  As long as the CQEs are owned by SW, process each
	 * entry by calling hermon_cq_cqe_consume() and updating the CQ
	 * consumer index.  If the CQ is in the middle of a resize, the
	 * hardware may post a special "resize" CQE; that entry is handled
	 * inline (via hermon_cq_resize_helper()) and is never returned to
	 * the caller as a work completion.
	 */
	polled_cnt = 0;
	while (HERMON_CQE_OWNER_IS_SW(cq, cqe, cons_indx)) {
		if (cq->cq_resize_hdl != 0) {	/* in midst of resize */
			/* peek at the opcode */
			opcode = HERMON_CQE_OPCODE_GET(cq, cqe);
			if (opcode == HERMON_CQE_RCV_RESIZE_CODE) {
				/* switch over to the resized CQ buffer */
				hermon_cq_resize_helper(state, cq);

				/* Increment the consumer index */
				cons_indx = (cons_indx + 1);
				spec_op = 1; /* plus one for the limiting CQE */

				/* the resize may have changed the CQ size */
				wrap_around_mask = (cq->cq_bufsz - 1);

				/* Update the pointer to the next CQ entry */
				cqe = &cq->cq_buf[cons_indx & wrap_around_mask];

				/* Sync the next CQE to read */
				hermon_cqe_sync(cq, cqe, DDI_DMA_SYNC_FORCPU);

				continue;
			}
		}	/* in resizing CQ */

		/*
		 * either resizing and not the special opcode, or
		 * not resizing at all
		 */
		hermon_cq_cqe_consume(state, cq, cqe, &wc_p[polled_cnt++]);

		/* Sync the current CQE for device */
		hermon_cqe_sync(cq, cqe, DDI_DMA_SYNC_FORDEV);

		/* Increment the consumer index */
		cons_indx = (cons_indx + 1);

		/* Update the pointer to the next CQ entry */
		cqe = &cq->cq_buf[cons_indx & wrap_around_mask];

		/* Sync the next CQE to read */
		hermon_cqe_sync(cq, cqe, DDI_DMA_SYNC_FORCPU);

		/*
		 * If we have run out of space to store work completions,
		 * then stop and return the ones we have pulled off the CQ.
		 */
		if (polled_cnt >= num_wc) {
			break;
		}
	}

	/*
	 * Now we only ring the doorbell (to update the consumer index) if
	 * we've actually consumed a CQ entry.  If we have, for example,
	 * pulled from a CQE that we are still in the process of "recycling"
	 * for error purposes, then we would not update the consumer index.
	 */
	if ((polled_cnt != 0) && (cq->cq_consindx != cons_indx)) {
		/*
		 * Update the consumer index in both the CQ handle and the
		 * doorbell record.
		 */
		cq->cq_consindx = cons_indx;
		hermon_cq_update_ci_doorbell(cq);

	} else if (polled_cnt == 0) {
		/*
		 * "spec_op" is set only when the special resize CQE was
		 * consumed above.  In that case the consumer index must be
		 * advanced past the resize CQE even though no work
		 * completions were returned to the caller.
		 */
		if (spec_op != 0) {
			/* if we got the special opcode, update the consindx */
			cq->cq_consindx = cons_indx;
			hermon_cq_update_ci_doorbell(cq);
		}
	}

	mutex_exit(&cq->cq_lock);

	/* Set "num_polled" (if necessary) */
	if (num_polled != NULL) {
		*num_polled = polled_cnt;
	}

	/* Set CQ_EMPTY condition if needed, otherwise return success */
	if (polled_cnt == 0) {
		status = IBT_CQ_EMPTY;
	} else {
		status = DDI_SUCCESS;
	}

	/*
	 * Check if the system is currently panicking.  If it is, then call
	 * the Hermon interrupt service routine.  This step is necessary here
	 * because we might be in a polled I/O mode and without the call to
	 * hermon_isr() - and its subsequent calls to poll and rearm each
	 * event queue - we might overflow our EQs and render the system
	 * unable to sync/dump.
	 */
	if (ddi_in_panic() != 0) {
		(void) hermon_isr((caddr_t)state, (caddr_t)NULL);
	}
	return (status);
}
977 
978 /*
979  *	cmd_sn must be initialized to 1 to enable proper reenabling
980  *	by hermon_arm_cq_dbr_update().
981  */
982 static void
983 hermon_arm_cq_dbr_init(hermon_dbr_t *cq_arm_dbr)
984 {
985 	uint32_t *target;
986 
987 	target = (uint32_t *)cq_arm_dbr + 1;
988 	*target = htonl(1 << HERMON_CQDB_CMDSN_SHIFT);
989 }
990 
991 
992 /*
993  *	User cmd_sn needs help from this kernel function to know
994  *	when it should be incremented (modulo 4).  We do an atomic
995  *	update of the arm_cq dbr to communicate this fact.  We retry
996  *	in the case that user library is racing with us.  We zero
997  *	out the cmd field so that the user library can use the cmd
998  *	field to track the last command it issued (solicited verses any).
999  */
static void
hermon_arm_cq_dbr_update(hermon_dbr_t *cq_arm_dbr)
{
	uint32_t tmp, cmp, new;
	uint32_t old_cmd_sn, new_cmd_sn;
	uint32_t *target;
	int retries = 0;

	/* The arm/cmd_sn word is the second 32-bit word of the record */
	target = (uint32_t *)cq_arm_dbr + 1;
retry:
	cmp = *target;
	tmp = htonl(cmp);	/* byte-swap to host order (htonl/ntohl symmetric) */
	old_cmd_sn = tmp & (0x3 << HERMON_CQDB_CMDSN_SHIFT);
	/* increment cmd_sn modulo 4, staying within the 2-bit field */
	new_cmd_sn = (old_cmd_sn + (0x1 << HERMON_CQDB_CMDSN_SHIFT)) &
	    (0x3 << HERMON_CQDB_CMDSN_SHIFT);
	/*
	 * NOTE(review): the 0x37 mask appears to clear both the 3-bit cmd
	 * field and the old cmd_sn bits in a single step (this assumes
	 * HERMON_CQDB_CMDSN_SHIFT == HERMON_CQDB_CMD_SHIFT + 4) -- confirm
	 * against the doorbell record layout in the hardware definitions.
	 * Zeroing cmd lets the user library use that field to track the
	 * last command it issued (solicited versus any).
	 */
	new = htonl((tmp & ~(0x37 << HERMON_CQDB_CMD_SHIFT)) | new_cmd_sn);
	tmp = atomic_cas_32(target, cmp, new);
	if (tmp != cmp) {	/* cas failed, so need to retry */
		drv_usecwait(retries & 0xff);   /* avoid race */
		if (++retries > 100000) {
			cmn_err(CE_CONT, "cas failed in hermon\n");
			retries = 0;
		}
		goto retry;
	}
}
1026 
1027 
1028 /*
1029  * hermon_cq_handler()
1030  *    Context: Only called from interrupt context
1031  */
int
hermon_cq_handler(hermon_state_t *state, hermon_eqhdl_t eq,
    hermon_hw_eqe_t *eqe)
{
	hermon_cqhdl_t		cq;
	uint_t			cqnum;
	uint_t			eqe_evttype;

	eqe_evttype = HERMON_EQE_EVTTYPE_GET(eq, eqe);

	ASSERT(eqe_evttype == HERMON_EVT_COMPLETION ||
	    eqe_evttype == HERMON_EVT_EQ_OVERFLOW);

	/* EQ overflow is handled separately and reported as a failure */
	if (eqe_evttype == HERMON_EVT_EQ_OVERFLOW) {
		hermon_eq_overflow_handler(state, eq, eqe);
		return (DDI_FAILURE);
	}

	/* Get the CQ handle from CQ number in event descriptor */
	cqnum = HERMON_EQE_CQNUM_GET(eq, eqe);
	cq = hermon_cqhdl_from_cqnum(state, cqnum);

	/*
	 * If the CQ handle is NULL, this is probably an indication
	 * that the CQ has been freed already.  In which case, we
	 * should not deliver this event.
	 *
	 * We also check that the CQ number in the handle is the
	 * same as the CQ number in the event queue entry.  This
	 * extra check allows us to handle the case where a CQ was
	 * freed and then allocated again in the time it took to
	 * handle the event queue processing.  By constantly incrementing
	 * the non-constrained portion of the CQ number every time
	 * a new CQ is allocated, we mitigate (somewhat) the chance
	 * that a stale event could be passed to the client's CQ
	 * handler.
	 *
	 * Lastly, we check if "hs_ibtfpriv" is NULL.  If it is then it
	 * means that we have either received this event before we
	 * finished attaching to the IBTF or we've received it while we
	 * are in the process of detaching.
	 */
	if ((cq != NULL) && (cq->cq_cqnum == cqnum) &&
	    (state->hs_ibtfpriv != NULL)) {
		/* bump the doorbell cmd_sn, then invoke the IBTF callback */
		hermon_arm_cq_dbr_update(cq->cq_arm_ci_vdbr);
		HERMON_DO_IBTF_CQ_CALLB(state, cq);
	}

	return (DDI_SUCCESS);
}
1082 
1083 
1084 /*
1085  * hermon_cq_err_handler()
1086  *    Context: Only called from interrupt context
1087  */
int
hermon_cq_err_handler(hermon_state_t *state, hermon_eqhdl_t eq,
    hermon_hw_eqe_t *eqe)
{
	hermon_cqhdl_t		cq;
	uint_t			cqnum;
	ibc_async_event_t	event;
	ibt_async_code_t	type;
	uint_t			eqe_evttype;

	eqe_evttype = HERMON_EQE_EVTTYPE_GET(eq, eqe);


	ASSERT(eqe_evttype == HERMON_EVT_CQ_ERRORS ||
	    eqe_evttype == HERMON_EVT_EQ_OVERFLOW);

	/* EQ overflow is handled separately and reported as a failure */
	if (eqe_evttype == HERMON_EVT_EQ_OVERFLOW) {
		hermon_eq_overflow_handler(state, eq, eqe);
		return (DDI_FAILURE);
	}

	/* Post an FMA note before attempting to look up the CQ handle */
	HERMON_FMANOTE(state, HERMON_FMA_OVERRUN);
	/* Get the CQ handle from CQ number in event descriptor */
	cqnum = HERMON_EQE_CQNUM_GET(eq, eqe);
	cq = hermon_cqhdl_from_cqnum(state, cqnum);

	/*
	 * If the CQ handle is NULL, this is probably an indication
	 * that the CQ has been freed already.  In which case, we
	 * should not deliver this event.
	 *
	 * We also check that the CQ number in the handle is the
	 * same as the CQ number in the event queue entry.  This
	 * extra check allows us to handle the case where a CQ was
	 * freed and then allocated again in the time it took to
	 * handle the event queue processing.  By constantly incrementing
	 * the non-constrained portion of the CQ number every time
	 * a new CQ is allocated, we mitigate (somewhat) the chance
	 * that a stale event could be passed to the client's CQ
	 * handler.
	 *
	 * And then we check if "hs_ibtfpriv" is NULL.  If it is then it
	 * means that we have either received this event before we
	 * finished attaching to the IBTF or we've received it while we
	 * are in the process of detaching.
	 */
	if ((cq != NULL) && (cq->cq_cqnum == cqnum) &&
	    (state->hs_ibtfpriv != NULL)) {
		event.ev_cq_hdl = (ibt_cq_hdl_t)cq->cq_hdlrarg;
		type		= IBT_ERROR_CQ;
		HERMON_DO_IBTF_ASYNC_CALLB(state, type, &event);
	}

	return (DDI_SUCCESS);
}
1143 
1144 
1145 /*
1146  * hermon_cq_refcnt_inc()
1147  *    Context: Can be called from interrupt or base context.
1148  */
1149 int
1150 hermon_cq_refcnt_inc(hermon_cqhdl_t cq, uint_t is_special)
1151 {
1152 	/*
1153 	 * Increment the completion queue's reference count.  Note: In order
1154 	 * to ensure compliance with IBA C11-15, we must ensure that a given
1155 	 * CQ is not used for both special (SMI/GSI) QP and non-special QP.
1156 	 * This is accomplished here by keeping track of how the referenced
1157 	 * CQ is being used.
1158 	 */
1159 	mutex_enter(&cq->cq_lock);
1160 	if (cq->cq_refcnt == 0) {
1161 		cq->cq_is_special = is_special;
1162 	} else {
1163 		if (cq->cq_is_special != is_special) {
1164 			mutex_exit(&cq->cq_lock);
1165 			return (DDI_FAILURE);
1166 		}
1167 	}
1168 	cq->cq_refcnt++;
1169 	mutex_exit(&cq->cq_lock);
1170 	return (DDI_SUCCESS);
1171 }
1172 
1173 
1174 /*
1175  * hermon_cq_refcnt_dec()
1176  *    Context: Can be called from interrupt or base context.
1177  */
void
hermon_cq_refcnt_dec(hermon_cqhdl_t cq)
{
	/*
	 * Decrement the completion queue's reference count.  No underflow
	 * check is performed here; callers must hold a reference (i.e.
	 * have previously called hermon_cq_refcnt_inc() successfully).
	 */
	mutex_enter(&cq->cq_lock);
	cq->cq_refcnt--;
	mutex_exit(&cq->cq_lock);
}
1186 
1187 
1188 /*
1189  * hermon_cq_arm_doorbell()
1190  *    Context: Can be called from interrupt or base context.
1191  */
static int
hermon_cq_arm_doorbell(hermon_state_t *state, hermon_cqhdl_t cq, uint_t cq_cmd)
{
	uint32_t	cq_num;
	uint32_t	*target;
	uint32_t	old_cmd, cmp, new, tmp, cmd_sn;
	ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

	/* initialize the FMA retry loop */
	hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);

	cq_num = cq->cq_cqnum;
	/* The arm/cmd_sn word is the second 32-bit word of the record */
	target = (uint32_t *)cq->cq_arm_ci_vdbr + 1;

	/* the FMA retry loop starts for Hermon doorbell register. */
	hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt, fm_status,
	    fm_test_num);
retry:
	cmp = *target;
	tmp = htonl(cmp);	/* byte-swap to host order */
	old_cmd = tmp & (0x7 << HERMON_CQDB_CMD_SHIFT);
	cmd_sn = tmp & (0x3 << HERMON_CQDB_CMDSN_SHIFT);
	/*
	 * Only update the doorbell record (atomically, in case a user
	 * library is racing with us) and ring the UAR doorbell if the CQ
	 * is not already armed at (at least) the requested level.  A CQ
	 * armed with NOTIFY_CQ also satisfies NOTIFY_CQ_SOLICIT, hence the
	 * extra old_cmd check in the "else" branch below.
	 */
	if (cq_cmd == HERMON_CQDB_NOTIFY_CQ) {
		if (old_cmd != HERMON_CQDB_NOTIFY_CQ) {
			cmd_sn |= (HERMON_CQDB_NOTIFY_CQ <<
			    HERMON_CQDB_CMD_SHIFT);
			new = htonl(cmd_sn | (cq->cq_consindx & 0xFFFFFF));
			tmp = atomic_cas_32(target, cmp, new);
			if (tmp != cmp)
				goto retry;
			/*
			 * Ring the doorbell: high 32 bits carry cmd/cmd_sn
			 * and the CQ number, low 32 bits the consumer index
			 * (low 24 bits only).
			 */
			HERMON_UAR_DOORBELL(state, uarhdl, (uint64_t *)(void *)
			    &state->hs_uar->cq, (((uint64_t)cmd_sn | cq_num) <<
			    32) | (cq->cq_consindx & 0xFFFFFF));
		} /* else it's already armed */
	} else {
		ASSERT(cq_cmd == HERMON_CQDB_NOTIFY_CQ_SOLICIT);
		if (old_cmd != HERMON_CQDB_NOTIFY_CQ &&
		    old_cmd != HERMON_CQDB_NOTIFY_CQ_SOLICIT) {
			cmd_sn |= (HERMON_CQDB_NOTIFY_CQ_SOLICIT <<
			    HERMON_CQDB_CMD_SHIFT);
			new = htonl(cmd_sn | (cq->cq_consindx & 0xFFFFFF));
			tmp = atomic_cas_32(target, cmp, new);
			if (tmp != cmp)
				goto retry;
			HERMON_UAR_DOORBELL(state, uarhdl, (uint64_t *)(void *)
			    &state->hs_uar->cq, (((uint64_t)cmd_sn | cq_num) <<
			    32) | (cq->cq_consindx & 0xFFFFFF));
		} /* else it's already armed */
	}

	/* the FMA retry loop ends. */
	hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt, fm_status,
	    fm_test_num);

	return (IBT_SUCCESS);

pio_error:
	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
	return (ibc_get_ci_failure(0));
}
1252 
1253 
1254 /*
1255  * hermon_cqhdl_from_cqnum()
1256  *    Context: Can be called from interrupt or base context.
1257  *
1258  *    This routine is important because changing the unconstrained
1259  *    portion of the CQ number is critical to the detection of a
1260  *    potential race condition in the CQ handler code (i.e. the case
1261  *    where a CQ is freed and alloc'd again before an event for the
1262  *    "old" CQ can be handled).
1263  *
1264  *    While this is not a perfect solution (not sure that one exists)
1265  *    it does help to mitigate the chance that this race condition will
1266  *    cause us to deliver a "stale" event to the new CQ owner.  Note:
1267  *    this solution does not scale well because the number of constrained
1268  *    bits increases (and, hence, the number of unconstrained bits
1269  *    decreases) as the number of supported CQs grows.  For small and
1270  *    intermediate values, it should hopefully provide sufficient
1271  *    protection.
1272  */
1273 hermon_cqhdl_t
1274 hermon_cqhdl_from_cqnum(hermon_state_t *state, uint_t cqnum)
1275 {
1276 	uint_t	cqindx, cqmask;
1277 
1278 	/* Calculate the CQ table index from the cqnum */
1279 	cqmask = (1 << state->hs_cfg_profile->cp_log_num_cq) - 1;
1280 	cqindx = cqnum & cqmask;
1281 	return (state->hs_cqhdl[cqindx]);
1282 }
1283 
1284 /*
1285  * hermon_cq_cqe_consume()
1286  *    Context: Can be called from interrupt or base context.
1287  */
static void
hermon_cq_cqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
    hermon_hw_cqe_t *cqe, ibt_wc_t *wc)
{
	uint_t		opcode, qpnum, qp1_indx;
	ibt_wc_flags_t	flags;
	ibt_wrc_opcode_t type;

	/*
	 * Determine if this is an "error" CQE by examining "opcode".  If it
	 * is an error CQE, then call hermon_cq_errcqe_consume() to fill in
	 * the work completion and return.  Otherwise, this is a successful
	 * completion.
	 */
	opcode = HERMON_CQE_OPCODE_GET(cq, cqe);
	if ((opcode == HERMON_CQE_SEND_ERR_OPCODE) ||
	    (opcode == HERMON_CQE_RECV_ERR_OPCODE)) {
		hermon_cq_errcqe_consume(state, cq, cqe, wc);
		return;
	}

	/*
	 * Fetch the Work Request ID using the information in the CQE.
	 * See hermon_wr.c for more details.
	 */
	wc->wc_id = hermon_wrid_get_entry(cq, cqe);

	/*
	 * Parse the CQE opcode to determine completion type.  This will set
	 * not only the type of the completion, but also any flags that might
	 * be associated with it (e.g. whether immediate data is present).
	 */
	flags = IBT_WC_NO_FLAGS;
	if (HERMON_CQE_SENDRECV_GET(cq, cqe) != HERMON_COMPLETION_RECV) {

		/* Send CQE */
		switch (opcode) {
		case HERMON_CQE_SND_RDMAWR_IMM:
			flags |= IBT_WC_IMMED_DATA_PRESENT;
			/* FALLTHROUGH */
		case HERMON_CQE_SND_RDMAWR:
			type = IBT_WRC_RDMAW;
			break;

		case HERMON_CQE_SND_SEND_IMM:
			flags |= IBT_WC_IMMED_DATA_PRESENT;
			/* FALLTHROUGH */
		case HERMON_CQE_SND_SEND:
			type = IBT_WRC_SEND;
			break;

		case HERMON_CQE_SND_LSO:
			type = IBT_WRC_SEND_LSO;
			break;

		case HERMON_CQE_SND_RDMARD:
			type = IBT_WRC_RDMAR;
			break;

		case HERMON_CQE_SND_ATOMIC_CS:
			type = IBT_WRC_CSWAP;
			break;

		case HERMON_CQE_SND_ATOMIC_FA:
			type = IBT_WRC_FADD;
			break;

		case HERMON_CQE_SND_BIND_MW:
			type = IBT_WRC_BIND;
			break;

		default:
			HERMON_WARNING(state, "unknown send CQE type");
			wc->wc_status = IBT_WC_LOCAL_QP_OP_ERR;
			return;
		}
	} else {

		/* Receive CQE (only the low 5 bits encode the opcode) */
		switch (opcode & 0x1F) {
		/* for sendonly w/imm or sendlast w/imm */
		case HERMON_CQE_RCV_SEND_IMM:
			/*
			 * Note:  According to the PRM, all QP1 recv
			 * completions look like the result of a Send with
			 * Immediate.  They are not, however, (MADs are Send
			 * Only) so we need to check the QP number and set
			 * the flag only if it is non-QP1.
			 */
			qpnum	 = HERMON_CQE_QPNUM_GET(cq, cqe);
			qp1_indx = state->hs_spec_qp1->hr_indx;
			if ((qpnum < qp1_indx) || (qpnum > qp1_indx + 1)) {
				flags |= IBT_WC_IMMED_DATA_PRESENT;
			}
			/* FALLTHROUGH */
		/* for sendonly or sendlast */
		case HERMON_CQE_RCV_SEND:
			type = IBT_WRC_RECV;
			/* hardware-validated IP checksum (IPoIB offload) */
			if (HERMON_CQE_IS_IPOK(cq, cqe)) {
				wc->wc_cksum = HERMON_CQE_CKSUM(cq, cqe);
				flags |= IBT_WC_CKSUM_OK;
				wc->wc_detail = IBT_WC_DETAIL_ALL_FLAGS_MASK &
				    HERMON_CQE_IPOIB_STATUS(cq, cqe);
			}
			break;
		/* for RDMAwrite only or RDMAwrite last w/imm */
		case HERMON_CQE_RCV_RDMAWR_IMM:
			flags |= IBT_WC_IMMED_DATA_PRESENT;
			type = IBT_WRC_RECV_RDMAWI;
			break;

		default:
		/* still don't support send/invalidate, need to add later */

			HERMON_WARNING(state, "unknown recv CQE type");
			wc->wc_status = IBT_WC_LOCAL_QP_OP_ERR;
			return;
		}
	}
	wc->wc_type = type;

	/*
	 * Check for GRH, update the flags, then fill in "wc_flags" field
	 * in the work completion
	 */
	if (HERMON_CQE_GRH_GET(cq, cqe) != 0) {
		flags |= IBT_WC_GRH_PRESENT;
	}
	wc->wc_flags = flags;

	/* If we got here, completion status must be success */
	wc->wc_status = IBT_WC_SUCCESS;

	/*
	 * Parse the remaining contents of the CQE into the work completion.
	 * This means filling in SL, QP number, SLID, immediate data, etc.
	 * Note:  Not all of these fields are valid in a given completion.
	 * Many of them depend on the actual type of completion.  So we fill
	 * in all of the fields and leave it up to the IBTF and consumer to
	 * sort out which are valid based on their context.
	 *
	 * NOTE(review): wc_slid is filled from the CQE's DLID field (per
	 * the macro name) -- presumably the CQE reports the remote LID
	 * there for receive completions; confirm against the CQE layout.
	 */
	wc->wc_sl	  = HERMON_CQE_SL_GET(cq, cqe);
	wc->wc_immed_data = HERMON_CQE_IMM_ETH_PKEY_CRED_GET(cq, cqe);
	wc->wc_qpn	  = HERMON_CQE_DQPN_GET(cq, cqe);
	wc->wc_slid	  = HERMON_CQE_DLID_GET(cq, cqe);
	wc->wc_ethertype  = (wc->wc_immed_data & 0xFFFF);
	wc->wc_pkey_ix	  = (wc->wc_immed_data &
	    ((1 << state->hs_queryport.log_max_pkey) - 1));
	/*
	 * Depending on whether the completion was a receive or a send
	 * completion, fill in "bytes transferred" as appropriate.  Also,
	 * if necessary, fill in the "path bits" field.
	 */
	if (HERMON_CQE_SENDRECV_GET(cq, cqe) == HERMON_COMPLETION_RECV) {
		wc->wc_path_bits = HERMON_CQE_PATHBITS_GET(cq, cqe);
		wc->wc_bytes_xfer = HERMON_CQE_BYTECNT_GET(cq, cqe);

	} else if ((wc->wc_type == IBT_WRC_RDMAR) ||
	    (wc->wc_type == IBT_WRC_CSWAP) || (wc->wc_type == IBT_WRC_FADD)) {
		wc->wc_bytes_xfer = HERMON_CQE_BYTECNT_GET(cq, cqe);
	}
}
1450 
1451 /*
1452  * hermon_cq_errcqe_consume()
1453  *    Context: Can be called from interrupt or base context.
1454  */
static void
hermon_cq_errcqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
    hermon_hw_cqe_t *cqe, ibt_wc_t *wc)
{
	uint32_t		imm_eth_pkey_cred;
	uint_t			status;
	ibt_wc_status_t		ibt_status;

	/*
	 * Fetch the Work Request ID using the information in the CQE.
	 * See hermon_wr.c for more details.
	 */
	wc->wc_id = hermon_wrid_get_entry(cq, cqe);

	/*
	 * Parse the CQE opcode to determine completion type.  We know that
	 * the CQE is an error completion, so we extract only the completion
	 * status/syndrome here.  (In an error CQE the "immediate data"
	 * field carries the error syndrome instead.)
	 */
	imm_eth_pkey_cred = HERMON_CQE_ERROR_SYNDROME_GET(cq, cqe);
	status = imm_eth_pkey_cred;
	/* Map each hardware syndrome to the corresponding IBTF status */
	switch (status) {
	case HERMON_CQE_LOC_LEN_ERR:
		HERMON_FMANOTE(state, HERMON_FMA_LOCLEN);
		ibt_status = IBT_WC_LOCAL_LEN_ERR;
		break;

	case HERMON_CQE_LOC_OP_ERR:
		HERMON_FMANOTE(state, HERMON_FMA_LOCQPOP);
		ibt_status = IBT_WC_LOCAL_QP_OP_ERR;
		break;

	case HERMON_CQE_LOC_PROT_ERR:
		HERMON_FMANOTE(state, HERMON_FMA_LOCPROT);
		ibt_status = IBT_WC_LOCAL_PROTECT_ERR;
		/* debugging aid: panic on local protection errors if set */
		if (hermon_should_panic) {
			IBTF_DPRINTF_L2("ERRCQE", "is at %p", cqe);
			cmn_err(CE_PANIC, "Hermon intentional PANIC - "
			    "Local Protection Error\n");
		}
		break;

	case HERMON_CQE_WR_FLUSHED_ERR:
		ibt_status = IBT_WC_WR_FLUSHED_ERR;
		break;

	case HERMON_CQE_MW_BIND_ERR:
		HERMON_FMANOTE(state, HERMON_FMA_MWBIND);
		ibt_status = IBT_WC_MEM_WIN_BIND_ERR;
		break;

	case HERMON_CQE_BAD_RESPONSE_ERR:
		HERMON_FMANOTE(state, HERMON_FMA_RESP);
		ibt_status = IBT_WC_BAD_RESPONSE_ERR;
		break;

	case HERMON_CQE_LOCAL_ACCESS_ERR:
		HERMON_FMANOTE(state, HERMON_FMA_LOCACC);
		ibt_status = IBT_WC_LOCAL_ACCESS_ERR;
		break;

	case HERMON_CQE_REM_INV_REQ_ERR:
		HERMON_FMANOTE(state, HERMON_FMA_REMREQ);
		ibt_status = IBT_WC_REMOTE_INVALID_REQ_ERR;
		break;

	case HERMON_CQE_REM_ACC_ERR:
		HERMON_FMANOTE(state, HERMON_FMA_REMACC);
		ibt_status = IBT_WC_REMOTE_ACCESS_ERR;
		break;

	case HERMON_CQE_REM_OP_ERR:
		HERMON_FMANOTE(state, HERMON_FMA_REMOP);
		ibt_status = IBT_WC_REMOTE_OP_ERR;
		break;

	case HERMON_CQE_TRANS_TO_ERR:
		HERMON_FMANOTE(state, HERMON_FMA_XPORTCNT);
		ibt_status = IBT_WC_TRANS_TIMEOUT_ERR;
		break;

	case HERMON_CQE_RNRNAK_TO_ERR:
		HERMON_FMANOTE(state, HERMON_FMA_RNRCNT);
		ibt_status = IBT_WC_RNR_NAK_TIMEOUT_ERR;
		break;

	/*
	 * The following error codes are not supported in the Hermon driver
	 * as they relate only to Reliable Datagram completion statuses:
	 *    case HERMON_CQE_LOCAL_RDD_VIO_ERR:
	 *    case HERMON_CQE_REM_INV_RD_REQ_ERR:
	 *    case HERMON_CQE_EEC_REM_ABORTED_ERR:
	 *    case HERMON_CQE_INV_EEC_NUM_ERR:
	 *    case HERMON_CQE_INV_EEC_STATE_ERR:
	 *    case HERMON_CQE_LOC_EEC_ERR:
	 */

	default:
		HERMON_WARNING(state, "unknown error CQE status");
		HERMON_FMANOTE(state, HERMON_FMA_UNKN);
		ibt_status = IBT_WC_LOCAL_QP_OP_ERR;
		break;
	}

	wc->wc_status = ibt_status;
}
1561 
1562 
1563 /*
1564  * hermon_cqe_sync()
1565  *    Context: Can be called from interrupt or base context.
1566  */
1567 static void
1568 hermon_cqe_sync(hermon_cqhdl_t cq, hermon_hw_cqe_t *cqe, uint_t flag)
1569 {
1570 	ddi_dma_handle_t	dmahdl;
1571 	off_t			offset;
1572 	int			status;
1573 
1574 	/* Get the DMA handle from CQ context */
1575 	dmahdl = cq->cq_mrhdl->mr_bindinfo.bi_dmahdl;
1576 
1577 	/* Calculate offset of next CQE */
1578 	offset = (off_t)((uintptr_t)cqe - (uintptr_t)&cq->cq_buf[0]);
1579 	status = ddi_dma_sync(dmahdl, offset, sizeof (hermon_hw_cqe_t), flag);
1580 	if (status != DDI_SUCCESS) {
1581 		return;
1582 	}
1583 }
1584 
1585 
1586 /*
1587  * hermon_cq_resize_helper()
1588  *    Context: Can be called only from user or kernel context.
1589  */
1590 void
1591 hermon_cq_resize_helper(hermon_state_t *state, hermon_cqhdl_t cq)
1592 {
1593 	hermon_cqhdl_t 		resize_hdl;
1594 	int			status;
1595 
1596 	/*
1597 	 * we're here because we found the special cqe opcode, so we have
1598 	 * to update the cq_handle, release the old resources, clear the
1599 	 * flag in the cq_hdl, and release the resize_hdl.  When we return
1600 	 * above, it will take care of the rest
1601 	 */
1602 	ASSERT(MUTEX_HELD(&cq->cq_lock));
1603 
1604 	resize_hdl = cq->cq_resize_hdl;
1605 
1606 	/*
1607 	 * Deregister the memory for the old Completion Queue.  Note: We
1608 	 * really can't return error here because we have no good way to
1609 	 * cleanup.  Plus, the deregistration really shouldn't ever happen.
1610 	 * So, if it does, it is an indication that something has gone
1611 	 * seriously wrong.  So we print a warning message and return error
1612 	 * (knowing, of course, that the "old" CQ memory will be leaked)
1613 	 */
1614 	status = hermon_mr_deregister(state, &cq->cq_mrhdl, HERMON_MR_DEREG_ALL,
1615 	    HERMON_SLEEP);
1616 	if (status != DDI_SUCCESS) {
1617 		HERMON_WARNING(state, "failed to deregister old CQ memory");
1618 	}
1619 
1620 	/* Next, free the memory from the old CQ buffer */
1621 	hermon_queue_free(&cq->cq_cqinfo);
1622 
1623 	/* now we can update the cq_hdl with the new things saved */
1624 
1625 	cq->cq_buf   = resize_hdl->cq_buf;
1626 	cq->cq_mrhdl = resize_hdl->cq_mrhdl;
1627 	cq->cq_bufsz = resize_hdl->cq_bufsz;
1628 	cq->cq_log_cqsz = resize_hdl->cq_log_cqsz;
1629 	cq->cq_umap_dhp = cq->cq_resize_hdl->cq_umap_dhp;
1630 	cq->cq_resize_hdl = 0;
1631 	bcopy(&resize_hdl->cq_cqinfo, &cq->cq_cqinfo,
1632 	    sizeof (struct hermon_qalloc_info_s));
1633 
1634 	/* finally, release the resizing handle */
1635 	kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
1636 }
1637 
1638 
1639 /*
1640  * hermon_cq_entries_flush()
1641  * Context: Can be called from interrupt or base context.
1642  */
1643 /* ARGSUSED */
void
hermon_cq_entries_flush(hermon_state_t *state, hermon_qphdl_t qp)
{
	hermon_cqhdl_t		cq;
	hermon_hw_cqe_t		*cqe, *next_cqe;
	hermon_srqhdl_t		srq;
	hermon_workq_hdr_t	*wq;
	uint32_t		cons_indx, tail_cons_indx, wrap_around_mask;
	uint32_t		new_indx, check_indx, qpnum;
	int			outstanding_cqes;

	qpnum = qp->qp_qpnum;
	/* If the QP uses an SRQ, we also need its work queue header below */
	if ((srq = qp->qp_srqhdl) != NULL)
		wq = qp->qp_srqhdl->srq_wq_wqhdr;
	else
		wq = NULL;
	/* Start with the receive CQ; the send CQ is handled second */
	cq = qp->qp_rq_cqhdl;

do_send_cq:	/* loop back to here if send_cq is not the same as recv_cq */

	cons_indx = cq->cq_consindx;
	wrap_around_mask = (cq->cq_bufsz - 1);

	/* Calculate the pointer to the first CQ entry */
	cqe = &cq->cq_buf[cons_indx & wrap_around_mask];

	/* Sync the current CQE to read */
	hermon_cqe_sync(cq, cqe, DDI_DMA_SYNC_FORCPU);

	/*
	 * Loop through the CQ looking for entries owned by software.  If an
	 * entry is owned by software then we increment an 'outstanding_cqes'
	 * count to know how many entries total we have on our CQ.  We use this
	 * value further down to know how many entries to loop through looking
	 * for our same QP number.
	 */
	outstanding_cqes = 0;
	tail_cons_indx = cons_indx;
	while (HERMON_CQE_OWNER_IS_SW(cq, cqe, tail_cons_indx)) {
		/* increment total cqes count */
		outstanding_cqes++;

		/* increment the consumer index */
		tail_cons_indx++;

		/* update the pointer to the next cq entry */
		cqe = &cq->cq_buf[tail_cons_indx & wrap_around_mask];

		/* sync the next cqe to read */
		hermon_cqe_sync(cq, cqe, DDI_DMA_SYNC_FORCPU);
	}

	/*
	 * Using the 'tail_cons_indx' that was just set, we now know how many
	 * total CQEs possible there are.  Set the 'check_indx' and the
	 * 'new_indx' to the last entry identified by 'tail_cons_indx'.
	 * We then walk the CQ backwards, compacting the CQEs that must be
	 * kept (those belonging to other QPs) toward the tail.
	 */
	check_indx = new_indx = (tail_cons_indx - 1);

	while (--outstanding_cqes >= 0) {
		cqe = &cq->cq_buf[check_indx & wrap_around_mask];

		/*
		 * If the QP number is the same in the CQE as the QP, then
		 * we must "consume" it.  If it is for an SRQ wqe, then we
		 * also must free the wqe back onto the free list of the SRQ.
		 */
		if (qpnum == HERMON_CQE_QPNUM_GET(cq, cqe)) {
			if (srq && (HERMON_CQE_SENDRECV_GET(cq, cqe) ==
			    HERMON_COMPLETION_RECV)) {
				uint64_t *desc;
				int indx;

				/*
				 * Add wqe back to SRQ free list.
				 * NOTE(review): this writes the freed wqe's
				 * index into the second 16-bit word of the
				 * current tail wqe -- presumably the
				 * next-free-index link; confirm against the
				 * SRQ wqe layout in hermon_wr.c.
				 */
				indx = HERMON_CQE_WQEADDRSZ_GET(cq, cqe) &
				    wq->wq_mask;
				desc = HERMON_SRQ_WQE_ADDR(srq, wq->wq_tail);
				((uint16_t *)desc)[1] = htons(indx);
				wq->wq_tail = indx;
			}
		} else {	/* CQEs for other QPNs need to remain */
			if (check_indx != new_indx) {
				next_cqe =
				    &cq->cq_buf[new_indx & wrap_around_mask];
				/* Copy the CQE into the "next_cqe" pointer. */
				bcopy(cqe, next_cqe, sizeof (hermon_hw_cqe_t));
			}
			new_indx--;	/* move index to next CQE to fill */
		}
		check_indx--;		/* move index to next CQE to check */
	}

	/*
	 * Update consumer index to be the 'new_indx'.  This moves it past all
	 * removed entries.  Because 'new_indx' is pointing to the last
	 * previously valid SW owned entry, we add 1 to point the cons_indx to
	 * the first HW owned entry.
	 */
	cons_indx = (new_indx + 1);

	/*
	 * Now we only ring the doorbell (to update the consumer index) if
	 * we've actually consumed a CQ entry.  If we found no QP number
	 * matches above, then we would not have removed anything.  So only if
	 * something was removed do we ring the doorbell.
	 */
	if (cq->cq_consindx != cons_indx) {
		/*
		 * Update the consumer index in both the CQ handle and the
		 * doorbell record.
		 */
		cq->cq_consindx = cons_indx;

		hermon_cq_update_ci_doorbell(cq);

	}
	/* Repeat the whole flush for the send CQ if it is a different CQ */
	if (cq != qp->qp_sq_cqhdl) {
		cq = qp->qp_sq_cqhdl;
		goto do_send_cq;
	}
}
1765