xref: /illumos-gate/usr/src/uts/common/io/ib/clients/rds/rdsib_ib.c (revision 03494a9880d80f834bec10a1e8f0a2f8f7c97bf4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved.
27  *
28  * This software is available to you under a choice of one of two
29  * licenses.  You may choose to be licensed under the terms of the GNU
30  * General Public License (GPL) Version 2, available from the file
31  * COPYING in the main directory of this source tree, or the
32  * OpenIB.org BSD license below:
33  *
34  *     Redistribution and use in source and binary forms, with or
35  *     without modification, are permitted provided that the following
36  *     conditions are met:
37  *
38  *	- Redistributions of source code must retain the above
39  *	  copyright notice, this list of conditions and the following
40  *	  disclaimer.
41  *
42  *	- Redistributions in binary form must reproduce the above
43  *	  copyright notice, this list of conditions and the following
44  *	  disclaimer in the documentation and/or other materials
45  *	  provided with the distribution.
46  *
47  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
48  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
49  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
50  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
51  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
52  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
53  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
54  * SOFTWARE.
55  *
56  */
57 /*
58  * Sun elects to include this software in Sun product
59  * under the OpenIB BSD license.
60  *
61  *
62  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
63  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
64  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
65  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
66  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
67  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
68  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
69  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
70  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
71  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
72  * POSSIBILITY OF SUCH DAMAGE.
73  */
74 
75 #include <sys/types.h>
76 #include <sys/ddi.h>
77 #include <sys/sunddi.h>
78 #include <sys/ib/clients/rds/rdsib_cm.h>
79 #include <sys/ib/clients/rds/rdsib_ib.h>
80 #include <sys/ib/clients/rds/rdsib_buf.h>
81 #include <sys/ib/clients/rds/rdsib_ep.h>
82 #include <sys/ib/clients/rds/rds_kstat.h>
83 
84 static void rds_async_handler(void *clntp, ibt_hca_hdl_t hdl,
85     ibt_async_code_t code, ibt_async_event_t *event);
86 
87 static struct ibt_clnt_modinfo_s rds_ib_modinfo = {
88 	IBTI_V_CURR,
89 	IBT_NETWORK,
90 	rds_async_handler,
91 	NULL,
92 	"RDS"
93 };
94 
95 /* performance tunables */
96 uint_t		rds_no_interrupts = 0;
97 uint_t		rds_poll_percent_full = 25;
98 uint_t		rds_wc_signal = IBT_NEXT_SOLICITED;
99 uint_t		rds_waittime_ms = 100; /* ms */
100 
101 extern dev_info_t *rdsib_dev_info;
102 extern void rds_close_sessions();
103 
104 static void
105 rdsib_validate_chan_sizes(ibt_hca_attr_t *hattrp)
106 {
107 	/* The SQ size should not be more than that supported by the HCA */
108 	if (((MaxDataSendBuffers + RDS_NUM_ACKS) > hattrp->hca_max_chan_sz) ||
109 	    ((MaxDataSendBuffers + RDS_NUM_ACKS) > hattrp->hca_max_cq_sz)) {
110 		RDS_DPRINTF2("RDSIB", "MaxDataSendBuffers + %d is greater "
111 		    "than that supported by the HCA driver "
112 		    "(%d + %d > %d or %d), lowering it to a supported value.",
113 		    RDS_NUM_ACKS, MaxDataSendBuffers, RDS_NUM_ACKS,
114 		    hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);
115 
116 		MaxDataSendBuffers = (hattrp->hca_max_chan_sz >
117 		    hattrp->hca_max_cq_sz) ?
118 		    hattrp->hca_max_cq_sz - RDS_NUM_ACKS :
119 		    hattrp->hca_max_chan_sz - RDS_NUM_ACKS;
120 	}
121 
122 	/* The RQ size should not be more than that supported by the HCA */
123 	if ((MaxDataRecvBuffers > hattrp->hca_max_chan_sz) ||
124 	    (MaxDataRecvBuffers > hattrp->hca_max_cq_sz)) {
125 		RDS_DPRINTF2("RDSIB", "MaxDataRecvBuffers is greater than that "
126 		    "supported by the HCA driver (%d > %d or %d), lowering it "
127 		    "to a supported value.", MaxDataRecvBuffers,
128 		    hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);
129 
130 		MaxDataRecvBuffers = (hattrp->hca_max_chan_sz >
131 		    hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz :
132 		    hattrp->hca_max_chan_sz;
133 	}
134 
135 	/* The SQ size should not be more than that supported by the HCA */
136 	if ((MaxCtrlSendBuffers > hattrp->hca_max_chan_sz) ||
137 	    (MaxCtrlSendBuffers > hattrp->hca_max_cq_sz)) {
138 		RDS_DPRINTF2("RDSIB", "MaxCtrlSendBuffers is greater than that "
139 		    "supported by the HCA driver (%d > %d or %d), lowering it "
140 		    "to a supported value.", MaxCtrlSendBuffers,
141 		    hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);
142 
143 		MaxCtrlSendBuffers = (hattrp->hca_max_chan_sz >
144 		    hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz :
145 		    hattrp->hca_max_chan_sz;
146 	}
147 
148 	/* The RQ size should not be more than that supported by the HCA */
149 	if ((MaxCtrlRecvBuffers > hattrp->hca_max_chan_sz) ||
150 	    (MaxCtrlRecvBuffers > hattrp->hca_max_cq_sz)) {
151 		RDS_DPRINTF2("RDSIB", "MaxCtrlRecvBuffers is greater than that "
152 		    "supported by the HCA driver (%d > %d or %d), lowering it "
153 		    "to a supported value.", MaxCtrlRecvBuffers,
154 		    hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);
155 
156 		MaxCtrlRecvBuffers = (hattrp->hca_max_chan_sz >
157 		    hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz :
158 		    hattrp->hca_max_chan_sz;
159 	}
160 
161 	/* The MaxRecvMemory should be less than that supported by the HCA */
162 	if ((NDataRX * RdsPktSize) > hattrp->hca_max_memr_len) {
163 		RDS_DPRINTF2("RDSIB", "MaxRecvMemory is greater than that "
164 		    "supported by the HCA driver (%d > %d), lowering it to %d",
165 		    NDataRX * RdsPktSize, hattrp->hca_max_memr_len,
166 		    hattrp->hca_max_memr_len);
167 
168 		NDataRX = hattrp->hca_max_memr_len/RdsPktSize;
169 	}
170 }
171 
172 /* Return hcap, given the hca guid */
173 rds_hca_t *
174 rds_lkup_hca(ib_guid_t hca_guid)
175 {
176 	rds_hca_t	*hcap;
177 
178 	RDS_DPRINTF4("rds_lkup_hca", "Enter: statep: 0x%p "
179 	    "guid: %llx", rdsib_statep, hca_guid);
180 
181 	rw_enter(&rdsib_statep->rds_hca_lock, RW_READER);
182 
183 	hcap = rdsib_statep->rds_hcalistp;
184 	while ((hcap != NULL) && (hcap->hca_guid != hca_guid)) {
185 		hcap = hcap->hca_nextp;
186 	}
187 
188 	rw_exit(&rdsib_statep->rds_hca_lock);
189 
190 	RDS_DPRINTF4("rds_lkup_hca", "return");
191 
192 	return (hcap);
193 }
194 
195 
196 static rds_hca_t *
197 rdsib_init_hca(ib_guid_t hca_guid)
198 {
199 	rds_hca_t	*hcap;
200 	boolean_t	alloc = B_FALSE;
201 	int		ret;
202 
203 	RDS_DPRINTF2("rdsib_init_hca", "enter: HCA 0x%llx", hca_guid);
204 
205 	/* Do a HCA lookup */
206 	hcap = rds_lkup_hca(hca_guid);
207 
208 	if (hcap != NULL && hcap->hca_hdl != NULL) {
209 		/*
210 		 * This can happen if we get IBT_HCA_ATTACH_EVENT on an HCA
211 		 * that we have already opened. Just return NULL so that
212 		 * we'll not end up reinitializing the HCA again.
213 		 */
214 		RDS_DPRINTF2("rdsib_init_hca", "HCA already initialized");
215 		return (NULL);
216 	}
217 
218 	if (hcap == NULL) {
219 		RDS_DPRINTF2("rdsib_init_hca", "New HCA is added");
220 		hcap = (rds_hca_t *)kmem_zalloc(sizeof (rds_hca_t), KM_SLEEP);
221 		alloc = B_TRUE;
222 	}
223 
224 	hcap->hca_guid = hca_guid;
225 	ret = ibt_open_hca(rdsib_statep->rds_ibhdl, hca_guid,
226 	    &hcap->hca_hdl);
227 	if (ret != IBT_SUCCESS) {
228 		if (ret == IBT_HCA_IN_USE) {
229 			RDS_DPRINTF2("rdsib_init_hca",
230 			    "ibt_open_hca: 0x%llx returned IBT_HCA_IN_USE",
231 			    hca_guid);
232 		} else {
233 			RDS_DPRINTF2("rdsib_init_hca",
234 			    "ibt_open_hca: 0x%llx failed: %d", hca_guid, ret);
235 		}
236 		if (alloc == B_TRUE) {
237 			kmem_free(hcap, sizeof (rds_hca_t));
238 		}
239 		return (NULL);
240 	}
241 
242 	ret = ibt_query_hca(hcap->hca_hdl, &hcap->hca_attr);
243 	if (ret != IBT_SUCCESS) {
244 		RDS_DPRINTF2("rdsib_init_hca",
245 		    "Query HCA: 0x%llx failed:  %d", hca_guid, ret);
246 		ret = ibt_close_hca(hcap->hca_hdl);
247 		ASSERT(ret == IBT_SUCCESS);
248 		if (alloc == B_TRUE) {
249 			kmem_free(hcap, sizeof (rds_hca_t));
250 		} else {
251 			hcap->hca_hdl = NULL;
252 		}
253 		return (NULL);
254 	}
255 
256 	ret = ibt_query_hca_ports(hcap->hca_hdl, 0,
257 	    &hcap->hca_pinfop, &hcap->hca_nports, &hcap->hca_pinfo_sz);
258 	if (ret != IBT_SUCCESS) {
259 		RDS_DPRINTF2("rdsib_init_hca",
260 		    "Query HCA 0x%llx ports failed: %d", hca_guid,
261 		    ret);
262 		ret = ibt_close_hca(hcap->hca_hdl);
263 		hcap->hca_hdl = NULL;
264 		ASSERT(ret == IBT_SUCCESS);
265 		if (alloc == B_TRUE) {
266 			kmem_free(hcap, sizeof (rds_hca_t));
267 		} else {
268 			hcap->hca_hdl = NULL;
269 		}
270 		return (NULL);
271 	}
272 
273 	/* Only one PD per HCA is allocated, so do it here */
274 	ret = ibt_alloc_pd(hcap->hca_hdl, IBT_PD_NO_FLAGS,
275 	    &hcap->hca_pdhdl);
276 	if (ret != IBT_SUCCESS) {
277 		RDS_DPRINTF2("rdsib_init_hca",
278 		    "ibt_alloc_pd 0x%llx failed: %d", hca_guid, ret);
279 		(void) ibt_free_portinfo(hcap->hca_pinfop,
280 		    hcap->hca_pinfo_sz);
281 		ret = ibt_close_hca(hcap->hca_hdl);
282 		ASSERT(ret == IBT_SUCCESS);
283 		hcap->hca_hdl = NULL;
284 		if (alloc == B_TRUE) {
285 			kmem_free(hcap, sizeof (rds_hca_t));
286 		} else {
287 			hcap->hca_hdl = NULL;
288 		}
289 		return (NULL);
290 	}
291 
292 	rdsib_validate_chan_sizes(&hcap->hca_attr);
293 
294 	rw_enter(&rdsib_statep->rds_hca_lock, RW_WRITER);
295 	hcap->hca_state = RDS_HCA_STATE_OPEN;
296 	if (alloc == B_TRUE) {
297 		/* this is a new HCA, add it to the list */
298 		rdsib_statep->rds_nhcas++;
299 		hcap->hca_nextp = rdsib_statep->rds_hcalistp;
300 		rdsib_statep->rds_hcalistp = hcap;
301 	}
302 	rw_exit(&rdsib_statep->rds_hca_lock);
303 
304 	RDS_DPRINTF2("rdsib_init_hca", "return: HCA 0x%llx", hca_guid);
305 
306 	return (hcap);
307 }
308 
309 /*
310  * Called from attach
311  */
312 int
313 rdsib_initialize_ib()
314 {
315 	ib_guid_t	*guidp;
316 	rds_hca_t	*hcap;
317 	uint_t		ix, hcaix, nhcas;
318 	int		ret;
319 
320 	RDS_DPRINTF2("rdsib_initialize_ib", "enter: statep %p", rdsib_statep);
321 
322 	ASSERT(rdsib_statep != NULL);
323 	if (rdsib_statep == NULL) {
324 		RDS_DPRINTF1("rdsib_initialize_ib",
325 		    "RDS Statep not initialized");
326 		return (-1);
327 	}
328 
329 	/* How many hcas are there? */
330 	nhcas = ibt_get_hca_list(&guidp);
331 	if (nhcas == 0) {
332 		RDS_DPRINTF2("rdsib_initialize_ib", "No IB HCAs Available");
333 		return (-1);
334 	}
335 
336 	RDS_DPRINTF3("rdsib_initialize_ib", "Number of HCAs: %d", nhcas);
337 
338 	/* Register with IBTF */
339 	ret = ibt_attach(&rds_ib_modinfo, rdsib_dev_info, rdsib_statep,
340 	    &rdsib_statep->rds_ibhdl);
341 	if (ret != IBT_SUCCESS) {
342 		RDS_DPRINTF2("rdsib_initialize_ib", "ibt_attach failed: %d",
343 		    ret);
344 		(void) ibt_free_hca_list(guidp, nhcas);
345 		return (-1);
346 	}
347 
348 	/*
349 	 * Open each HCA and gather its information. Don't care about HCAs
350 	 * that cannot be opened. It is OK as long as atleast one HCA can be
351 	 * opened.
352 	 * Initialize a HCA only if all the information is available.
353 	 */
354 	for (ix = 0, hcaix = 0; ix < nhcas; ix++) {
355 		RDS_DPRINTF3(LABEL, "Open HCA: 0x%llx", guidp[ix]);
356 
357 		hcap = rdsib_init_hca(guidp[ix]);
358 		if (hcap != NULL) hcaix++;
359 	}
360 
361 	/* free the HCA list, we are done with it */
362 	(void) ibt_free_hca_list(guidp, nhcas);
363 
364 	if (hcaix == 0) {
365 		/* Failed to Initialize even one HCA */
366 		RDS_DPRINTF2("rdsib_initialize_ib", "No HCAs are initialized");
367 		(void) ibt_detach(rdsib_statep->rds_ibhdl);
368 		rdsib_statep->rds_ibhdl = NULL;
369 		return (-1);
370 	}
371 
372 	if (hcaix < nhcas) {
373 		RDS_DPRINTF2("rdsib_open_ib", "HCAs %d/%d failed to initialize",
374 		    (nhcas - hcaix), nhcas);
375 	}
376 
377 	RDS_DPRINTF2("rdsib_initialize_ib", "return: statep %p", rdsib_statep);
378 
379 	return (0);
380 }
381 
382 /*
383  * Called from detach
384  */
385 void
386 rdsib_deinitialize_ib()
387 {
388 	rds_hca_t	*hcap, *nextp;
389 	int		ret;
390 
391 	RDS_DPRINTF2("rdsib_deinitialize_ib", "enter: statep %p", rdsib_statep);
392 
393 	/* close and destroy all the sessions */
394 	rds_close_sessions(NULL);
395 
396 	/* Release all HCA resources */
397 	rw_enter(&rdsib_statep->rds_hca_lock, RW_WRITER);
398 	RDS_DPRINTF2("rdsib_deinitialize_ib", "HCA List: %p, NHCA: %d",
399 	    rdsib_statep->rds_hcalistp, rdsib_statep->rds_nhcas);
400 	hcap = rdsib_statep->rds_hcalistp;
401 	rdsib_statep->rds_hcalistp = NULL;
402 	rdsib_statep->rds_nhcas = 0;
403 	rw_exit(&rdsib_statep->rds_hca_lock);
404 
405 	while (hcap != NULL) {
406 		nextp = hcap->hca_nextp;
407 
408 		if (hcap->hca_hdl != NULL) {
409 			ret = ibt_free_pd(hcap->hca_hdl, hcap->hca_pdhdl);
410 			ASSERT(ret == IBT_SUCCESS);
411 
412 			(void) ibt_free_portinfo(hcap->hca_pinfop,
413 			    hcap->hca_pinfo_sz);
414 
415 			ret = ibt_close_hca(hcap->hca_hdl);
416 			ASSERT(ret == IBT_SUCCESS);
417 		}
418 
419 		kmem_free(hcap, sizeof (rds_hca_t));
420 		hcap = nextp;
421 	}
422 
423 	/* Deregister with IBTF */
424 	if (rdsib_statep->rds_ibhdl != NULL) {
425 		(void) ibt_detach(rdsib_statep->rds_ibhdl);
426 		rdsib_statep->rds_ibhdl = NULL;
427 	}
428 
429 	RDS_DPRINTF2("rdsib_deinitialize_ib", "return: statep %p",
430 	    rdsib_statep);
431 }
432 
433 /*
434  * Called on open of first RDS socket
435  */
436 int
437 rdsib_open_ib()
438 {
439 	int	ret;
440 
441 	RDS_DPRINTF2("rdsib_open_ib", "enter: statep %p", rdsib_statep);
442 
443 	/* Enable incoming connection requests */
444 	if (rdsib_statep->rds_srvhdl == NULL) {
445 		rdsib_statep->rds_srvhdl =
446 		    rds_register_service(rdsib_statep->rds_ibhdl);
447 		if (rdsib_statep->rds_srvhdl == NULL) {
448 			RDS_DPRINTF2("rdsib_open_ib",
449 			    "Service registration failed");
450 			return (-1);
451 		} else {
452 			/* bind the service on all available ports */
453 			ret = rds_bind_service(rdsib_statep);
454 			if (ret != 0) {
455 				RDS_DPRINTF2("rdsib_open_ib",
456 				    "Bind service failed: %d", ret);
457 			}
458 		}
459 	}
460 
461 	RDS_DPRINTF2("rdsib_open_ib", "return: statep %p", rdsib_statep);
462 
463 	return (0);
464 }
465 
466 /*
467  * Called when all ports are closed.
468  */
469 void
470 rdsib_close_ib()
471 {
472 	int	ret;
473 
474 	RDS_DPRINTF2("rdsib_close_ib", "enter: statep %p", rdsib_statep);
475 
476 	/* Disable incoming connection requests */
477 	if (rdsib_statep->rds_srvhdl != NULL) {
478 		ret = ibt_unbind_all_services(rdsib_statep->rds_srvhdl);
479 		if (ret != 0) {
480 			RDS_DPRINTF2("rdsib_close_ib",
481 			    "ibt_unbind_all_services failed: %d\n", ret);
482 		}
483 		ret = ibt_deregister_service(rdsib_statep->rds_ibhdl,
484 		    rdsib_statep->rds_srvhdl);
485 		if (ret != 0) {
486 			RDS_DPRINTF2("rdsib_close_ib",
487 			    "ibt_deregister_service failed: %d\n", ret);
488 		} else {
489 			rdsib_statep->rds_srvhdl = NULL;
490 		}
491 	}
492 
493 	RDS_DPRINTF2("rdsib_close_ib", "return: statep %p", rdsib_statep);
494 }
495 
496 /* Return hcap, given the hca guid */
497 rds_hca_t *
498 rds_get_hcap(rds_state_t *statep, ib_guid_t hca_guid)
499 {
500 	rds_hca_t	*hcap;
501 
502 	RDS_DPRINTF4("rds_get_hcap", "rds_get_hcap: Enter: statep: 0x%p "
503 	    "guid: %llx", statep, hca_guid);
504 
505 	rw_enter(&statep->rds_hca_lock, RW_READER);
506 
507 	hcap = statep->rds_hcalistp;
508 	while ((hcap != NULL) && (hcap->hca_guid != hca_guid)) {
509 		hcap = hcap->hca_nextp;
510 	}
511 
512 	/*
513 	 * don't let anyone use this HCA until the RECV memory
514 	 * is registered with this HCA
515 	 */
516 	if ((hcap != NULL) &&
517 	    (hcap->hca_state == RDS_HCA_STATE_MEM_REGISTERED)) {
518 		ASSERT(hcap->hca_mrhdl != NULL);
519 		rw_exit(&statep->rds_hca_lock);
520 		return (hcap);
521 	}
522 
523 	RDS_DPRINTF2("rds_get_hcap",
524 	    "HCA (0x%p, 0x%llx) is not initialized", hcap, hca_guid);
525 	rw_exit(&statep->rds_hca_lock);
526 
527 	RDS_DPRINTF4("rds_get_hcap", "rds_get_hcap: return");
528 
529 	return (NULL);
530 }
531 
532 /* Return hcap, given a gid */
533 rds_hca_t *
534 rds_gid_to_hcap(rds_state_t *statep, ib_gid_t gid)
535 {
536 	rds_hca_t	*hcap;
537 	uint_t		ix;
538 
539 	RDS_DPRINTF4("rds_gid_to_hcap", "Enter: statep: 0x%p gid: %llx:%llx",
540 	    statep, gid.gid_prefix, gid.gid_guid);
541 
542 	rw_enter(&statep->rds_hca_lock, RW_READER);
543 
544 	hcap = statep->rds_hcalistp;
545 	while (hcap != NULL) {
546 
547 		/*
548 		 * don't let anyone use this HCA until the RECV memory
549 		 * is registered with this HCA
550 		 */
551 		if (hcap->hca_state != RDS_HCA_STATE_MEM_REGISTERED) {
552 			RDS_DPRINTF3("rds_gid_to_hcap",
553 			    "HCA (0x%p, 0x%llx) is not initialized",
554 			    hcap, gid.gid_guid);
555 			hcap = hcap->hca_nextp;
556 			continue;
557 		}
558 
559 		for (ix = 0; ix < hcap->hca_nports; ix++) {
560 			if ((hcap->hca_pinfop[ix].p_sgid_tbl[0].gid_prefix ==
561 			    gid.gid_prefix) &&
562 			    (hcap->hca_pinfop[ix].p_sgid_tbl[0].gid_guid ==
563 			    gid.gid_guid)) {
564 				RDS_DPRINTF4("rds_gid_to_hcap",
565 				    "gid found in hcap: 0x%p", hcap);
566 				rw_exit(&statep->rds_hca_lock);
567 				return (hcap);
568 			}
569 		}
570 		hcap = hcap->hca_nextp;
571 	}
572 
573 	rw_exit(&statep->rds_hca_lock);
574 
575 	return (NULL);
576 }
577 
578 /* This is called from the send CQ handler */
579 void
580 rds_send_acknowledgement(rds_ep_t *ep)
581 {
582 	int	ret;
583 	uint_t	ix;
584 
585 	RDS_DPRINTF4("rds_send_acknowledgement", "Enter EP(%p)", ep);
586 
587 	mutex_enter(&ep->ep_lock);
588 
589 	ASSERT(ep->ep_rdmacnt != 0);
590 
591 	/*
592 	 * The previous ACK completed successfully, send the next one
593 	 * if more messages were received after sending the last ACK
594 	 */
595 	if (ep->ep_rbufid != *(uintptr_t *)(uintptr_t)ep->ep_ackds.ds_va) {
596 		*(uintptr_t *)(uintptr_t)ep->ep_ackds.ds_va = ep->ep_rbufid;
597 		mutex_exit(&ep->ep_lock);
598 
599 		/* send acknowledgement */
600 		RDS_INCR_TXACKS();
601 		ret = ibt_post_send(ep->ep_chanhdl, &ep->ep_ackwr, 1, &ix);
602 		if (ret != IBT_SUCCESS) {
603 			RDS_DPRINTF2("rds_send_acknowledgement",
604 			    "EP(%p): ibt_post_send for acknowledgement "
605 			    "failed: %d, SQ depth: %d",
606 			    ep, ret, ep->ep_sndpool.pool_nbusy);
607 			mutex_enter(&ep->ep_lock);
608 			ep->ep_rdmacnt--;
609 			mutex_exit(&ep->ep_lock);
610 		}
611 	} else {
612 		/* ACKed all messages, no more to ACK */
613 		ep->ep_rdmacnt--;
614 		mutex_exit(&ep->ep_lock);
615 		return;
616 	}
617 
618 	RDS_DPRINTF4("rds_send_acknowledgement", "Return EP(%p)", ep);
619 }
620 
621 static int
622 rds_poll_ctrl_completions(ibt_cq_hdl_t cq, rds_ep_t *ep)
623 {
624 	ibt_wc_t	wc;
625 	uint_t		npolled;
626 	rds_buf_t	*bp;
627 	rds_ctrl_pkt_t	*cpkt;
628 	rds_qp_t	*recvqp;
629 	int		ret = IBT_SUCCESS;
630 
631 	RDS_DPRINTF4("rds_poll_ctrl_completions", "Enter: EP(%p)", ep);
632 
633 	bzero(&wc, sizeof (ibt_wc_t));
634 	ret = ibt_poll_cq(cq, &wc, 1, &npolled);
635 	if (ret != IBT_SUCCESS) {
636 		if (ret != IBT_CQ_EMPTY) {
637 			RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
638 			    "returned: %d", ep, cq, ret);
639 		} else {
640 			RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
641 			    "returned: IBT_CQ_EMPTY", ep, cq);
642 		}
643 		return (ret);
644 	}
645 
646 	bp = (rds_buf_t *)(uintptr_t)wc.wc_id;
647 
648 	if (wc.wc_status != IBT_WC_SUCCESS) {
649 		mutex_enter(&ep->ep_recvqp.qp_lock);
650 		ep->ep_recvqp.qp_level--;
651 		mutex_exit(&ep->ep_recvqp.qp_lock);
652 
653 		/* Free the buffer */
654 		bp->buf_state = RDS_RCVBUF_FREE;
655 		rds_free_recv_buf(bp, 1);
656 
657 		/* Receive completion failure */
658 		if (wc.wc_status != IBT_WC_WR_FLUSHED_ERR) {
659 			RDS_DPRINTF2("rds_poll_ctrl_completions",
660 			    "EP(%p) CQ(%p) BP(%p): WC Error Status: %d",
661 			    ep, cq, wc.wc_id, wc.wc_status);
662 		}
663 		return (ret);
664 	}
665 
666 	/* there is one less in the RQ */
667 	recvqp = &ep->ep_recvqp;
668 	mutex_enter(&recvqp->qp_lock);
669 	recvqp->qp_level--;
670 	if ((recvqp->qp_taskqpending == B_FALSE) &&
671 	    (recvqp->qp_level <= recvqp->qp_lwm)) {
672 		/* Time to post more buffers into the RQ */
673 		recvqp->qp_taskqpending = B_TRUE;
674 		mutex_exit(&recvqp->qp_lock);
675 
676 		ret = ddi_taskq_dispatch(rds_taskq,
677 		    rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP);
678 		if (ret != DDI_SUCCESS) {
679 			RDS_DPRINTF2(LABEL, "ddi_taskq_dispatch failed: %d",
680 			    ret);
681 			mutex_enter(&recvqp->qp_lock);
682 			recvqp->qp_taskqpending = B_FALSE;
683 			mutex_exit(&recvqp->qp_lock);
684 		}
685 	} else {
686 		mutex_exit(&recvqp->qp_lock);
687 	}
688 
689 	cpkt = (rds_ctrl_pkt_t *)(uintptr_t)bp->buf_ds.ds_va;
690 	rds_handle_control_message(ep->ep_sp, cpkt);
691 
692 	bp->buf_state = RDS_RCVBUF_FREE;
693 	rds_free_recv_buf(bp, 1);
694 
695 	RDS_DPRINTF4("rds_poll_ctrl_completions", "Return: EP(%p)", ep);
696 
697 	return (ret);
698 }
699 
700 #define	RDS_POST_FEW_ATATIME	100
701 /* Post recv WRs into the RQ. Assumes the ep->refcnt is already incremented */
702 void
703 rds_post_recv_buf(void *arg)
704 {
705 	ibt_channel_hdl_t	chanhdl;
706 	rds_ep_t		*ep;
707 	rds_session_t		*sp;
708 	rds_qp_t		*recvqp;
709 	rds_bufpool_t		*gp;
710 	rds_buf_t		*bp, *bp1;
711 	ibt_recv_wr_t		*wrp, wr[RDS_POST_FEW_ATATIME];
712 	rds_hca_t		*hcap;
713 	uint_t			npost, nspace, rcv_len;
714 	uint_t			ix, jx, kx;
715 	int			ret;
716 
717 	chanhdl = (ibt_channel_hdl_t)arg;
718 	RDS_DPRINTF4("rds_post_recv_buf", "Enter: CHAN(%p)", chanhdl);
719 	RDS_INCR_POST_RCV_BUF_CALLS();
720 
721 	ep = (rds_ep_t *)ibt_get_chan_private(chanhdl);
722 	ASSERT(ep != NULL);
723 	sp = ep->ep_sp;
724 	recvqp = &ep->ep_recvqp;
725 
726 	RDS_DPRINTF5("rds_post_recv_buf", "EP(%p)", ep);
727 
728 	/* get the hcap for the HCA hosting this channel */
729 	hcap = rds_lkup_hca(ep->ep_hca_guid);
730 	if (hcap == NULL) {
731 		RDS_DPRINTF2("rds_post_recv_buf", "HCA (0x%llx) not found",
732 		    ep->ep_hca_guid);
733 		return;
734 	}
735 
736 	/* Make sure the session is still connected */
737 	rw_enter(&sp->session_lock, RW_READER);
738 	if ((sp->session_state != RDS_SESSION_STATE_INIT) &&
739 	    (sp->session_state != RDS_SESSION_STATE_CONNECTED) &&
740 	    (sp->session_state != RDS_SESSION_STATE_HCA_CLOSING)) {
741 		RDS_DPRINTF2("rds_post_recv_buf", "EP(%p): Session is not "
742 		    "in active state (%d)", ep, sp->session_state);
743 		rw_exit(&sp->session_lock);
744 		return;
745 	}
746 	rw_exit(&sp->session_lock);
747 
748 	/* how many can be posted */
749 	mutex_enter(&recvqp->qp_lock);
750 	nspace = recvqp->qp_depth - recvqp->qp_level;
751 	if (nspace == 0) {
752 		RDS_DPRINTF2("rds_post_recv_buf", "RQ is FULL");
753 		recvqp->qp_taskqpending = B_FALSE;
754 		mutex_exit(&recvqp->qp_lock);
755 		return;
756 	}
757 	mutex_exit(&recvqp->qp_lock);
758 
759 	if (ep->ep_type == RDS_EP_TYPE_DATA) {
760 		gp = &rds_dpool;
761 		rcv_len = RdsPktSize;
762 	} else {
763 		gp = &rds_cpool;
764 		rcv_len = RDS_CTRLPKT_SIZE;
765 	}
766 
767 	bp = rds_get_buf(gp, nspace, &jx);
768 	if (bp == NULL) {
769 		RDS_DPRINTF2(LABEL, "EP(%p): No Recv buffers available", ep);
770 		/* try again later */
771 		ret = ddi_taskq_dispatch(rds_taskq, rds_post_recv_buf,
772 		    (void *)ep->ep_chanhdl, DDI_NOSLEEP);
773 		if (ret != DDI_SUCCESS) {
774 			RDS_DPRINTF2(LABEL, "ddi_taskq_dispatch failed: %d",
775 			    ret);
776 			mutex_enter(&recvqp->qp_lock);
777 			recvqp->qp_taskqpending = B_FALSE;
778 			mutex_exit(&recvqp->qp_lock);
779 		}
780 		return;
781 	}
782 
783 	if (jx != nspace) {
784 		RDS_DPRINTF2(LABEL, "EP(%p): Recv buffers "
785 		    "needed: %d available: %d", ep, nspace, jx);
786 		nspace = jx;
787 	}
788 
789 	bp1 = bp;
790 	for (ix = 0; ix < nspace; ix++) {
791 		bp1->buf_ep = ep;
792 		ASSERT(bp1->buf_state == RDS_RCVBUF_FREE);
793 		bp1->buf_state = RDS_RCVBUF_POSTED;
794 		bp1->buf_ds.ds_key = hcap->hca_lkey;
795 		bp1->buf_ds.ds_len = rcv_len;
796 		bp1 = bp1->buf_nextp;
797 	}
798 
799 #if 0
800 	wrp = kmem_zalloc(RDS_POST_FEW_ATATIME * sizeof (ibt_recv_wr_t),
801 	    KM_SLEEP);
802 #else
803 	wrp = &wr[0];
804 #endif
805 
806 	npost = nspace;
807 	while (npost) {
808 		jx = (npost > RDS_POST_FEW_ATATIME) ?
809 		    RDS_POST_FEW_ATATIME : npost;
810 		for (ix = 0; ix < jx; ix++) {
811 			wrp[ix].wr_id = (uintptr_t)bp;
812 			wrp[ix].wr_nds = 1;
813 			wrp[ix].wr_sgl = &bp->buf_ds;
814 			bp = bp->buf_nextp;
815 		}
816 
817 		ret = ibt_post_recv(chanhdl, wrp, jx, &kx);
818 		if ((ret != IBT_SUCCESS) || (kx != jx)) {
819 			RDS_DPRINTF2(LABEL, "ibt_post_recv for %d WRs failed: "
820 			    "%d", npost, ret);
821 			npost -= kx;
822 			break;
823 		}
824 
825 		npost -= jx;
826 	}
827 
828 	mutex_enter(&recvqp->qp_lock);
829 	if (npost != 0) {
830 		RDS_DPRINTF2("rds_post_recv_buf",
831 		    "EP(%p) Failed to post %d WRs", ep, npost);
832 		recvqp->qp_level += (nspace - npost);
833 	} else {
834 		recvqp->qp_level += nspace;
835 	}
836 
837 	/*
838 	 * sometimes, the recv WRs can get consumed as soon as they are
839 	 * posted. In that case, taskq thread to post more WRs to the RQ will
840 	 * not be scheduled as the taskqpending flag is still set.
841 	 */
842 	if (recvqp->qp_level == 0) {
843 		mutex_exit(&recvqp->qp_lock);
844 		ret = ddi_taskq_dispatch(rds_taskq,
845 		    rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP);
846 		if (ret != DDI_SUCCESS) {
847 			RDS_DPRINTF2("rds_post_recv_buf",
848 			    "ddi_taskq_dispatch failed: %d", ret);
849 			mutex_enter(&recvqp->qp_lock);
850 			recvqp->qp_taskqpending = B_FALSE;
851 			mutex_exit(&recvqp->qp_lock);
852 		}
853 	} else {
854 		recvqp->qp_taskqpending = B_FALSE;
855 		mutex_exit(&recvqp->qp_lock);
856 	}
857 
858 #if 0
859 	kmem_free(wrp, RDS_POST_FEW_ATATIME * sizeof (ibt_recv_wr_t));
860 #endif
861 
862 	RDS_DPRINTF4("rds_post_recv_buf", "Return: EP(%p)", ep);
863 }
864 
865 static int
866 rds_poll_data_completions(ibt_cq_hdl_t cq, rds_ep_t *ep)
867 {
868 	ibt_wc_t	wc;
869 	rds_buf_t	*bp;
870 	rds_data_hdr_t	*pktp;
871 	rds_qp_t	*recvqp;
872 	uint_t		npolled;
873 	int		ret = IBT_SUCCESS;
874 
875 
876 	RDS_DPRINTF4("rds_poll_data_completions", "Enter: EP(%p)", ep);
877 
878 	bzero(&wc, sizeof (ibt_wc_t));
879 	ret = ibt_poll_cq(cq, &wc, 1, &npolled);
880 	if (ret != IBT_SUCCESS) {
881 		if (ret != IBT_CQ_EMPTY) {
882 			RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
883 			    "returned: %d", ep, cq, ret);
884 		} else {
885 			RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
886 			    "returned: IBT_CQ_EMPTY", ep, cq);
887 		}
888 		return (ret);
889 	}
890 
891 	bp = (rds_buf_t *)(uintptr_t)wc.wc_id;
892 	ASSERT(bp->buf_state == RDS_RCVBUF_POSTED);
893 	bp->buf_state = RDS_RCVBUF_ONSOCKQ;
894 	bp->buf_nextp = NULL;
895 
896 	if (wc.wc_status != IBT_WC_SUCCESS) {
897 		mutex_enter(&ep->ep_recvqp.qp_lock);
898 		ep->ep_recvqp.qp_level--;
899 		mutex_exit(&ep->ep_recvqp.qp_lock);
900 
901 		/* free the buffer */
902 		bp->buf_state = RDS_RCVBUF_FREE;
903 		rds_free_recv_buf(bp, 1);
904 
905 		/* Receive completion failure */
906 		if (wc.wc_status != IBT_WC_WR_FLUSHED_ERR) {
907 			RDS_DPRINTF2("rds_poll_data_completions",
908 			    "EP(%p) CQ(%p) BP(%p): WC Error Status: %d",
909 			    ep, cq, wc.wc_id, wc.wc_status);
910 			RDS_INCR_RXERRS();
911 		}
912 		return (ret);
913 	}
914 
915 	/* there is one less in the RQ */
916 	recvqp = &ep->ep_recvqp;
917 	mutex_enter(&recvqp->qp_lock);
918 	recvqp->qp_level--;
919 	if ((recvqp->qp_taskqpending == B_FALSE) &&
920 	    (recvqp->qp_level <= recvqp->qp_lwm)) {
921 		/* Time to post more buffers into the RQ */
922 		recvqp->qp_taskqpending = B_TRUE;
923 		mutex_exit(&recvqp->qp_lock);
924 
925 		ret = ddi_taskq_dispatch(rds_taskq,
926 		    rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP);
927 		if (ret != DDI_SUCCESS) {
928 			RDS_DPRINTF2(LABEL, "ddi_taskq_dispatch failed: %d",
929 			    ret);
930 			mutex_enter(&recvqp->qp_lock);
931 			recvqp->qp_taskqpending = B_FALSE;
932 			mutex_exit(&recvqp->qp_lock);
933 		}
934 	} else {
935 		mutex_exit(&recvqp->qp_lock);
936 	}
937 
938 	pktp = (rds_data_hdr_t *)(uintptr_t)bp->buf_ds.ds_va;
939 	ASSERT(pktp->dh_datalen != 0);
940 
941 	RDS_DPRINTF5(LABEL, "Message Received: sendIP: 0x%x recvIP: 0x%x "
942 	    "sendport: %d recvport: %d npkts: %d pktno: %d", ep->ep_remip,
943 	    ep->ep_myip, pktp->dh_sendport, pktp->dh_recvport,
944 	    pktp->dh_npkts, pktp->dh_psn);
945 
946 	RDS_DPRINTF3(LABEL, "BP(%p): npkts: %d psn: %d", bp,
947 	    pktp->dh_npkts, pktp->dh_psn);
948 
949 	if (pktp->dh_npkts == 1) {
950 		/* single pkt or last packet */
951 		if (pktp->dh_psn != 0) {
952 			/* last packet of a segmented message */
953 			ASSERT(ep->ep_seglbp != NULL);
954 			ep->ep_seglbp->buf_nextp = bp;
955 			ep->ep_seglbp = bp;
956 			rds_received_msg(ep, ep->ep_segfbp);
957 			ep->ep_segfbp = NULL;
958 			ep->ep_seglbp = NULL;
959 		} else {
960 			/* single packet */
961 			rds_received_msg(ep, bp);
962 		}
963 	} else {
964 		/* multi-pkt msg */
965 		if (pktp->dh_psn == 0) {
966 			/* first packet */
967 			ASSERT(ep->ep_segfbp == NULL);
968 			ep->ep_segfbp = bp;
969 			ep->ep_seglbp = bp;
970 		} else {
971 			/* intermediate packet */
972 			ASSERT(ep->ep_segfbp != NULL);
973 			ep->ep_seglbp->buf_nextp = bp;
974 			ep->ep_seglbp = bp;
975 		}
976 	}
977 
978 	RDS_DPRINTF4("rds_poll_data_completions", "Return: EP(%p)", ep);
979 
980 	return (ret);
981 }
982 
983 void
984 rds_recvcq_handler(ibt_cq_hdl_t cq, void *arg)
985 {
986 	rds_ep_t	*ep;
987 	int		ret = IBT_SUCCESS;
988 	int		(*func)(ibt_cq_hdl_t, rds_ep_t *);
989 
990 	ep = (rds_ep_t *)arg;
991 
992 	RDS_DPRINTF4("rds_recvcq_handler", "enter: EP(%p)", ep);
993 
994 	if (ep->ep_type == RDS_EP_TYPE_DATA) {
995 		func = rds_poll_data_completions;
996 	} else {
997 		func = rds_poll_ctrl_completions;
998 	}
999 
1000 	do {
1001 		ret = func(cq, ep);
1002 	} while (ret != IBT_CQ_EMPTY);
1003 
1004 	/* enable the CQ */
1005 	ret = ibt_enable_cq_notify(cq, rds_wc_signal);
1006 	if (ret != IBT_SUCCESS) {
1007 		RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_enable_cq_notify "
1008 		    "failed: %d", ep, cq, ret);
1009 		return;
1010 	}
1011 
1012 	do {
1013 		ret = func(cq, ep);
1014 	} while (ret != IBT_CQ_EMPTY);
1015 
1016 	RDS_DPRINTF4("rds_recvcq_handler", "Return: EP(%p)", ep);
1017 }
1018 
1019 void
1020 rds_poll_send_completions(ibt_cq_hdl_t cq, rds_ep_t *ep, boolean_t lock)
1021 {
1022 	ibt_wc_t	wc[RDS_NUM_DATA_SEND_WCS];
1023 	uint_t		npolled, nret, send_error = 0;
1024 	rds_buf_t	*headp, *tailp, *bp;
1025 	int		ret, ix;
1026 
1027 	RDS_DPRINTF4("rds_poll_send_completions", "Enter EP(%p)", ep);
1028 
1029 	headp = NULL;
1030 	tailp = NULL;
1031 	npolled = 0;
1032 	do {
1033 		ret = ibt_poll_cq(cq, wc, RDS_NUM_DATA_SEND_WCS, &nret);
1034 		if (ret != IBT_SUCCESS) {
1035 			if (ret != IBT_CQ_EMPTY) {
1036 				RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): "
1037 				    "ibt_poll_cq returned: %d", ep, cq, ret);
1038 			} else {
1039 				RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): "
1040 				    "ibt_poll_cq returned: IBT_CQ_EMPTY",
1041 				    ep, cq);
1042 			}
1043 
1044 			break;
1045 		}
1046 
1047 		for (ix = 0; ix < nret; ix++) {
1048 			if (wc[ix].wc_status == IBT_WC_SUCCESS) {
1049 				if (wc[ix].wc_type == IBT_WRC_RDMAW) {
1050 					rds_send_acknowledgement(ep);
1051 					continue;
1052 				}
1053 
1054 				bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id;
1055 				ASSERT(bp->buf_state == RDS_SNDBUF_PENDING);
1056 				bp->buf_state = RDS_SNDBUF_FREE;
1057 			} else if (wc[ix].wc_status == IBT_WC_WR_FLUSHED_ERR) {
1058 				RDS_INCR_TXERRS();
1059 				RDS_DPRINTF5("rds_poll_send_completions",
1060 				    "EP(%p): WC ID: %p ERROR: %d", ep,
1061 				    wc[ix].wc_id, wc[ix].wc_status);
1062 
1063 				if (wc[ix].wc_id == RDS_RDMAW_WRID) {
1064 					mutex_enter(&ep->ep_lock);
1065 					ep->ep_rdmacnt--;
1066 					mutex_exit(&ep->ep_lock);
1067 					continue;
1068 				}
1069 
1070 				bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id;
1071 				ASSERT(bp->buf_state == RDS_SNDBUF_PENDING);
1072 				bp->buf_state = RDS_SNDBUF_FREE;
1073 			} else {
1074 				RDS_INCR_TXERRS();
1075 				RDS_DPRINTF2("rds_poll_send_completions",
1076 				    "EP(%p): WC ID: %p ERROR: %d", ep,
1077 				    wc[ix].wc_id, wc[ix].wc_status);
1078 				if (send_error == 0) {
1079 					rds_session_t	*sp = ep->ep_sp;
1080 
1081 					/* don't let anyone send anymore */
1082 					rw_enter(&sp->session_lock, RW_WRITER);
1083 					if (sp->session_state !=
1084 					    RDS_SESSION_STATE_ERROR) {
1085 						sp->session_state =
1086 						    RDS_SESSION_STATE_ERROR;
1087 						/* Make this the active end */
1088 						sp->session_type =
1089 						    RDS_SESSION_ACTIVE;
1090 					}
1091 					rw_exit(&sp->session_lock);
1092 				}
1093 
1094 				send_error++;
1095 
1096 				if (wc[ix].wc_id == RDS_RDMAW_WRID) {
1097 					mutex_enter(&ep->ep_lock);
1098 					ep->ep_rdmacnt--;
1099 					mutex_exit(&ep->ep_lock);
1100 					continue;
1101 				}
1102 
1103 				bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id;
1104 				ASSERT(bp->buf_state == RDS_SNDBUF_PENDING);
1105 				bp->buf_state = RDS_SNDBUF_FREE;
1106 			}
1107 
1108 			bp->buf_nextp = NULL;
1109 			if (headp) {
1110 				tailp->buf_nextp = bp;
1111 				tailp = bp;
1112 			} else {
1113 				headp = bp;
1114 				tailp = bp;
1115 			}
1116 
1117 			npolled++;
1118 		}
1119 
1120 		if (rds_no_interrupts && (npolled > 100)) {
1121 			break;
1122 		}
1123 
1124 		if (rds_no_interrupts == 1) {
1125 			break;
1126 		}
1127 	} while (ret != IBT_CQ_EMPTY);
1128 
1129 	RDS_DPRINTF5("rds_poll_send_completions", "Npolled: %d send_error: %d",
1130 	    npolled, send_error);
1131 
1132 	/* put the buffers to the pool */
1133 	if (npolled != 0) {
1134 		rds_free_send_buf(ep, headp, tailp, npolled, lock);
1135 	}
1136 
1137 	if (send_error != 0) {
1138 		rds_handle_send_error(ep);
1139 	}
1140 
1141 	RDS_DPRINTF4("rds_poll_send_completions", "Return EP(%p)", ep);
1142 }
1143 
1144 void
1145 rds_sendcq_handler(ibt_cq_hdl_t cq, void *arg)
1146 {
1147 	rds_ep_t	*ep;
1148 	int		ret;
1149 
1150 	ep = (rds_ep_t *)arg;
1151 
1152 	RDS_DPRINTF4("rds_sendcq_handler", "Enter: EP(%p)", ep);
1153 
1154 	/* enable the CQ */
1155 	ret = ibt_enable_cq_notify(cq, IBT_NEXT_COMPLETION);
1156 	if (ret != IBT_SUCCESS) {
1157 		RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_enable_cq_notify "
1158 		    "failed: %d", ep, cq, ret);
1159 		return;
1160 	}
1161 
1162 	rds_poll_send_completions(cq, ep, B_FALSE);
1163 
1164 	RDS_DPRINTF4("rds_sendcq_handler", "Return: EP(%p)", ep);
1165 }
1166 
1167 void
1168 rds_ep_free_rc_channel(rds_ep_t *ep)
1169 {
1170 	int ret;
1171 
1172 	RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) - Enter", ep);
1173 
1174 	ASSERT(mutex_owned(&ep->ep_lock));
1175 
1176 	/* free the QP */
1177 	if (ep->ep_chanhdl != NULL) {
1178 		/* wait until the RQ is empty */
1179 		(void) ibt_flush_channel(ep->ep_chanhdl);
1180 		(void) rds_is_recvq_empty(ep, B_TRUE);
1181 		ret = ibt_free_channel(ep->ep_chanhdl);
1182 		if (ret != IBT_SUCCESS) {
1183 			RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) "
1184 			    "ibt_free_channel returned: %d", ep, ret);
1185 		}
1186 		ep->ep_chanhdl = NULL;
1187 	} else {
1188 		RDS_DPRINTF2("rds_ep_free_rc_channel",
1189 		    "EP(%p) Channel is ALREADY FREE", ep);
1190 	}
1191 
1192 	/* free the Send CQ */
1193 	if (ep->ep_sendcq != NULL) {
1194 		ret = ibt_free_cq(ep->ep_sendcq);
1195 		if (ret != IBT_SUCCESS) {
1196 			RDS_DPRINTF2("rds_ep_free_rc_channel",
1197 			    "EP(%p) - for sendcq, ibt_free_cq returned %d",
1198 			    ep, ret);
1199 		}
1200 		ep->ep_sendcq = NULL;
1201 	} else {
1202 		RDS_DPRINTF2("rds_ep_free_rc_channel",
1203 		    "EP(%p) SendCQ is ALREADY FREE", ep);
1204 	}
1205 
1206 	/* free the Recv CQ */
1207 	if (ep->ep_recvcq != NULL) {
1208 		ret = ibt_free_cq(ep->ep_recvcq);
1209 		if (ret != IBT_SUCCESS) {
1210 			RDS_DPRINTF2("rds_ep_free_rc_channel",
1211 			    "EP(%p) - for recvcq, ibt_free_cq returned %d",
1212 			    ep, ret);
1213 		}
1214 		ep->ep_recvcq = NULL;
1215 	} else {
1216 		RDS_DPRINTF2("rds_ep_free_rc_channel",
1217 		    "EP(%p) RecvCQ is ALREADY FREE", ep);
1218 	}
1219 
1220 	RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) - Return", ep);
1221 }
1222 
1223 /* Allocate resources for RC channel */
1224 ibt_channel_hdl_t
1225 rds_ep_alloc_rc_channel(rds_ep_t *ep, uint8_t hca_port)
1226 {
1227 	int				ret = IBT_SUCCESS;
1228 	ibt_cq_attr_t			scqattr, rcqattr;
1229 	ibt_rc_chan_alloc_args_t	chanargs;
1230 	ibt_channel_hdl_t		chanhdl;
1231 	rds_session_t			*sp;
1232 	rds_hca_t			*hcap;
1233 
1234 	RDS_DPRINTF4("rds_ep_alloc_rc_channel", "Enter: 0x%p port: %d",
1235 	    ep, hca_port);
1236 
1237 	/* Update the EP with the right IP address and HCA guid */
1238 	sp = ep->ep_sp;
1239 	ASSERT(sp != NULL);
1240 	rw_enter(&sp->session_lock, RW_READER);
1241 	mutex_enter(&ep->ep_lock);
1242 	ep->ep_myip = sp->session_myip;
1243 	ep->ep_remip = sp->session_remip;
1244 	hcap = rds_gid_to_hcap(rdsib_statep, sp->session_lgid);
1245 	ep->ep_hca_guid = hcap->hca_guid;
1246 	mutex_exit(&ep->ep_lock);
1247 	rw_exit(&sp->session_lock);
1248 
1249 	/* reset taskqpending flag here */
1250 	ep->ep_recvqp.qp_taskqpending = B_FALSE;
1251 
1252 	if (ep->ep_type == RDS_EP_TYPE_CTRL) {
1253 		scqattr.cq_size = MaxCtrlSendBuffers;
1254 		scqattr.cq_sched = NULL;
1255 		scqattr.cq_flags = IBT_CQ_NO_FLAGS;
1256 
1257 		rcqattr.cq_size = MaxCtrlRecvBuffers;
1258 		rcqattr.cq_sched = NULL;
1259 		rcqattr.cq_flags = IBT_CQ_NO_FLAGS;
1260 
1261 		chanargs.rc_sizes.cs_sq = MaxCtrlSendBuffers;
1262 		chanargs.rc_sizes.cs_rq = MaxCtrlRecvBuffers;
1263 		chanargs.rc_sizes.cs_sq_sgl = 1;
1264 		chanargs.rc_sizes.cs_rq_sgl = 1;
1265 	} else {
1266 		scqattr.cq_size = MaxDataSendBuffers + RDS_NUM_ACKS;
1267 		scqattr.cq_sched = NULL;
1268 		scqattr.cq_flags = IBT_CQ_NO_FLAGS;
1269 
1270 		rcqattr.cq_size = MaxDataRecvBuffers;
1271 		rcqattr.cq_sched = NULL;
1272 		rcqattr.cq_flags = IBT_CQ_NO_FLAGS;
1273 
1274 		chanargs.rc_sizes.cs_sq = MaxDataSendBuffers + RDS_NUM_ACKS;
1275 		chanargs.rc_sizes.cs_rq = MaxDataRecvBuffers;
1276 		chanargs.rc_sizes.cs_sq_sgl = 1;
1277 		chanargs.rc_sizes.cs_rq_sgl = 1;
1278 	}
1279 
1280 	mutex_enter(&ep->ep_lock);
1281 	if (ep->ep_sendcq == NULL) {
1282 		/* returned size is always greater than the requested size */
1283 		ret = ibt_alloc_cq(hcap->hca_hdl, &scqattr,
1284 		    &ep->ep_sendcq, NULL);
1285 		if (ret != IBT_SUCCESS) {
1286 			RDS_DPRINTF2(LABEL, "ibt_alloc_cq for sendCQ "
1287 			    "failed, size = %d: %d", scqattr.cq_size, ret);
1288 			mutex_exit(&ep->ep_lock);
1289 			return (NULL);
1290 		}
1291 
1292 		(void) ibt_set_cq_handler(ep->ep_sendcq, rds_sendcq_handler,
1293 		    ep);
1294 
1295 		if (rds_no_interrupts == 0) {
1296 			ret = ibt_enable_cq_notify(ep->ep_sendcq,
1297 			    IBT_NEXT_COMPLETION);
1298 			if (ret != IBT_SUCCESS) {
1299 				RDS_DPRINTF2(LABEL,
1300 				    "ibt_enable_cq_notify failed: %d", ret);
1301 				(void) ibt_free_cq(ep->ep_sendcq);
1302 				ep->ep_sendcq = NULL;
1303 				mutex_exit(&ep->ep_lock);
1304 				return (NULL);
1305 			}
1306 		}
1307 	}
1308 
1309 	if (ep->ep_recvcq == NULL) {
1310 		/* returned size is always greater than the requested size */
1311 		ret = ibt_alloc_cq(hcap->hca_hdl, &rcqattr,
1312 		    &ep->ep_recvcq, NULL);
1313 		if (ret != IBT_SUCCESS) {
1314 			RDS_DPRINTF2(LABEL, "ibt_alloc_cq for recvCQ "
1315 			    "failed, size = %d: %d", rcqattr.cq_size, ret);
1316 			(void) ibt_free_cq(ep->ep_sendcq);
1317 			ep->ep_sendcq = NULL;
1318 			mutex_exit(&ep->ep_lock);
1319 			return (NULL);
1320 		}
1321 
1322 		(void) ibt_set_cq_handler(ep->ep_recvcq, rds_recvcq_handler,
1323 		    ep);
1324 
1325 		ret = ibt_enable_cq_notify(ep->ep_recvcq, rds_wc_signal);
1326 		if (ret != IBT_SUCCESS) {
1327 			RDS_DPRINTF2(LABEL,
1328 			    "ibt_enable_cq_notify failed: %d", ret);
1329 			(void) ibt_free_cq(ep->ep_recvcq);
1330 			ep->ep_recvcq = NULL;
1331 			(void) ibt_free_cq(ep->ep_sendcq);
1332 			ep->ep_sendcq = NULL;
1333 			mutex_exit(&ep->ep_lock);
1334 			return (NULL);
1335 		}
1336 	}
1337 
1338 	chanargs.rc_flags = IBT_ALL_SIGNALED;
1339 	chanargs.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR |
1340 	    IBT_CEP_ATOMIC;
1341 	chanargs.rc_hca_port_num = hca_port;
1342 	chanargs.rc_scq = ep->ep_sendcq;
1343 	chanargs.rc_rcq = ep->ep_recvcq;
1344 	chanargs.rc_pd = hcap->hca_pdhdl;
1345 	chanargs.rc_srq = NULL;
1346 
1347 	ret = ibt_alloc_rc_channel(hcap->hca_hdl,
1348 	    IBT_ACHAN_NO_FLAGS, &chanargs, &chanhdl, NULL);
1349 	if (ret != IBT_SUCCESS) {
1350 		RDS_DPRINTF2(LABEL, "ibt_alloc_rc_channel fail: %d",
1351 		    ret);
1352 		(void) ibt_free_cq(ep->ep_recvcq);
1353 		ep->ep_recvcq = NULL;
1354 		(void) ibt_free_cq(ep->ep_sendcq);
1355 		ep->ep_sendcq = NULL;
1356 		mutex_exit(&ep->ep_lock);
1357 		return (NULL);
1358 	}
1359 	mutex_exit(&ep->ep_lock);
1360 
1361 	/* Chan private should contain the ep */
1362 	(void) ibt_set_chan_private(chanhdl, ep);
1363 
1364 	RDS_DPRINTF4("rds_ep_alloc_rc_channel", "Return: 0x%p", chanhdl);
1365 
1366 	return (chanhdl);
1367 }
1368 
1369 
#if 0

/*
 * Return node guid given a port gid.
 *
 * This function is compiled out. It looks up the node information for
 * 'gid' and returns its node guid, or 0 when the lookup fails.
 */
ib_guid_t
rds_gid_to_node_guid(ib_gid_t gid)
{
	ibt_node_info_t	info;

	RDS_DPRINTF4("rds_gid_to_node_guid", "Enter: gid: %llx:%llx",
	    gid.gid_prefix, gid.gid_guid);

	if (ibt_gid_to_node_info(gid, &info) != IBT_SUCCESS) {
		RDS_DPRINTF2(LABEL, "ibt_gid_node_info for gid: %llx:%llx "
		    "failed", gid.gid_prefix, gid.gid_guid);
		return (0LL);
	}

	RDS_DPRINTF4("rds_gid_to_node_guid", "Return: Node guid: %llx",
	    info.n_node_guid);

	return (info.n_node_guid);
}

#endif
1396 
1397 static void
1398 rds_handle_portup_event(rds_state_t *statep, ibt_hca_hdl_t hdl,
1399     ibt_async_event_t *event)
1400 {
1401 	rds_hca_t		*hcap;
1402 	ibt_hca_portinfo_t	*newpinfop, *oldpinfop;
1403 	uint_t			newsize, oldsize, nport;
1404 	ib_gid_t		gid;
1405 	int			ret;
1406 
1407 	RDS_DPRINTF2("rds_handle_portup_event",
1408 	    "Enter: GUID: 0x%llx Statep: %p", event->ev_hca_guid, statep);
1409 
1410 	rw_enter(&statep->rds_hca_lock, RW_WRITER);
1411 
1412 	hcap = statep->rds_hcalistp;
1413 	while ((hcap != NULL) && (hcap->hca_guid != event->ev_hca_guid)) {
1414 		hcap = hcap->hca_nextp;
1415 	}
1416 
1417 	if (hcap == NULL) {
1418 		RDS_DPRINTF2("rds_handle_portup_event", "HCA: 0x%llx is "
1419 		    "not in our list", event->ev_hca_guid);
1420 		rw_exit(&statep->rds_hca_lock);
1421 		return;
1422 	}
1423 
1424 	ret = ibt_query_hca_ports(hdl, 0, &newpinfop, &nport, &newsize);
1425 	if (ret != IBT_SUCCESS) {
1426 		RDS_DPRINTF2(LABEL, "ibt_query_hca_ports failed: %d", ret);
1427 		rw_exit(&statep->rds_hca_lock);
1428 		return;
1429 	}
1430 
1431 	oldpinfop = hcap->hca_pinfop;
1432 	oldsize = hcap->hca_pinfo_sz;
1433 	hcap->hca_pinfop = newpinfop;
1434 	hcap->hca_pinfo_sz = newsize;
1435 
1436 	(void) ibt_free_portinfo(oldpinfop, oldsize);
1437 
1438 	/* If RDS service is not registered then no bind is needed */
1439 	if (statep->rds_srvhdl == NULL) {
1440 		RDS_DPRINTF2("rds_handle_portup_event",
1441 		    "RDS Service is not registered, so no action needed");
1442 		rw_exit(&statep->rds_hca_lock);
1443 		return;
1444 	}
1445 
1446 	/*
1447 	 * If the service was previously bound on this port and
1448 	 * if this port has changed state down and now up, we do not
1449 	 * need to bind the service again. The bind is expected to
1450 	 * persist across state changes. If the service was never bound
1451 	 * before then we bind it this time.
1452 	 */
1453 	if (hcap->hca_bindhdl[event->ev_port - 1] == NULL) {
1454 
1455 		/* structure copy */
1456 		gid = newpinfop[event->ev_port - 1].p_sgid_tbl[0];
1457 
1458 		/* bind RDS service on the port, pass statep as cm_private */
1459 		ret = ibt_bind_service(statep->rds_srvhdl, gid, NULL, statep,
1460 		    &hcap->hca_bindhdl[event->ev_port - 1]);
1461 		if (ret != IBT_SUCCESS) {
1462 			RDS_DPRINTF2("rds_handle_portup_event",
1463 			    "Bind service for HCA: 0x%llx Port: %d "
1464 			    "gid %llx:%llx returned: %d", event->ev_hca_guid,
1465 			    event->ev_port, gid.gid_prefix, gid.gid_guid, ret);
1466 		}
1467 	}
1468 
1469 	rw_exit(&statep->rds_hca_lock);
1470 
1471 	RDS_DPRINTF2("rds_handle_portup_event", "Return: GUID: 0x%llx",
1472 	    event->ev_hca_guid);
1473 }
1474 
1475 static void
1476 rdsib_add_hca(ib_guid_t hca_guid)
1477 {
1478 	rds_hca_t	*hcap;
1479 	ibt_mr_attr_t	mem_attr;
1480 	ibt_mr_desc_t	mem_desc;
1481 	int		ret;
1482 
1483 	RDS_DPRINTF2("rdsib_add_hca", "Enter: GUID: 0x%llx", hca_guid);
1484 
1485 	hcap = rdsib_init_hca(hca_guid);
1486 	if (hcap == NULL)
1487 		return;
1488 
1489 	/* register the recv memory with this hca */
1490 	mutex_enter(&rds_dpool.pool_lock);
1491 	if (rds_dpool.pool_memp == NULL) {
1492 		/* no memory to register */
1493 		RDS_DPRINTF2("rdsib_add_hca", "No memory to register");
1494 		mutex_exit(&rds_dpool.pool_lock);
1495 		return;
1496 	}
1497 
1498 	mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)rds_dpool.pool_memp;
1499 	mem_attr.mr_len = rds_dpool.pool_memsize;
1500 	mem_attr.mr_as = NULL;
1501 	mem_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE;
1502 
1503 	ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, &mem_attr,
1504 	    &hcap->hca_mrhdl, &mem_desc);
1505 
1506 	mutex_exit(&rds_dpool.pool_lock);
1507 
1508 	if (ret != IBT_SUCCESS) {
1509 		RDS_DPRINTF2("rdsib_add_hca", "ibt_register_mr failed: %d",
1510 		    ret);
1511 	} else {
1512 		rw_enter(&rdsib_statep->rds_hca_lock, RW_WRITER);
1513 		hcap->hca_state = RDS_HCA_STATE_MEM_REGISTERED;
1514 		hcap->hca_lkey = mem_desc.md_lkey;
1515 		hcap->hca_rkey = mem_desc.md_rkey;
1516 		rw_exit(&rdsib_statep->rds_hca_lock);
1517 	}
1518 
1519 	RDS_DPRINTF2("rdsib_add_hca", "Retrun: GUID: 0x%llx", hca_guid);
1520 }
1521 
1522 void rds_close_this_session(rds_session_t *sp, uint8_t wait);
1523 int rds_post_control_message(rds_session_t *sp, uint8_t code, in_port_t port);
1524 
1525 static void
1526 rdsib_del_hca(rds_state_t *statep, ib_guid_t hca_guid)
1527 {
1528 	rds_session_t	*sp;
1529 	rds_hca_t	*hcap;
1530 	rds_hca_state_t	saved_state;
1531 	int		ret, ix;
1532 
1533 	RDS_DPRINTF2("rdsib_del_hca", "Enter: GUID: 0x%llx", hca_guid);
1534 
1535 	/*
1536 	 * This should be a write lock as we don't want anyone to get access
1537 	 * to the hcap while we are modifing its contents
1538 	 */
1539 	rw_enter(&statep->rds_hca_lock, RW_WRITER);
1540 
1541 	hcap = statep->rds_hcalistp;
1542 	while ((hcap != NULL) && (hcap->hca_guid != hca_guid)) {
1543 		hcap = hcap->hca_nextp;
1544 	}
1545 
1546 	/* Prevent initiating any new activity on this HCA */
1547 	ASSERT(hcap != NULL);
1548 	saved_state = hcap->hca_state;
1549 	hcap->hca_state = RDS_HCA_STATE_STOPPING;
1550 
1551 	rw_exit(&statep->rds_hca_lock);
1552 
1553 	/*
1554 	 * stop the outgoing traffic and close any active sessions on this hca.
1555 	 * Any pending messages in the SQ will be allowed to complete.
1556 	 */
1557 	rw_enter(&statep->rds_sessionlock, RW_READER);
1558 	sp = statep->rds_sessionlistp;
1559 	while (sp) {
1560 		if (sp->session_hca_guid != hca_guid) {
1561 			sp = sp->session_nextp;
1562 			continue;
1563 		}
1564 
1565 		rw_enter(&sp->session_lock, RW_WRITER);
1566 		RDS_DPRINTF2("rdsib_del_hca", "SP(%p) State: %d", sp,
1567 		    sp->session_state);
1568 		/*
1569 		 * We are changing the session state in advance. This prevents
1570 		 * further messages to be posted to the SQ. We then
1571 		 * send a control message to the remote and tell it close
1572 		 * the session.
1573 		 */
1574 		sp->session_state = RDS_SESSION_STATE_HCA_CLOSING;
1575 		RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State "
1576 		    "RDS_SESSION_STATE_PASSIVE_CLOSING", sp);
1577 		rw_exit(&sp->session_lock);
1578 
1579 		/*
1580 		 * wait until the sendq is empty then tell the remote to
1581 		 * close this session. This enables for graceful shutdown of
1582 		 * the session
1583 		 */
1584 		rds_is_sendq_empty(&sp->session_dataep, 2);
1585 		(void) rds_post_control_message(sp,
1586 		    RDS_CTRL_CODE_CLOSE_SESSION, 0);
1587 
1588 		sp = sp->session_nextp;
1589 	}
1590 
1591 	/* wait until all the sessions are off this HCA */
1592 	sp = statep->rds_sessionlistp;
1593 	while (sp) {
1594 		if (sp->session_hca_guid != hca_guid) {
1595 			sp = sp->session_nextp;
1596 			continue;
1597 		}
1598 
1599 		rw_enter(&sp->session_lock, RW_READER);
1600 		RDS_DPRINTF2("rdsib_del_hca", "SP(%p) State: %d", sp,
1601 		    sp->session_state);
1602 
1603 		while ((sp->session_state == RDS_SESSION_STATE_HCA_CLOSING) ||
1604 		    (sp->session_state == RDS_SESSION_STATE_ERROR) ||
1605 		    (sp->session_state == RDS_SESSION_STATE_PASSIVE_CLOSING) ||
1606 		    (sp->session_state == RDS_SESSION_STATE_CLOSED)) {
1607 			rw_exit(&sp->session_lock);
1608 			delay(drv_usectohz(1000000));
1609 			rw_enter(&sp->session_lock, RW_READER);
1610 			RDS_DPRINTF2("rdsib_del_hca", "SP(%p) State: %d", sp,
1611 			    sp->session_state);
1612 		}
1613 
1614 		rw_exit(&sp->session_lock);
1615 
1616 		sp = sp->session_nextp;
1617 	}
1618 	rw_exit(&statep->rds_sessionlock);
1619 
1620 	/*
1621 	 * if rdsib_close_ib was called before this, then that would have
1622 	 * unbound the service on all ports. In that case, the HCA structs
1623 	 * will contain stale bindhdls. Hence, we do not call unbind unless
1624 	 * the service is still registered.
1625 	 */
1626 	if (statep->rds_srvhdl != NULL) {
1627 		/* unbind RDS service on all ports on this HCA */
1628 		for (ix = 0; ix < hcap->hca_nports; ix++) {
1629 			if (hcap->hca_bindhdl[ix] == NULL) {
1630 				continue;
1631 			}
1632 
1633 			RDS_DPRINTF2("rdsib_del_hca",
1634 			    "Unbinding Service: port: %d, bindhdl: %p",
1635 			    ix + 1, hcap->hca_bindhdl[ix]);
1636 			(void) ibt_unbind_service(rdsib_statep->rds_srvhdl,
1637 			    hcap->hca_bindhdl[ix]);
1638 			hcap->hca_bindhdl[ix] = NULL;
1639 		}
1640 	}
1641 
1642 	RDS_DPRINTF2("rdsib_del_hca", "HCA(%p) State: %d", hcap,
1643 	    hcap->hca_state);
1644 
1645 	switch (saved_state) {
1646 	case RDS_HCA_STATE_MEM_REGISTERED:
1647 		ASSERT(hcap->hca_mrhdl != NULL);
1648 		ret = ibt_deregister_mr(hcap->hca_hdl, hcap->hca_mrhdl);
1649 		if (ret != IBT_SUCCESS) {
1650 			RDS_DPRINTF2("rdsib_del_hca",
1651 			    "ibt_deregister_mr failed: %d", ret);
1652 			return;
1653 		}
1654 		hcap->hca_mrhdl = NULL;
1655 		/* FALLTHRU */
1656 	case RDS_HCA_STATE_OPEN:
1657 		ASSERT(hcap->hca_hdl != NULL);
1658 		ASSERT(hcap->hca_pdhdl != NULL);
1659 
1660 
1661 		ret = ibt_free_pd(hcap->hca_hdl, hcap->hca_pdhdl);
1662 		if (ret != IBT_SUCCESS) {
1663 			RDS_DPRINTF2("rdsib_del_hca",
1664 			    "ibt_free_pd failed: %d", ret);
1665 		}
1666 
1667 		(void) ibt_free_portinfo(hcap->hca_pinfop, hcap->hca_pinfo_sz);
1668 
1669 		ret = ibt_close_hca(hcap->hca_hdl);
1670 		if (ret != IBT_SUCCESS) {
1671 			RDS_DPRINTF2("rdsib_del_hca",
1672 			    "ibt_close_hca failed: %d", ret);
1673 		}
1674 
1675 		hcap->hca_hdl = NULL;
1676 		hcap->hca_pdhdl = NULL;
1677 		hcap->hca_lkey = 0;
1678 		hcap->hca_rkey = 0;
1679 	}
1680 
1681 	/*
1682 	 * This should be a write lock as we don't want anyone to get access
1683 	 * to the hcap while we are modifing its contents
1684 	 */
1685 	rw_enter(&statep->rds_hca_lock, RW_WRITER);
1686 	hcap->hca_state = RDS_HCA_STATE_REMOVED;
1687 	rw_exit(&statep->rds_hca_lock);
1688 
1689 	RDS_DPRINTF2("rdsib_del_hca", "Return: GUID: 0x%llx", hca_guid);
1690 }
1691 
1692 static void
1693 rds_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code,
1694     ibt_async_event_t *event)
1695 {
1696 	rds_state_t		*statep = (rds_state_t *)clntp;
1697 
1698 	RDS_DPRINTF2("rds_async_handler", "Async code: %d", code);
1699 
1700 	switch (code) {
1701 	case IBT_EVENT_PORT_UP:
1702 		rds_handle_portup_event(statep, hdl, event);
1703 		break;
1704 	case IBT_HCA_ATTACH_EVENT:
1705 		/*
1706 		 * NOTE: In some error recovery paths, it is possible to
1707 		 * receive IBT_HCA_ATTACH_EVENTs on already known HCAs.
1708 		 */
1709 		(void) rdsib_add_hca(event->ev_hca_guid);
1710 		break;
1711 	case IBT_HCA_DETACH_EVENT:
1712 		(void) rdsib_del_hca(statep, event->ev_hca_guid);
1713 		break;
1714 
1715 	default:
1716 		RDS_DPRINTF2(LABEL, "Async event: %d not handled", code);
1717 	}
1718 
1719 	RDS_DPRINTF2("rds_async_handler", "Return: code: %d", code);
1720 }
1721