xref: /illumos-gate/usr/src/uts/common/io/ib/clients/iser/iser_ib.c (revision cf8c0ebaf84c824d8f14486e47457119c138ce3c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/ddi.h>
28 #include <sys/types.h>
29 #include <sys/socket.h>
30 #include <netinet/in.h>
31 #include <sys/sunddi.h>
32 #include <sys/sysmacros.h>
33 #include <sys/iscsi_protocol.h>
34 
35 #include <sys/ib/clients/iser/iser.h>
36 #include <sys/ib/clients/iser/iser_idm.h>
37 
38 /*
39  * iser_ib.c
40  * Routines for InfiniBand transport for iSER
41  *
42  * This file contains the routines that interface with the IBT API to attach to
43  * IBTF, allocate IB resources, handle async events, and post recv work requests.
44  *
45  */
46 
47 static iser_hca_t *iser_ib_gid2hca(ib_gid_t gid);
48 static iser_hca_t *iser_ib_guid2hca(ib_guid_t guid);
49 
50 static iser_hca_t *iser_ib_alloc_hca(ib_guid_t guid);
51 static int iser_ib_free_hca(iser_hca_t *hca);
52 static int iser_ib_update_hcaports(iser_hca_t *hca);
53 static int iser_ib_init_hcas(void);
54 static int iser_ib_fini_hcas(void);
55 
56 static iser_sbind_t *iser_ib_get_bind(
57     iser_svc_t *iser_svc, ib_guid_t hca_guid, ib_gid_t gid);
58 static int iser_ib_activate_port(
59     idm_svc_t *idm_svc, ib_guid_t guid, ib_gid_t gid);
60 static void iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid);
61 
62 static void iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size, uint_t rq_size);
63 static void iser_ib_fini_qp(iser_qp_t *qp);
64 
65 static int iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size,
66     ibt_cq_hdl_t *cq_hdl);
67 
68 static void iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl,
69     ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size,
70     ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs);
71 
72 static void iser_ib_handle_portup_event(ibt_hca_hdl_t hdl,
73     ibt_async_event_t *event);
74 static void iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl,
75     ibt_async_event_t *event);
76 static void iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl,
77     ibt_async_event_t *event);
78 
79 static void iser_ib_post_recv_task(void *arg);
80 
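/*
 * IBTF client registration info for iSER: IBTI version, client class,
 * the unaffiliated async event handler, and the client name.
 */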
81 static struct ibt_clnt_modinfo_s iser_ib_modinfo = {
82 	IBTI_V_CURR,
83 	IBT_STORAGE_DEV,
84 	iser_ib_async_handler,
85 	NULL,
86 	"iSER"
87 };
88 
89 /*
90  * iser_ib_init
91  *
92  * This function registers iSER as a client with IBTF, creates the global
93  * work request cache, and initializes the list of available HCAs.
94  */
95 int
96 iser_ib_init(void)
97 {
98 	int		status;
99 
100 	/* Register with IBTF */
101 	status = ibt_attach(&iser_ib_modinfo, iser_state->is_dip, iser_state,
102 	    &iser_state->is_ibhdl);
103 	if (status != DDI_SUCCESS) {
104 		ISER_LOG(CE_NOTE, "iser_ib_init: ibt_attach failed (0x%x)",
105 		    status);
106 		return (DDI_FAILURE);
107 	}
108 
109 	/* Create the global work request kmem_cache */
110 	iser_state->iser_wr_cache = kmem_cache_create("iser_wr_cache",
111 	    sizeof (iser_wr_t), 0, NULL, NULL, NULL,
112 	    iser_state, NULL, KM_SLEEP);
113 
114 	/* Populate our list of HCAs */
115 	status = iser_ib_init_hcas();
116 	if (status != DDI_SUCCESS) {
117 		/* HCAs failed to initialize, tear it down */
118 		kmem_cache_destroy(iser_state->iser_wr_cache);
119 		(void) ibt_detach(iser_state->is_ibhdl);
120 		iser_state->is_ibhdl = NULL;
121 		ISER_LOG(CE_NOTE, "iser_ib_init: failed to initialize HCAs");
122 		return (DDI_FAILURE);
123 	}
124 
125 	/* Target will register iSER as a service with IBTF when required */
126 
127 	/* Target will bind this service when it comes online */
128 
129 	return (DDI_SUCCESS);
130 }
131 
132 /*
133  * iser_ib_fini
134  *
135  * This function tears down the HCA list, destroys the work request cache,
136  * and detaches iSER from IBTF.
136  */
137 int
138 iser_ib_fini(void)
139 {
140 	/* IDM would have already disabled all the services */
141 
142 	/* Teardown the HCA list and associated resources */
143 	if (iser_ib_fini_hcas() != DDI_SUCCESS)
144 		return (DDI_FAILURE);
145 
146 	/* Teardown the global work request kmem_cache */
147 	kmem_cache_destroy(iser_state->iser_wr_cache);
148 
149 	/* Deregister with IBTF */
150 	if (iser_state->is_ibhdl != NULL) {
151 		(void) ibt_detach(iser_state->is_ibhdl);
152 		iser_state->is_ibhdl = NULL;
153 	}
154 
155 	return (DDI_SUCCESS);
156 }
157 
158 /*
159  * iser_ib_register_service
160  *
161  * This function registers the iSER service using the RDMA-Aware Service ID.
162  */
163 int
164 iser_ib_register_service(idm_svc_t *idm_svc)
165 {
166 	ibt_srv_desc_t	srvdesc;
167 	iser_svc_t	*iser_svc;
168 	int		status;
169 
170 	bzero(&srvdesc, sizeof (ibt_srv_desc_t));
171 
172 	/* Set up IBTI client callback handler from the CM */
173 	srvdesc.sd_handler = iser_ib_cm_handler;
174 
175 	srvdesc.sd_flags = IBT_SRV_NO_FLAGS;
176 
177 	iser_svc = (iser_svc_t *)idm_svc->is_iser_svc;
178 
179 	/* Register the service on the specified port */
180 	status = ibt_register_service(
181 	    iser_state->is_ibhdl, &srvdesc,
182 	    iser_svc->is_svcid, 1, &iser_svc->is_srvhdl, NULL);
183 
184 	return (status);
185 }
186 
187 /*
188  * iser_ib_bind_service
189  *
190  * This function binds a given iSER service on all available HCA ports
191  */
192 int
193 iser_ib_bind_service(idm_svc_t *idm_svc)
194 {
195 	iser_hca_t	*hca;
196 	ib_gid_t	gid;
197 	int		num_ports = 0;
198 	int		num_binds = 0;
199 	int		status;
200 	int		i;
201 
202 	ASSERT(idm_svc != NULL);
203 	ASSERT(idm_svc->is_iser_svc != NULL);
204 
205 	/* Register the iSER service on all available ports */
206 	mutex_enter(&iser_state->is_hcalist_lock);
207 
208 	for (hca = list_head(&iser_state->is_hcalist);
209 	    hca != NULL;
210 	    hca = list_next(&iser_state->is_hcalist, hca)) {
211 
212 		for (i = 0; i < hca->hca_num_ports; i++) {
213 			num_ports++;
214 			if (hca->hca_port_info[i].p_linkstate !=
215 			    IBT_PORT_ACTIVE) {
216 				/*
217 				 * Move on. We will attempt to bind service
218 				 * in our async handler if the port comes up
219 				 * at a later time.
220 				 */
221 				continue;
222 			}
223 
224 			gid = hca->hca_port_info[i].p_sgid_tbl[0];
225 
226 			/* If the port is already bound, skip */
227 			if (iser_ib_get_bind(
228 			    idm_svc->is_iser_svc, hca->hca_guid, gid) == NULL) {
229 
230 				status = iser_ib_activate_port(
231 				    idm_svc, hca->hca_guid, gid);
232 				if (status != IBT_SUCCESS) {
233 					ISER_LOG(CE_NOTE,
234 					    "iser_ib_bind_service: "
235 					    "iser_ib_activate_port failure "
236 					    "(0x%x)", status);
237 					continue;
238 				}
239 			}
240 			num_binds++;
241 		}
242 	}
243 	mutex_exit(&iser_state->is_hcalist_lock);
244 
245 	if (num_binds) {
246 		ISER_LOG(CE_NOTE, "iser_ib_bind_service: Service available on "
247 		    "(%d) of (%d) ports", num_binds, num_ports);
248 		return (ISER_STATUS_SUCCESS);
249 	} else {
250 		ISER_LOG(CE_NOTE, "iser_ib_bind_service: Did not bind service");
251 		return (ISER_STATUS_FAIL);
252 	}
253 }
254 
255 /*
256  * iser_ib_unbind_service
257  *
258  * This function unbinds a given iSER service from all HCA ports
259  */
260 void
261 iser_ib_unbind_service(idm_svc_t *idm_svc)
262 {
263 	iser_svc_t	*iser_svc;
264 	iser_sbind_t	*is_sbind, *next_sb;
265 
266 	if (idm_svc != NULL && idm_svc->is_iser_svc != NULL) {
267 
268 		iser_svc = idm_svc->is_iser_svc;
269 
270 		for (is_sbind = list_head(&iser_svc->is_sbindlist);
271 		    is_sbind != NULL;
272 		    is_sbind = next_sb) {
273 			next_sb = list_next(&iser_svc->is_sbindlist, is_sbind);
274 			ibt_unbind_service(iser_svc->is_srvhdl,
275 			    is_sbind->is_sbindhdl);
276 			list_remove(&iser_svc->is_sbindlist, is_sbind);
277 			kmem_free(is_sbind, sizeof (iser_sbind_t));
278 		}
279 	}
280 }
281 
282 /* ARGSUSED */
283 void
284 iser_ib_deregister_service(idm_svc_t *idm_svc)
285 {
286 	iser_svc_t	*iser_svc;
287 
288 	if (idm_svc != NULL && idm_svc->is_iser_svc != NULL) {
289 
290 		iser_svc = (iser_svc_t *)idm_svc->is_iser_svc;
291 		ibt_deregister_service(iser_state->is_ibhdl,
292 		    iser_svc->is_srvhdl);
293 		ibt_release_ip_sid(iser_svc->is_svcid);
294 	}
295 }
296 
297 /*
298  * iser_ib_get_paths
299  * This function finds the IB path between the local and the remote address.
300  *
301  */
302 int
303 iser_ib_get_paths(ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip,
304     ibt_path_info_t *path, ibt_path_ip_src_t *path_src_ip)
305 {
306 	ibt_ip_path_attr_t	ipattr;
307 	int			status;
308 
309 	(void) bzero(&ipattr, sizeof (ibt_ip_path_attr_t));
310 	ipattr.ipa_dst_ip	= remote_ip;
311 	ipattr.ipa_src_ip	= *local_ip;
312 	ipattr.ipa_max_paths	= 1;
313 	ipattr.ipa_ndst		= 1;
314 
315 	(void) bzero(path, sizeof (ibt_path_info_t));
316 	status = ibt_get_ip_paths(iser_state->is_ibhdl, IBT_PATH_NO_FLAGS,
317 	    &ipattr, path, NULL, path_src_ip);
318 	if (status != IBT_SUCCESS) {
319 		ISER_LOG(CE_NOTE, "iser_ib_get_paths: ibt_get_ip_paths "
320 		    "failure: status (%d)", status);
321 		return (status);
322 	}
323 
324 	if (local_ip != NULL) {
325 		ISER_LOG(CE_NOTE, "iser_ib_get_paths success: IP[%x to %x]",
326 		    local_ip->un.ip4addr, remote_ip->un.ip4addr);
327 	} else {
328 		ISER_LOG(CE_NOTE, "iser_ib_get_paths success: "
329 		    "IP[INADDR_ANY to %x]", remote_ip->un.ip4addr);
330 	}
331 
332 	return (ISER_STATUS_SUCCESS);
333 }
334 
335 /*
336  * iser_ib_alloc_rc_channel
337  *
338  * This function allocates a reliable connected (RC) channel between the
339  * given local and remote IP addresses.
340  */
341 iser_chan_t *
342 iser_ib_alloc_rc_channel(ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip)
343 {
344 
345 	iser_chan_t			*chan;
346 	ib_gid_t			lgid;
347 	uint8_t				hca_port; /* from path */
348 	iser_hca_t			*hca;
349 	ibt_path_ip_src_t		path_src_ip;
350 	ibt_rc_chan_alloc_args_t	chanargs;
351 	uint_t				sq_size, rq_size;
352 	int				status;
353 
354 	chan = kmem_zalloc(sizeof (iser_chan_t), KM_SLEEP);
355 
356 	mutex_init(&chan->ic_lock, NULL, MUTEX_DRIVER, NULL);
357 	mutex_init(&chan->ic_sq_post_lock, NULL, MUTEX_DRIVER, NULL);
358 
359 	/* Lookup a path to the given destination */
360 	status = iser_ib_get_paths(local_ip, remote_ip, &chan->ic_ibt_path,
361 	    &path_src_ip);
362 
363 	if (status != ISER_STATUS_SUCCESS) {
364 		ISER_LOG(CE_NOTE, "iser_ib_get_paths failed: status (%d)",
365 		    status);
366 		mutex_destroy(&chan->ic_lock);
367 		mutex_destroy(&chan->ic_sq_post_lock);
368 		kmem_free(chan, sizeof (iser_chan_t));
369 		return (NULL);
370 	}
371 
372 	/* get the local gid from the path info */
373 	lgid = chan->ic_ibt_path.pi_prim_cep_path.cep_adds_vect.av_sgid;
374 
375 	/* get the hca port from the path info */
376 	hca_port = chan->ic_ibt_path.pi_prim_cep_path.cep_hca_port_num;
377 
378 	/* Lookup the hca using the gid in the path info */
379 	hca = iser_ib_gid2hca(lgid);
380 	if (hca == NULL) {
381 		ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel: failed "
382 		    "to lookup HCA handle");
383 		mutex_destroy(&chan->ic_lock);
384 		mutex_destroy(&chan->ic_sq_post_lock);
385 		kmem_free(chan, sizeof (iser_chan_t));
386 		return (NULL);
387 	}
388 
389 	/* Set up the iSER channel handle with HCA and IP data */
390 	chan->ic_hca		= hca;
391 	chan->ic_localip	= path_src_ip.ip_primary;
392 	chan->ic_remoteip	= *remote_ip;
393 
394 	/*
395 	 * Determine the queue sizes, based upon the HCA query data.
396 	 * For our Work Queues, we will use either our default value,
397 	 * or the HCA's maximum value, whichever is smaller.
398 	 */
399 	sq_size = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_SENDQ_SIZE);
400 	rq_size = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_RECVQ_SIZE);
401 
402 	/*
403 	 * For our Completion Queues, we again check the device maximum.
404 	 * We want to end up with CQs that are the next size up from the
405 	 * WQs they are servicing so that they have some overhead.
406 	 */
407 	if (hca->hca_attr.hca_max_cq_sz >= (sq_size + 1)) {
408 		chan->ic_sendcq_sz = sq_size + 1;
409 	} else {
410 		chan->ic_sendcq_sz = hca->hca_attr.hca_max_cq_sz;
411 		sq_size = chan->ic_sendcq_sz - 1;
412 	}
413 
414 	if (hca->hca_attr.hca_max_cq_sz >= (rq_size + 1)) {
415 		chan->ic_recvcq_sz = rq_size + 1;
416 	} else {
417 		chan->ic_recvcq_sz = hca->hca_attr.hca_max_cq_sz;
418 		rq_size = chan->ic_recvcq_sz - 1;
419 	}
420 
421 	/* Initialize the iSER channel's QP handle */
422 	iser_ib_init_qp(chan, sq_size, rq_size);
423 
424 	/* Set up the Send Completion Queue */
425 	status = iser_ib_setup_cq(hca->hca_hdl, chan->ic_sendcq_sz,
426 	    &chan->ic_sendcq);
427 	if (status != ISER_STATUS_SUCCESS) {
428 		iser_ib_fini_qp(&chan->ic_qp);
429 		mutex_destroy(&chan->ic_lock);
430 		mutex_destroy(&chan->ic_sq_post_lock);
431 		kmem_free(chan, sizeof (iser_chan_t));
432 		return (NULL);
433 	}
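	/* Install the send CQ handler and arm the CQ for the next completion */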
434 	ibt_set_cq_handler(chan->ic_sendcq, iser_ib_sendcq_handler, chan);
435 	ibt_enable_cq_notify(chan->ic_sendcq, IBT_NEXT_COMPLETION);
436 
437 	/* Set up the Receive Completion Queue */
438 	status = iser_ib_setup_cq(hca->hca_hdl, chan->ic_recvcq_sz,
439 	    &chan->ic_recvcq);
440 	if (status != ISER_STATUS_SUCCESS) {
441 		(void) ibt_free_cq(chan->ic_sendcq);
442 		iser_ib_fini_qp(&chan->ic_qp);
443 		mutex_destroy(&chan->ic_lock);
444 		mutex_destroy(&chan->ic_sq_post_lock);
445 		kmem_free(chan, sizeof (iser_chan_t));
446 		return (NULL);
447 	}
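	/* Install the receive CQ handler and arm the CQ for the next completion */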
448 	ibt_set_cq_handler(chan->ic_recvcq, iser_ib_recvcq_handler, chan);
449 	ibt_enable_cq_notify(chan->ic_recvcq, IBT_NEXT_COMPLETION);
450 
451 	/* Setup the channel arguments */
452 	iser_ib_setup_chanargs(hca_port, chan->ic_sendcq, chan->ic_recvcq,
453 	    sq_size, rq_size, hca->hca_pdhdl, &chanargs);
454 
455 	status = ibt_alloc_rc_channel(hca->hca_hdl,
456 	    IBT_ACHAN_NO_FLAGS, &chanargs, &chan->ic_chanhdl, NULL);
457 	if (status != IBT_SUCCESS) {
458 		ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel: failed "
459 		    "ibt_alloc_rc_channel: status (%d)", status);
460 		(void) ibt_free_cq(chan->ic_sendcq);
461 		(void) ibt_free_cq(chan->ic_recvcq);
462 		iser_ib_fini_qp(&chan->ic_qp);
463 		mutex_destroy(&chan->ic_lock);
464 		mutex_destroy(&chan->ic_sq_post_lock);
465 		kmem_free(chan, sizeof (iser_chan_t));
466 		return (NULL);
467 	}
468 
469 	/* Set the 'channel' as the client private data */
470 	(void) ibt_set_chan_private(chan->ic_chanhdl, chan);
471 
472 	ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel success: "
473 	    "chanhdl (0x%p), IP:[%llx to %llx], lgid (%llx:%llx), HCA(%llx) %d",
474 	    (void *)chan->ic_chanhdl,
475 	    (longlong_t)local_ip->un.ip4addr,
476 	    (longlong_t)remote_ip->un.ip4addr,
477 	    (longlong_t)lgid.gid_prefix, (longlong_t)lgid.gid_guid,
478 	    (longlong_t)hca->hca_guid, hca_port);
479 
480 	return (chan);
481 }
482 
483 /*
484  * iser_ib_open_rc_channel
485  * This function opens an RC connection on the given allocated RC channel
486  */
487 int
488 iser_ib_open_rc_channel(iser_chan_t *chan)
489 {
490 	ibt_ip_cm_info_t	ipcm_info;
491 	iser_private_data_t	iser_priv_data;
492 	ibt_chan_open_args_t	ocargs;
493 	ibt_rc_returns_t	ocreturns;
494 	int			status;
495 
496 	mutex_enter(&chan->ic_lock);
497 
498 	/*
499 	 * For connection establishment, the initiator sends a CM REQ using the
500 	 * iSER RDMA-Aware Service ID. Included are the source and destination
501 	 * IP addresses, and the src port.
502 	 */
503 	bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t));
504 	ipcm_info.src_addr = chan->ic_localip;
505 	ipcm_info.dst_addr = chan->ic_remoteip;
506 	ipcm_info.src_port = chan->ic_lport;
507 
508 	/*
509 	 * The CM private data field carries the iSER connection parameters,
510 	 * namely the zero-based virtual address exception (ZBVAE) and the
511 	 * send-with-invalidate exception (SIE) bits.
512 	 *
513 	 * Solaris IBT does not currently support ZBVAE or SIE, so both bits are set.
514 	 */
515 	iser_priv_data.rsvd1	= 0;
516 	iser_priv_data.sie	= 1;
517 	iser_priv_data.zbvae	= 1;
518 
519 	status = ibt_format_ip_private_data(&ipcm_info,
520 	    sizeof (iser_private_data_t), &iser_priv_data);
521 	if (status != IBT_SUCCESS) {
522 		ISER_LOG(CE_NOTE, "iser_ib_open_rc_channel failed: %d", status);
523 		mutex_exit(&chan->ic_lock);
524 		return (status);
525 	}
526 
527 	/*
528 	 * Set the SID we are attempting to connect to, based upon the
529 	 * remote port number.
530 	 */
531 	chan->ic_ibt_path.pi_sid = ibt_get_ip_sid(IPPROTO_TCP, chan->ic_rport);
532 
533 	/* Set up the args for the channel open */
534 	bzero(&ocargs, sizeof (ibt_chan_open_args_t));
535 	ocargs.oc_path			= &chan->ic_ibt_path;
536 	ocargs.oc_cm_handler		= iser_ib_cm_handler;
537 	ocargs.oc_cm_clnt_private	= iser_state;
538 	ocargs.oc_rdma_ra_out		= 4;
539 	ocargs.oc_rdma_ra_in		= 4;
540 	ocargs.oc_path_retry_cnt	= 2;
541 	ocargs.oc_path_rnr_retry_cnt	= 2;
542 	ocargs.oc_priv_data_len		= sizeof (iser_private_data_t);
543 	ocargs.oc_priv_data		= &iser_priv_data;
544 
545 	bzero(&ocreturns, sizeof (ibt_rc_returns_t));
546 
547 	status = ibt_open_rc_channel(chan->ic_chanhdl,
548 	    IBT_OCHAN_NO_FLAGS, IBT_BLOCKING, &ocargs, &ocreturns);
549 
550 	if (status != IBT_SUCCESS) {
551 		ISER_LOG(CE_NOTE, "iser_ib_open_rc_channel failed: %d", status);
552 		mutex_exit(&chan->ic_lock);
553 		return (status);
554 	}
555 
556 	mutex_exit(&chan->ic_lock);
557 	return (IDM_STATUS_SUCCESS);
558 }
559 
560 /*
561  * iser_ib_close_rc_channel
562  * This function closes the RC channel related to this iser_chan handle.
563  * We invoke ibt_close_rc_channel() in blocking mode, with no callbacks.
564  */
565 void
566 iser_ib_close_rc_channel(iser_chan_t *chan)
567 {
568 	int			status;
569 
570 	mutex_enter(&chan->ic_lock);
571 	status = ibt_close_rc_channel(chan->ic_chanhdl, IBT_BLOCKING, NULL,
572 	    0, NULL, NULL, 0);
573 	if (status != IBT_SUCCESS) {
574 		ISER_LOG(CE_NOTE, "iser_ib_close_rc_channel: "
575 		    "ibt_close_rc_channel failed: status (%d)", status);
576 	}
577 	mutex_exit(&chan->ic_lock);
578 }
579 
580 /*
581  * iser_ib_free_rc_channel
582  *
583  * This function tears down an RC channel's QP initialization and frees it.
584  * Note that we do not need synchronization here; the channel has been
585  * closed already, so we should only have completion polling occurring.  Once
586  * complete, we are free to free the IBTF channel, WQ and CQ resources, and
587  * our own related resources.
588  */
589 void
590 iser_ib_free_rc_channel(iser_chan_t *chan)
591 {
592 	iser_qp_t	*iser_qp;
593 
594 	iser_qp = &chan->ic_qp;
595 
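	/*
	 * The connection lock is held on entry; it is dropped around the
	 * delays below while we wait for the work queues to drain.
	 */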
596 	/* Ensure the SQ is empty */
597 	while (chan->ic_sq_post_count != 0) {
598 		mutex_exit(&chan->ic_conn->ic_lock);
599 		delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
600 		mutex_enter(&chan->ic_conn->ic_lock);
601 	}
602 	mutex_destroy(&chan->ic_sq_post_lock);
603 
604 	/* Ensure the RQ is empty */
605 	(void) ibt_flush_channel(chan->ic_chanhdl);
606 	mutex_enter(&iser_qp->qp_lock);
607 	while (iser_qp->rq_level != 0) {
608 		mutex_exit(&iser_qp->qp_lock);
609 		mutex_exit(&chan->ic_conn->ic_lock);
610 		delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
611 		mutex_enter(&chan->ic_conn->ic_lock);
612 		mutex_enter(&iser_qp->qp_lock);
613 	}
614 
615 	/* Free our QP handle */
616 	mutex_exit(&iser_qp->qp_lock);
617 	(void) iser_ib_fini_qp(iser_qp);
618 
619 	/* Free the IBT channel resources */
620 	(void) ibt_free_channel(chan->ic_chanhdl);
621 	chan->ic_chanhdl = NULL;
622 
623 	/* Free the CQs */
624 	ibt_free_cq(chan->ic_sendcq);
625 	ibt_free_cq(chan->ic_recvcq);
626 
627 	/* Free the chan handle */
628 	mutex_destroy(&chan->ic_lock);
629 	kmem_free(chan, sizeof (iser_chan_t));
630 }
631 
632 /*
633  * iser_ib_post_recv
634  *
635  * This function handles keeping the RQ full on a given channel.
636  * This routine will mostly be run on a taskq, and will check the
637  * current fill level of the RQ, and post as many WRs as necessary
638  * to fill it again.
639  */
640 
641 int
642 iser_ib_post_recv_async(ibt_channel_hdl_t chanhdl)
643 {
644 	iser_chan_t	*chan;
645 	int		status;
646 
647 	/* Pull our iSER channel handle from the private data */
648 	chan = (iser_chan_t *)ibt_get_chan_private(chanhdl);
649 
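	/*
	 * Hold the IDM connection across the taskq dispatch; it is released
	 * by the dispatched task, or below if the dispatch fails.
	 */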
650 	idm_conn_hold(chan->ic_conn->ic_idmc);
651 	status = ddi_taskq_dispatch(iser_taskq, iser_ib_post_recv_task,
652 	    (void *)chanhdl, DDI_NOSLEEP);
653 	if (status != DDI_SUCCESS) {
654 		idm_conn_rele(chan->ic_conn->ic_idmc);
655 	}
656 
657 	return (status);
658 }
659 
660 static void
661 iser_ib_post_recv_task(void *arg)
662 {
663 	ibt_channel_hdl_t	chanhdl = arg;
664 	iser_chan_t		*chan;
665 
666 	/* Pull our iSER channel handle from the private data */
667 	chan = (iser_chan_t *)ibt_get_chan_private(chanhdl);
668 
669 	iser_ib_post_recv(chanhdl);
670 	idm_conn_rele(chan->ic_conn->ic_idmc);
671 }
672 
673 void
674 iser_ib_post_recv(ibt_channel_hdl_t chanhdl)
675 {
676 	iser_chan_t	*chan;
677 	iser_hca_t	*hca;
678 	iser_msg_t	*msg;
679 	ibt_recv_wr_t	*wrlist, wr[ISER_IB_RQ_POST_MAX];
680 	int		rq_space, msg_ret;
681 	int		total_num, npost;
682 	uint_t		nposted;
683 	int		status, i;
684 	iser_qp_t	*iser_qp;
685 	ib_gid_t	lgid;
686 
687 	/* Pull our iSER channel handle from the private data */
688 	chan = (iser_chan_t *)ibt_get_chan_private(chanhdl);
689 
690 	ASSERT(chan != NULL);
691 
692 	mutex_enter(&chan->ic_conn->ic_lock);
693 
694 	/* Bail out if the connection is closed; no need for more recv WRs */
695 	if ((chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSING) ||
696 	    (chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSED)) {
697 		mutex_exit(&chan->ic_conn->ic_lock);
698 		return;
699 	}
700 
701 	/* get the QP handle from the iser_chan */
702 	iser_qp = &chan->ic_qp;
703 
704 	/* get the local gid from the path info */
705 	lgid = chan->ic_ibt_path.pi_prim_cep_path.cep_adds_vect.av_sgid;
706 
707 	/* get the hca port from the path info */
708 	hca = iser_ib_gid2hca(lgid);
709 	if (hca == NULL) {
710 		ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to retrieve "
711 		    "HCA handle");
712 		mutex_exit(&chan->ic_conn->ic_lock);
713 		return;
714 	}
715 
716 	/* check for space to post on the RQ */
717 	mutex_enter(&iser_qp->qp_lock);
718 	rq_space = iser_qp->rq_depth - iser_qp->rq_level;
719 	if (rq_space == 0) {
720 		/* The RQ is full, clear the pending flag and return */
721 		iser_qp->rq_taskqpending = B_FALSE;
722 		mutex_exit(&iser_qp->qp_lock);
723 		mutex_exit(&chan->ic_conn->ic_lock);
724 		return;
725 	}
726 
727 	/* Keep track of the lowest value for rq_min_post_level */
728 	if (iser_qp->rq_level < iser_qp->rq_min_post_level)
729 		iser_qp->rq_min_post_level = iser_qp->rq_level;
730 
731 	mutex_exit(&iser_qp->qp_lock);
732 
733 	/* we've room to post, so pull from the msg cache */
734 	msg = iser_msg_get(hca, rq_space, &msg_ret);
735 	if (msg == NULL) {
736 		ISER_LOG(CE_NOTE, "iser_ib_post_recv: no message handles "
737 		    "available in msg cache currently");
738 		/*
739 		 * There are no messages on the cache. Wait a half-
740 		 * second, then try again.
741 		 */
742 		delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
743 		status = iser_ib_post_recv_async(chanhdl);
744 		if (status != DDI_SUCCESS) {
745 			ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to "
746 			    "redispatch routine");
747 			/* Failed to dispatch, clear pending flag */
748 			mutex_enter(&iser_qp->qp_lock);
749 			iser_qp->rq_taskqpending = B_FALSE;
750 			mutex_exit(&iser_qp->qp_lock);
751 		}
752 		mutex_exit(&chan->ic_conn->ic_lock);
753 		return;
754 	}
755 
756 	if (msg_ret != rq_space) {
757 		ISER_LOG(CE_NOTE, "iser_ib_post_recv: requested number of "
758 		    "messages not allocated: requested (%d) allocated (%d)",
759 		    rq_space, msg_ret);
760 		/* We got some, but not all, of our requested depth */
761 		rq_space = msg_ret;
762 	}
763 
764 	/*
765 	 * Now, walk through the allocated WRs and post them,
766 	 * ISER_IB_RQ_POST_MAX (or less) at a time.
767 	 */
768 	wrlist = &wr[0];
769 	total_num = rq_space;
770 
771 	while (total_num) {
772 		/* determine the number to post on this iteration */
773 		npost = (total_num > ISER_IB_RQ_POST_MAX) ?
774 		    ISER_IB_RQ_POST_MAX : total_num;
775 
776 		/* build a list of WRs from the msg list */
777 		for (i = 0; i < npost; i++) {
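			/* the WR ID carries the iser_msg_t pointer for use at completion */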
778 			wrlist[i].wr_id		= (ibt_wrid_t)(uintptr_t)msg;
779 			wrlist[i].wr_nds	= ISER_IB_SGLIST_SIZE;
780 			wrlist[i].wr_sgl	= &msg->msg_ds;
781 			msg = msg->nextp;
782 		}
783 
784 		/* post the list to the RQ */
785 		nposted = 0;
786 		status = ibt_post_recv(chanhdl, wrlist, npost, &nposted);
787 		if ((status != IBT_SUCCESS) || (nposted != npost)) {
788 			ISER_LOG(CE_NOTE, "iser_ib_post_recv: ibt_post_recv "
789 			    "failed: requested (%d) posted (%d) status (%d)",
790 			    npost, nposted, status);
791 			total_num -= nposted;
792 			break;
793 		}
794 
795 		/* decrement total number to post by the number posted */
796 		total_num -= nposted;
797 	}
798 
799 	mutex_enter(&iser_qp->qp_lock);
800 	if (total_num != 0) {
801 		ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to fill RQ, "
802 		    "failed to post (%d) WRs", total_num);
803 		iser_qp->rq_level += rq_space - total_num;
804 	} else {
805 		iser_qp->rq_level += rq_space;
806 	}
807 
808 	/*
809 	 * Now that we've filled the RQ, check that all of the recv WRs
810 	 * haven't just been immediately consumed. If so, taskqpending is
811 	 * still B_TRUE, so we need to fire off a taskq thread to post
812 	 * more WRs.
813 	 */
814 	if (iser_qp->rq_level == 0) {
815 		mutex_exit(&iser_qp->qp_lock);
816 		status = iser_ib_post_recv_async(chanhdl);
817 		if (status != DDI_SUCCESS) {
818 			ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to "
819 			    "dispatch followup routine");
820 			/* Failed to dispatch, clear pending flag */
821 			mutex_enter(&iser_qp->qp_lock);
822 			iser_qp->rq_taskqpending = B_FALSE;
823 			mutex_exit(&iser_qp->qp_lock);
824 		}
825 	} else {
826 		/*
827 		 * We're done, we've filled the RQ. Clear the taskq
828 		 * flag so that we can run again.
829 		 */
830 		iser_qp->rq_taskqpending = B_FALSE;
831 		mutex_exit(&iser_qp->qp_lock);
832 	}
833 
834 	mutex_exit(&chan->ic_conn->ic_lock);
835 }
836 
837 /*
838  * iser_ib_handle_portup_event()
839  * This handles the IBT_EVENT_PORT_UP unaffiliated asynchronous event.
840  *
841  * To facilitate a seamless bring-up of the port and to configure the CM
842  * service for inbound iSER service requests on the newly active port, the
843  * existing IDM services are checked for iSER support.
844  * If an iSER service was already created, that service is simply bound to
845  * the gid of the newly active port. If, on the other hand, the CM service
846  * did not exist (i.e. the service was using only socket communication), a
847  * new CM service is first registered with the saved service parameters and
848  * then bound to the newly active port.
849  *
850  */
851 /* ARGSUSED */
852 static void
853 iser_ib_handle_portup_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event)
854 {
855 	iser_hca_t		*hca;
856 	ib_gid_t		gid;
857 	idm_svc_t		*idm_svc;
858 	int			status;
859 
860 	ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: HCA(0x%llx) port(%d)",
861 	    (longlong_t)event->ev_hca_guid, event->ev_port);
862 
863 	/*
864 	 * Query all ports on the HCA and update the port information
865 	 * maintained in the iser_hca_t structure
866 	 */
867 	hca = iser_ib_guid2hca(event->ev_hca_guid);
868 	if (hca == NULL) {
869 
870 		/* HCA is just made available, first port on that HCA */
871 		hca = iser_ib_alloc_hca(event->ev_hca_guid);
872 
873 		mutex_enter(&iser_state->is_hcalist_lock);
874 		list_insert_tail(&iser_state->is_hcalist, hca);
875 		iser_state->is_num_hcas++;
876 		mutex_exit(&iser_state->is_hcalist_lock);
877 
878 	} else {
879 
880 		status = iser_ib_update_hcaports(hca);
881 
882 		if (status != IBT_SUCCESS) {
883 			ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
884 			    "status(0x%x): iser_ib_update_hcaports failed: "
885 			    "HCA(0x%llx) port(%d)", status,
886 			    (longlong_t)event->ev_hca_guid, event->ev_port);
887 			return;
888 		}
889 	}
890 
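	/* get the gid of the newly active port */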
891 	gid = hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0];
892 
893 	/*
894 	 * Iterate through the global list of IDM target services
895 	 * and check for existing iSER CM service.
896 	 */
897 	mutex_enter(&idm.idm_global_mutex);
898 	for (idm_svc = list_head(&idm.idm_tgt_svc_list);
899 	    idm_svc != NULL;
900 	    idm_svc = list_next(&idm.idm_tgt_svc_list, idm_svc)) {
901 
902 
903 		if (idm_svc->is_iser_svc == NULL) {
904 
905 			/* Establish a new CM service for iSER requests */
906 			status = iser_tgt_svc_create(
907 			    &idm_svc->is_svc_req, idm_svc);
908 
909 			if (status != IBT_SUCCESS) {
910 				ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
911 				    "status(0x%x): iser_tgt_svc_create failed: "
912 				    "HCA(0x%llx) port(%d)", status,
913 				    (longlong_t)event->ev_hca_guid,
914 				    event->ev_port);
915 
916 				continue;
917 			}
918 		}
919 
920 		status = iser_ib_activate_port(
921 		    idm_svc, event->ev_hca_guid, gid);
922 		if (status != IBT_SUCCESS) {
923 
924 			ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
925 			    "status(0x%x): Bind service on port "
926 			    "(%llx:%llx) failed",
927 			    status, (longlong_t)gid.gid_prefix,
928 			    (longlong_t)gid.gid_guid);
929 
930 			continue;
931 		}
932 		ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: service bound "
933 		    "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid,
934 		    event->ev_port);
935 	}
936 	mutex_exit(&idm.idm_global_mutex);
937 
938 	ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event success: "
939 	    "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid,
940 	    event->ev_port);
941 }
942 
943 /*
944  * iser_ib_handle_portdown_event()
945  * This handles the IBT_EVENT_PORT_DOWN unaffiliated asynchronous error.
946  *
947  * Unconfigure the CM service on the deactivated port and teardown the
948  * connections that are using the CM service.
949  */
950 /* ARGSUSED */
951 static void
952 iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event)
953 {
954 	iser_hca_t		*hca;
955 	ib_gid_t		gid;
956 	int			status;
957 
958 	/*
959 	 * Query all ports on the HCA and update the port information
960 	 * maintained in the iser_hca_t structure
961 	 */
962 	hca = iser_ib_guid2hca(event->ev_hca_guid);
963 	ASSERT(hca != NULL);
964 
965 	status = iser_ib_update_hcaports(hca);
966 	if (status != IBT_SUCCESS) {
967 		ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event status(0x%x): "
968 		    "iser_ib_update_hcaports failed: HCA(0x%llx) port(%d)",
969 		    status, (longlong_t)event->ev_hca_guid, event->ev_port);
970 		return;
971 	}
972 
973 	/* get the gid of the port that went down */
974 	gid = hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0];
975 	iser_ib_deactivate_port(event->ev_hca_guid, gid);
976 
977 	ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event success: "
978 	    "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid,
979 	    event->ev_port);
980 }
981 
982 /*
983  * iser_ib_handle_hca_detach_event()
984  * Quiesce all activity bound for the port, tear down the connections, unbind
985  * the iSER services on all ports, and release the HCA handle.
986  */
987 /* ARGSUSED */
988 static void
989 iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event)
990 {
991 	iser_hca_t	*nexthca, *hca;
992 	int		i, status;
993 
994 	ISER_LOG(CE_NOTE, "iser_ib_handle_hca_detach_event: HCA(0x%llx)",
995 	    (longlong_t)event->ev_hca_guid);
996 
997 	hca = iser_ib_guid2hca(event->ev_hca_guid);
998 	for (i = 0; i < hca->hca_num_ports; i++) {
999 		iser_ib_deactivate_port(hca->hca_guid,
1000 		    hca->hca_port_info[i].p_sgid_tbl[0]);
1001 	}
1002 
1003 	/*
1004 	 * Update the HCA list maintained in the iser_state. Free the
1005 	 * resources allocated to the HCA, i.e. caches, protection domain
1006 	 */
1007 	mutex_enter(&iser_state->is_hcalist_lock);
1008 
1009 	for (hca = list_head(&iser_state->is_hcalist);
1010 	    hca != NULL;
1011 	    hca = nexthca) {
1012 
1013 		nexthca = list_next(&iser_state->is_hcalist, hca);
1014 
1015 		if (hca->hca_guid == event->ev_hca_guid) {
1016 
1017 			list_remove(&iser_state->is_hcalist, hca);
1018 			iser_state->is_num_hcas--;
1019 
1020 			status = iser_ib_free_hca(hca);
1021 			if (status != DDI_SUCCESS) {
1022 				ISER_LOG(CE_WARN, "iser_ib_handle_hca_detach: "
1023 				    "Failed to free hca(%p)", (void *)hca);
1024 				list_insert_tail(&iser_state->is_hcalist, hca);
1025 				iser_state->is_num_hcas++;
1026 			}
1027 			/* No way to return status to IBT if this fails */
1028 		}
1029 	}
1030 	mutex_exit(&iser_state->is_hcalist_lock);
1031 
1032 }
1033 
1034 /*
1035  * iser_ib_async_handler
1036  * The IBT asynchronous event handler is registered with the framework via
1037  * the ibt_attach() routine. This function handles the following
1038  * asynchronous events.
1039  * IBT_EVENT_PORT_UP
1040  * IBT_ERROR_PORT_DOWN
1041  * IBT_HCA_ATTACH_EVENT
1042  * IBT_HCA_DETACH_EVENT
1043  */
1044 /* ARGSUSED */
1045 void
1046 iser_ib_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code,
1047     ibt_async_event_t *event)
1048 {
1049 	switch (code) {
1050 	case IBT_EVENT_PORT_UP:
1051 		iser_ib_handle_portup_event(hdl, event);
1052 		break;
1053 
1054 	case IBT_ERROR_PORT_DOWN:
1055 		iser_ib_handle_portdown_event(hdl, event);
1056 		break;
1057 
1058 	case IBT_HCA_ATTACH_EVENT:
1059 		/*
1060 		 * A new HCA device is available for use; ignore this
1061 		 * event, because the corresponding IBT_EVENT_PORT_UP
1062 		 * events will be triggered and handled accordingly.
1063 		 */
1064 		break;
1065 
1066 	case IBT_HCA_DETACH_EVENT:
1067 		iser_ib_handle_hca_detach_event(hdl, event);
1068 		break;
1069 
1070 	default:
1071 		break;
1072 	}
1073 }
1074 
1075 /*
1076  * iser_ib_init_hcas
1077  *
1078  * This function opens all the HCA devices, gathers the HCA state information
1079  * and adds the HCA handle for each HCA found in the iser_soft_state.
1080  */
1081 static int
1082 iser_ib_init_hcas(void)
1083 {
1084 	ib_guid_t	*guid;
1085 	int		num_hcas;
1086 	int		i;
1087 	iser_hca_t	*hca;
1088 
1089 	/* Retrieve the HCA list */
1090 	num_hcas = ibt_get_hca_list(&guid);
1091 	if (num_hcas == 0) {
1092 		/*
1093 		 * This shouldn't happen, but might if we have all HCAs
1094 		 * detach prior to initialization.
1095 		 */
1096 		return (DDI_FAILURE);
1097 	}
1098 
1099 	/* Initialize the hcalist lock */
1100 	mutex_init(&iser_state->is_hcalist_lock, NULL, MUTEX_DRIVER, NULL);
1101 
1102 	/* Create the HCA list */
1103 	list_create(&iser_state->is_hcalist, sizeof (iser_hca_t),
1104 	    offsetof(iser_hca_t, hca_node));
1105 
1106 	for (i = 0; i < num_hcas; i++) {
1107 
1108 		ISER_LOG(CE_NOTE, "iser_ib_init_hcas: initializing HCA "
1109 		    "(0x%llx)", (longlong_t)guid[i]);
1110 
1111 		hca = iser_ib_alloc_hca(guid[i]);
1112 		if (hca == NULL) {
1113 			/* This shouldn't happen, teardown and fail */
1114 			(void) iser_ib_fini_hcas();
1115 			(void) ibt_free_hca_list(guid, num_hcas);
1116 			return (DDI_FAILURE);
1117 		}
1118 
1119 		mutex_enter(&iser_state->is_hcalist_lock);
1120 		list_insert_tail(&iser_state->is_hcalist, hca);
1121 		iser_state->is_num_hcas++;
1122 		mutex_exit(&iser_state->is_hcalist_lock);
1123 
1124 	}
1125 
1126 	/* Free the IBT HCA list */
1127 	(void) ibt_free_hca_list(guid, num_hcas);
1128 
1129 	/* Check that we've initialized at least one HCA */
1130 	mutex_enter(&iser_state->is_hcalist_lock);
1131 	if (list_is_empty(&iser_state->is_hcalist)) {
1132 		ISER_LOG(CE_NOTE, "iser_ib_init_hcas: failed to initialize "
1133 		    "any HCAs");
1134 
1135 		mutex_exit(&iser_state->is_hcalist_lock);
1136 		(void) iser_ib_fini_hcas();
1137 		return (DDI_FAILURE);
1138 	}
1139 	mutex_exit(&iser_state->is_hcalist_lock);
1140 
1141 	return (DDI_SUCCESS);
1142 }
1143 
1144 /*
1145  * iser_ib_fini_hcas
1146  *
1147  * Teardown the iSER HCA list initialized above.
1148  */
1149 static int
1150 iser_ib_fini_hcas(void)
1151 {
1152 	iser_hca_t	*nexthca, *hca;
1153 	int		status;
1154 
1155 	mutex_enter(&iser_state->is_hcalist_lock);
1156 	for (hca = list_head(&iser_state->is_hcalist);
1157 	    hca != NULL;
1158 	    hca = nexthca) {
1159 
1160 		nexthca = list_next(&iser_state->is_hcalist, hca);
1161 
1162 		list_remove(&iser_state->is_hcalist, hca);
1163 
1164 		status = iser_ib_free_hca(hca);
1165 		if (status != IBT_SUCCESS) {
1166 			ISER_LOG(CE_NOTE, "iser_ib_fini_hcas: failed to free "
1167 			    "HCA during fini");
1168 			list_insert_tail(&iser_state->is_hcalist, hca);
1169 			return (DDI_FAILURE);
1170 		}
1171 
1172 		iser_state->is_num_hcas--;
1173 
1174 	}
1175 	mutex_exit(&iser_state->is_hcalist_lock);
1176 	list_destroy(&iser_state->is_hcalist);
1177 	mutex_destroy(&iser_state->is_hcalist_lock);
1178 
1179 	return (DDI_SUCCESS);
1180 }
1181 
1182 /*
1183  * iser_ib_alloc_hca
1184  *
1185  * This function opens the given HCA device, gathers the HCA state and port
1186  * information, and returns an initialized iser_hca_t handle.
1187  */
1188 static iser_hca_t *
1189 iser_ib_alloc_hca(ib_guid_t guid)
1190 {
1191 	iser_hca_t	*hca;
1192 	int		status;
1193 
1194 	/* Allocate an iser_hca_t HCA handle */
1195 	hca = (iser_hca_t *)kmem_zalloc(sizeof (iser_hca_t), KM_SLEEP);
1196 
1197 	/* Open this HCA */
1198 	status = ibt_open_hca(iser_state->is_ibhdl, guid, &hca->hca_hdl);
1199 	if (status != IBT_SUCCESS) {
1200 		ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_open_hca failed:"
1201 		    " guid (0x%llx) status (0x%x)", (longlong_t)guid, status);
1202 		kmem_free(hca, sizeof (iser_hca_t));
1203 		return (NULL);
1204 	}
1205 
1206 	hca->hca_guid		= guid;
1207 	hca->hca_clnt_hdl	= iser_state->is_ibhdl;
1208 
1209 	/* Query the HCA */
1210 	status = ibt_query_hca(hca->hca_hdl, &hca->hca_attr);
1211 	if (status != IBT_SUCCESS) {
1212 		ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_query_hca "
1213 		    "failure: guid (0x%llx) status (0x%x)",
1214 		    (longlong_t)guid, status);
1215 		(void) ibt_close_hca(hca->hca_hdl);
1216 		kmem_free(hca, sizeof (iser_hca_t));
1217 		return (NULL);
1218 	}
1219 
1220 	/* Query all ports on the HCA */
1221 	status = ibt_query_hca_ports(hca->hca_hdl, 0,
1222 	    &hca->hca_port_info, &hca->hca_num_ports,
1223 	    &hca->hca_port_info_sz);
1224 	if (status != IBT_SUCCESS) {
1225 		ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: "
1226 		    "ibt_query_hca_ports failure: guid (0x%llx) "
1227 		    "status (0x%x)", (longlong_t)guid, status);
1228 		(void) ibt_close_hca(hca->hca_hdl);
1229 		kmem_free(hca, sizeof (iser_hca_t));
1230 		return (NULL);
1231 	}
1232 
1233 	/* Allocate a single PD on this HCA */
1234 	status = ibt_alloc_pd(hca->hca_hdl, IBT_PD_NO_FLAGS,
1235 	    &hca->hca_pdhdl);
1236 	if (status != IBT_SUCCESS) {
1237 		ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_alloc_pd "
1238 		    "failure: guid (0x%llx) status (0x%x)",
1239 		    (longlong_t)guid, status);
1240 		(void) ibt_close_hca(hca->hca_hdl);
1241 		ibt_free_portinfo(hca->hca_port_info, hca->hca_port_info_sz);
1242 		kmem_free(hca, sizeof (iser_hca_t));
1243 		return (NULL);
1244 	}
1245 
1246 	/* Initialize the message and data MR caches for this HCA */
1247 	iser_init_hca_caches(hca);
1248 
1249 	return (hca);
1250 }
1251 
1252 static int
1253 iser_ib_free_hca(iser_hca_t *hca)
1254 {
1255 	int			status;
1256 	ibt_hca_portinfo_t	*hca_port_info;
1257 	uint_t			hca_port_info_sz;
1258 
1259 	ASSERT(hca != NULL);
1260 	if (hca->hca_failed)
1261 		return (DDI_FAILURE);
1262 
1263 	hca_port_info = hca->hca_port_info;
1264 	hca_port_info_sz = hca->hca_port_info_sz;
1265 
1266 	/*
1267 	 * Free the memory regions before freeing
1268 	 * the associated protection domain
1269 	 */
1270 	iser_fini_hca_caches(hca);
1271 
1272 	status = ibt_free_pd(hca->hca_hdl, hca->hca_pdhdl);
1273 	if (status != IBT_SUCCESS) {
1274 		ISER_LOG(CE_NOTE, "iser_ib_free_hca: failed to free PD "
1275 		    "status=0x%x", status);
1276 		goto out_caches;
1277 	}
1278 
1279 	status = ibt_close_hca(hca->hca_hdl);
1280 	if (status != IBT_SUCCESS) {
1281 		ISER_LOG(CE_NOTE, "iser_ib_free_hca: failed to close HCA "
1282 		    "status=0x%x", status);
1283 		goto out_pd;
1284 	}
1285 
1286 	ibt_free_portinfo(hca_port_info, hca_port_info_sz);
1287 
1288 	kmem_free(hca, sizeof (iser_hca_t));
1289 	return (DDI_SUCCESS);
1290 
1291 	/*
1292 	 * We only managed to partially tear down the HCA; try to put it back
1293 	 * the way it was before returning.
1294 	 */
1295 out_pd:
1296 	status = ibt_alloc_pd(hca->hca_hdl, IBT_PD_NO_FLAGS, &hca->hca_pdhdl);
1297 	if (status != IBT_SUCCESS) {
1298 		hca->hca_failed = B_TRUE;
1299 		/* Report error and exit */
1300 		ISER_LOG(CE_NOTE, "iser_ib_free_hca: could not re-alloc PD "
1301 		    "status=0x%x", status);
1302 		return (DDI_FAILURE);
1303 	}
1304 
1305 out_caches:
1306 	iser_init_hca_caches(hca);
1307 
1308 	return (DDI_FAILURE);
1309 }
1310 
1311 static int
1312 iser_ib_update_hcaports(iser_hca_t *hca)
1313 {
1314 	ibt_hca_portinfo_t	*pinfop, *oldpinfop;
1315 	uint_t			size, oldsize, nport;
1316 	int			status;
1317 
1318 	ASSERT(hca != NULL);
1319 
1320 	status = ibt_query_hca_ports(hca->hca_hdl, 0, &pinfop, &nport, &size);
1321 	if (status != IBT_SUCCESS) {
1322 		ISER_LOG(CE_NOTE, "ibt_query_hca_ports failed: %d", status);
1323 		return (status);
1324 	}
1325 
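	/* Swap in the new port info and free the old snapshot */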
1326 	oldpinfop = hca->hca_port_info;
1327 	oldsize	= hca->hca_port_info_sz;
1328 	hca->hca_port_info = pinfop;
1329 	hca->hca_port_info_sz = size;
1330 
1331 	(void) ibt_free_portinfo(oldpinfop, oldsize);
1332 
1333 	return (IBT_SUCCESS);
1334 }
1335 
1336 /*
1337  * iser_ib_gid2hca
1338  * Given a gid, find the corresponding hca
1339  */
1340 iser_hca_t *
1341 iser_ib_gid2hca(ib_gid_t gid)
1342 {
1343 
1344 	iser_hca_t	*hca;
1345 	int		i;
1346 
1347 	mutex_enter(&iser_state->is_hcalist_lock);
1348 	for (hca = list_head(&iser_state->is_hcalist);
1349 	    hca != NULL;
1350 	    hca = list_next(&iser_state->is_hcalist, hca)) {
1351 
1352 		for (i = 0; i < hca->hca_num_ports; i++) {
1353 			if ((hca->hca_port_info[i].p_sgid_tbl[0].gid_prefix ==
1354 			    gid.gid_prefix) &&
1355 			    (hca->hca_port_info[i].p_sgid_tbl[0].gid_guid ==
1356 			    gid.gid_guid)) {
1357 
1358 				mutex_exit(&iser_state->is_hcalist_lock);
1359 
1360 				return (hca);
1361 			}
1362 		}
1363 	}
1364 	mutex_exit(&iser_state->is_hcalist_lock);
1365 	return (NULL);
1366 }
1367 
1368 /*
1369  * iser_ib_guid2hca
1370  * Given a HCA guid, find the corresponding HCA
1371  */
1372 iser_hca_t *
1373 iser_ib_guid2hca(ib_guid_t guid)
1374 {
1375 
1376 	iser_hca_t	*hca;
1377 
1378 	mutex_enter(&iser_state->is_hcalist_lock);
1379 	for (hca = list_head(&iser_state->is_hcalist);
1380 	    hca != NULL;
1381 	    hca = list_next(&iser_state->is_hcalist, hca)) {
1382 
1383 		if (hca->hca_guid == guid) {
1384 			mutex_exit(&iser_state->is_hcalist_lock);
1385 			return (hca);
1386 		}
1387 	}
1388 	mutex_exit(&iser_state->is_hcalist_lock);
1389 	return (NULL);
1390 }
1391 
1392 /*
1393  * iser_ib_conv_sockaddr2ibtaddr
1394  * This function converts a socket address into the IBT format
1395  */
1396 void iser_ib_conv_sockaddr2ibtaddr(
1397     idm_sockaddr_t *saddr, ibt_ip_addr_t *ibt_addr)
1398 {
1399 	if (saddr == NULL) {
1400 		ibt_addr->family = AF_UNSPEC;
1401 		ibt_addr->un.ip4addr = 0;
1402 	} else {
1403 		switch (saddr->sin.sa_family) {
1404 		case AF_INET:
1405 
1406 			ibt_addr->family	= saddr->sin4.sin_family;
1407 			ibt_addr->un.ip4addr	= saddr->sin4.sin_addr.s_addr;
1408 			break;
1409 
1410 		case AF_INET6:
1411 
1412 			ibt_addr->family	= saddr->sin6.sin6_family;
1413 			ibt_addr->un.ip6addr	= saddr->sin6.sin6_addr;
1414 			break;
1415 
1416 		default:
1417 			ibt_addr->family = AF_UNSPEC;
1418 		}
1419 
1420 	}
1421 }
1422 
1423 /*
1424  * iser_ib_conv_ibtaddr2sockaddr
1425  * This function converts an IBT ip address handle to a sockaddr
1426  */
1427 void iser_ib_conv_ibtaddr2sockaddr(struct sockaddr_storage *ss,
1428     ibt_ip_addr_t *ibt_addr, in_port_t port)
1429 {
1430 	struct sockaddr_in *sin;
1431 	struct sockaddr_in6 *sin6;
1432 
1433 	switch (ibt_addr->family) {
1434 	case AF_INET:
1435 	case AF_UNSPEC:
1436 
1437 		sin = (struct sockaddr_in *)ibt_addr;
1438 		sin->sin_port = ntohs(port);
1439 		bcopy(sin, ss, sizeof (struct sockaddr_in));
1440 		break;
1441 
1442 	case AF_INET6:
1443 
1444 		sin6 = (struct sockaddr_in6 *)ibt_addr;
1445 		sin6->sin6_port = ntohs(port);
1446 		bcopy(sin6, ss, sizeof (struct sockaddr_in6));
1447 		break;
1448 
1449 	default:
1450 		ISER_LOG(CE_NOTE, "iser_ib_conv_ibtaddr2sockaddr: "
1451 		    "unknown family type: 0x%x", ibt_addr->family);
1452 	}
1453 }
1454 
1455 /*
1456  * iser_ib_setup_cq
1457  * This function sets up the Completion Queue size and allocates the specified
1458  * Completion Queue
1459  */
1460 static int
1461 iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size, ibt_cq_hdl_t *cq_hdl)
1462 {
1463 
1464 	ibt_cq_attr_t		cq_attr;
1465 	int			status;
1466 
1467 	cq_attr.cq_size		= cq_size;
1468 	cq_attr.cq_sched	= 0;
1469 	cq_attr.cq_flags	= IBT_CQ_NO_FLAGS;
1470 
1471 	/* Allocate a Completion Queue */
1472 	status = ibt_alloc_cq(hca_hdl, &cq_attr, cq_hdl, NULL);
1473 	if (status != IBT_SUCCESS) {
1474 		ISER_LOG(CE_NOTE, "iser_ib_setup_cq: ibt_alloc_cq failure (%d)",
1475 		    status);
1476 		return (status);
1477 	}
1478 
1479 	return (ISER_STATUS_SUCCESS);
1480 }
1481 
1482 /*
1483  * iser_ib_setup_chanargs
1484  *
1485  */
1486 static void
1487 iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl,
1488     ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size,
1489     ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs)
1490 {
1491 
1492 	bzero(cargs, sizeof (ibt_rc_chan_alloc_args_t));
1493 
1494 	/*
1495 	 * Set up the size of the channel's send queue, receive queue and the
1496 	 * maximum number of elements in a scatter gather list of work requests
1497 	 * posted to the send and receive queues.
1498 	 */
1499 	cargs->rc_sizes.cs_sq		= sq_size;
1500 	cargs->rc_sizes.cs_rq		= rq_size;
1501 	cargs->rc_sizes.cs_sq_sgl	= ISER_IB_SGLIST_SIZE;
1502 	cargs->rc_sizes.cs_rq_sgl	= ISER_IB_SGLIST_SIZE;
1503 
1504 	/*
1505 	 * With IBT_ALL_SIGNALED, every work request posted to the send queue
1506 	 * generates a send request completion.
1507 	 */
1508 	cargs->rc_flags			= IBT_ALL_SIGNALED;
1509 
1510 	/* Enable RDMA read and RDMA write on the channel end points */
1511 	cargs->rc_control		= IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
1512 
1513 	/* Set the local hca port on which the channel is allocated */
1514 	cargs->rc_hca_port_num		= hca_port;
1515 
1516 	/* Set the Send and Receive Completion Queue handles */
1517 	cargs->rc_scq			= scq_hdl;
1518 	cargs->rc_rcq			= rcq_hdl;
1519 
1520 	/* Set the protection domain associated with the channel */
1521 	cargs->rc_pd			= hca_pdhdl;
1522 
1523 	/* No SRQ usage */
1524 	cargs->rc_srq			= NULL;
1525 }
1526 
1527 /*
1528  * iser_ib_init_qp
1529  * Initialize the QP handle
1530  */
1531 void
1532 iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size, uint_t rq_size)
1533 {
1534 	/* Initialize the handle lock */
1535 	mutex_init(&chan->ic_qp.qp_lock, NULL, MUTEX_DRIVER, NULL);
1536 
1537 	/* Record queue sizes */
1538 	chan->ic_qp.sq_size = sq_size;
1539 	chan->ic_qp.rq_size = rq_size;
1540 
1541 	/* Initialize the RQ monitoring data */
1542 	chan->ic_qp.rq_depth  = rq_size;
1543 	chan->ic_qp.rq_level  = 0;
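	/* RQ low water mark, computed as a percentage of the recv CQ size */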
1544 	chan->ic_qp.rq_lwm = (chan->ic_recvcq_sz * ISER_IB_RQ_LWM_PCT) / 100;
1545 
1546 	/* Initialize the taskq flag */
1547 	chan->ic_qp.rq_taskqpending = B_FALSE;
1548 }
1549 
1550 /*
1551  * iser_ib_fini_qp
1552  * Teardown the QP handle
1553  */
1554 void
1555 iser_ib_fini_qp(iser_qp_t *qp)
1556 {
1557 	/* Destroy the handle lock */
1558 	mutex_destroy(&qp->qp_lock);
1559 }
1560 
1561 static int
1562 iser_ib_activate_port(idm_svc_t *idm_svc, ib_guid_t guid, ib_gid_t gid)
1563 {
1564 	iser_svc_t	*iser_svc;
1565 	iser_sbind_t	*is_sbind;
1566 	int		status;
1567 
1568 	iser_svc = idm_svc->is_iser_svc;
1569 
1570 	/*
1571 	 * Save the address of the service bind handle in the
1572 	 * iser_svc_t to undo the service binding at a later time
1573 	 */
1574 	is_sbind = kmem_zalloc(sizeof (iser_sbind_t), KM_SLEEP);
1575 	is_sbind->is_gid	= gid;
1576 	is_sbind->is_guid	= guid;
1577 
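	/* Bind the CM service to this port's gid, saving the bind handle */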
1578 	status  = ibt_bind_service(iser_svc->is_srvhdl, gid, NULL,
1579 	    idm_svc, &is_sbind->is_sbindhdl);
1580 
1581 	if (status != IBT_SUCCESS) {
1582 		ISER_LOG(CE_NOTE, "iser_ib_activate_port: status(0x%x): "
1583 		    "Bind service(%llx) on port(%llx:%llx) failed",
1584 		    status, (longlong_t)iser_svc->is_svcid,
1585 		    (longlong_t)gid.gid_prefix, (longlong_t)gid.gid_guid);
1586 
1587 		kmem_free(is_sbind, sizeof (iser_sbind_t));
1588 
1589 		return (status);
1590 	}
1591 
1592 	list_insert_tail(&iser_svc->is_sbindlist, is_sbind);
1593 
1594 	return (IBT_SUCCESS);
1595 }
1596 
1597 static void
1598 iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid)
1599 {
1600 	iser_svc_t	*iser_svc;
1601 	iser_conn_t	*iser_conn;
1602 	iser_sbind_t	*is_sbind;
1603 	idm_conn_t	*idm_conn;
1604 
1605 	/*
1606 	 * Iterate through the global list of IDM target connections.
1607 	 * Issue a TRANSPORT_FAIL for any connections on this port, and
1608 	 * if there is a bound service running on the port, tear it down.
1609 	 */
1610 	mutex_enter(&idm.idm_global_mutex);
1611 	for (idm_conn = list_head(&idm.idm_tgt_conn_list);
1612 	    idm_conn != NULL;
1613 	    idm_conn = list_next(&idm.idm_tgt_conn_list, idm_conn)) {
1614 
1615 		if (idm_conn->ic_transport_type != IDM_TRANSPORT_TYPE_ISER) {
1616 			/* this is not an iSER connection, skip it */
1617 			continue;
1618 		}
1619 
1620 		iser_conn = idm_conn->ic_transport_private;
1621 		if (iser_conn->ic_chan->ic_ibt_path.pi_hca_guid != hca_guid) {
1622 			/* this iSER connection is on a different port */
1623 			continue;
1624 		}
1625 
1626 		/* Fail the transport for this connection */
1627 		idm_conn_event(idm_conn, CE_TRANSPORT_FAIL, IDM_STATUS_FAIL);
1628 
1629 		if (idm_conn->ic_conn_type == CONN_TYPE_INI) {
1630 			/* initiator connection, nothing else to do */
1631 			continue;
1632 		}
1633 
1634 		/* Check for a service binding */
1635 		iser_svc = idm_conn->ic_svc_binding->is_iser_svc;
1636 		is_sbind = iser_ib_get_bind(iser_svc, hca_guid, gid);
1637 		if (is_sbind != NULL) {
1638 			/* This service is still bound, tear it down */
1639 			ibt_unbind_service(iser_svc->is_srvhdl,
1640 			    is_sbind->is_sbindhdl);
1641 			list_remove(&iser_svc->is_sbindlist, is_sbind);
1642 			kmem_free(is_sbind, sizeof (iser_sbind_t));
1643 		}
1644 	}
1645 	mutex_exit(&idm.idm_global_mutex);
1646 }
1647 
1648 static iser_sbind_t *
1649 iser_ib_get_bind(iser_svc_t *iser_svc, ib_guid_t hca_guid, ib_gid_t gid)
1650 {
1651 	iser_sbind_t	*is_sbind;
1652 
1653 	for (is_sbind = list_head(&iser_svc->is_sbindlist);
1654 	    is_sbind != NULL;
1655 	    is_sbind = list_next(&iser_svc->is_sbindlist, is_sbind)) {
1656 
1657 		if ((is_sbind->is_guid == hca_guid) &&
1658 		    (is_sbind->is_gid.gid_prefix == gid.gid_prefix) &&
1659 		    (is_sbind->is_gid.gid_guid == gid.gid_guid)) {
1660 			return (is_sbind);
1661 		}
1662 	}
1663 	return (NULL);
1664 }
1665