1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/kmem.h>
28 #include <sys/conf.h>
29 #include <sys/ddi.h>
30 #include <sys/sunddi.h>
31 #include <sys/ksynch.h>
32 
33 #include <sys/ib/clients/eoib/enx_impl.h>
34 
/*
 * Module (static) info passed to IBTL during ibt_attach.  The initializer
 * is positional, so the values below must stay in the member order of
 * ibt_clnt_modinfo_t (declared outside this file).
 */
static ibt_clnt_modinfo_t eibnx_clnt_modinfo = {
	IBTI_V_CURR,
	IBT_GENERIC,
	eibnx_async_handler,	/* presumably the async event callback */
	NULL,
	"EoIB Nexus"		/* presumably the client name -- confirm */
};
45 
/*
 * Well-known multicast GIDs used by the nexus.  Both are filled in by
 * eibnx_state_init() (prefix taken from EIB_GUID_ADVERTISE_PREFIX and
 * EIB_GUID_SOLICIT_PREFIX respectively, guid half set to 0) and are used
 * as the mgid when the groups are queried, joined and left.
 */
ib_gid_t enx_advertise_mgid;
ib_gid_t enx_solicit_mgid;
48 
/*
 * Static function declarations.
 *
 * The first group are setup routines that return ENX_E_SUCCESS or
 * ENX_E_FAILURE.  The eibnx_rb_* routines are the matching rollback
 * ("rb") counterparts; apart from eibnx_rb_ibt_init(), which reports
 * whether teardown succeeded, they return nothing.
 */
static int eibnx_state_init(void);
static int eibnx_setup_txbufs(eibnx_thr_info_t *);
static int eibnx_setup_rxbufs(eibnx_thr_info_t *);
static int eibnx_join_solicit_mcg(eibnx_thr_info_t *);
static int eibnx_join_advertise_mcg(eibnx_thr_info_t *);
static int eibnx_rb_ibt_init(eibnx_t *);
static void eibnx_rb_state_init(void);
static void eibnx_rb_setup_txbufs(eibnx_thr_info_t *);
static void eibnx_rb_setup_rxbufs(eibnx_thr_info_t *);
static void eibnx_rb_join_solicit_mcg(eibnx_thr_info_t *);
static void eibnx_rb_join_advertise_mcg(eibnx_thr_info_t *);
63 
64 /*
65  * eibnx_ibt_init() is expected to be called during the nexus driver's
66  * attach time; given that there is only one instance of the nexus
67  * driver allowed, and no threads are active before the initialization
68  * is complete, we don't really have to acquire any driver specific mutex
69  * within this routine.
70  */
71 int
eibnx_ibt_init(eibnx_t * ss)72 eibnx_ibt_init(eibnx_t *ss)
73 {
74 	eibnx_hca_t *hca_list;
75 	eibnx_hca_t *hca_tail;
76 	eibnx_hca_t *hca;
77 	uint_t num_hcas;
78 	ib_guid_t *hca_guids;
79 	ibt_status_t ret;
80 	int i;
81 
82 	/*
83 	 * Do per-state initialization
84 	 */
85 	(void) eibnx_state_init();
86 
87 	/*
88 	 * Attach to IBTL
89 	 */
90 	if ((ret = ibt_attach(&eibnx_clnt_modinfo, ss->nx_dip, ss,
91 	    &ss->nx_ibt_hdl)) != IBT_SUCCESS) {
92 		ENX_DPRINTF_ERR("ibt_attach() failed, ret=%d", ret);
93 		eibnx_rb_state_init();
94 		return (ENX_E_FAILURE);
95 	}
96 
97 	/*
98 	 * Get the list of HCA guids on the system
99 	 */
100 	if ((num_hcas = ibt_get_hca_list(&hca_guids)) == 0) {
101 		ENX_DPRINTF_VERBOSE("no HCAs found on the system");
102 		if ((ret = ibt_detach(ss->nx_ibt_hdl)) != IBT_SUCCESS) {
103 			ENX_DPRINTF_ERR("ibt_detach() failed, ret=%d", ret);
104 		}
105 		ss->nx_ibt_hdl = NULL;
106 		return (ENX_E_FAILURE);
107 	}
108 
109 	/*
110 	 * Open the HCAs and store the handles
111 	 */
112 	hca_list = hca_tail = NULL;
113 	for (i = 0; i < num_hcas; i++) {
114 		/*
115 		 * If we cannot open a HCA, allocate a protection domain
116 		 * on it or get portinfo on it, print an error and move on
117 		 * to the next HCA.  Otherwise, queue it up in our hca list
118 		 */
119 		if ((hca = eibnx_prepare_hca(hca_guids[i])) == NULL)
120 			continue;
121 
122 		if (hca_tail) {
123 			hca_tail->hc_next = hca;
124 		} else {
125 			hca_list = hca;
126 		}
127 		hca_tail = hca;
128 	}
129 
130 	/*
131 	 * Free the HCA guid list we've allocated via ibt_get_hca_list()
132 	 */
133 	ibt_free_hca_list(hca_guids, num_hcas);
134 
135 	/*
136 	 * Put the hca list in the state structure
137 	 */
138 	mutex_enter(&ss->nx_lock);
139 	ss->nx_hca = hca_list;
140 	mutex_exit(&ss->nx_lock);
141 
142 	/*
143 	 * Register for subnet notices
144 	 */
145 	ibt_register_subnet_notices(ss->nx_ibt_hdl,
146 	    eibnx_subnet_notices_handler, ss);
147 
148 	return (ENX_E_SUCCESS);
149 }
150 
151 static int
eibnx_state_init(void)152 eibnx_state_init(void)
153 {
154 	eibnx_t *ss = enx_global_ss;
155 	kthread_t *kt;
156 
157 	/*
158 	 * Initialize synchronization primitives
159 	 */
160 	mutex_init(&ss->nx_lock, NULL, MUTEX_DRIVER, NULL);
161 	mutex_init(&ss->nx_nodeq_lock, NULL, MUTEX_DRIVER, NULL);
162 	cv_init(&ss->nx_nodeq_cv, NULL, CV_DEFAULT, NULL);
163 	mutex_init(&ss->nx_busop_lock, NULL, MUTEX_DRIVER, NULL);
164 	cv_init(&ss->nx_busop_cv, NULL, CV_DEFAULT, NULL);
165 
166 	/*
167 	 * Initialize well-known mgids: there must be a better way to
168 	 * do this instead of having to express every single gid as a
169 	 * tuple of two 8-byte integer quantities.
170 	 */
171 	enx_solicit_mgid.gid_prefix = EIB_GUID_SOLICIT_PREFIX;
172 	enx_solicit_mgid.gid_guid = 0;
173 	enx_advertise_mgid.gid_prefix = EIB_GUID_ADVERTISE_PREFIX;
174 	enx_advertise_mgid.gid_guid = 0;
175 
176 	/*
177 	 * Start up the eoib node creation thread
178 	 */
179 	kt = thread_create(NULL, 0, eibnx_create_eoib_node, NULL, 0,
180 	    &p0, TS_RUN, minclsyspri);
181 	ss->nx_nodeq_kt_did = kt->t_did;
182 
183 	return (ENX_E_SUCCESS);
184 }
185 
186 /*
187  * Locate the two multicast groups: the All-EoIB-GWs-GID and
188  * All-EoIB-ENodes-GID.  Make sure the MTU is something that
189  * we can work with and Qkey is as expected.
190  */
191 int
eibnx_find_mgroups(eibnx_thr_info_t * info)192 eibnx_find_mgroups(eibnx_thr_info_t *info)
193 {
194 	ibt_hca_portinfo_t *pi = info->ti_pi;
195 	ibt_mcg_attr_t mcg_attr;
196 	ib_gid_t rgid;
197 	ibt_status_t ret;
198 	uint_t entries;
199 
200 	mutex_enter(&info->ti_mcg_lock);
201 
202 	if ((info->ti_mcg_status & ENX_MCGS_FOUND) == ENX_MCGS_FOUND) {
203 		mutex_exit(&info->ti_mcg_lock);
204 		return (ENX_E_SUCCESS);
205 	}
206 
207 	/*
208 	 * Request GID defining this port
209 	 */
210 	rgid = pi->p_sgid_tbl[0];
211 
212 	/*
213 	 * First, locate the multicast group to use for sending solicit
214 	 * requests to the GW
215 	 */
216 	bzero(&mcg_attr, sizeof (ibt_mcg_attr_t));
217 	mcg_attr.mc_mgid = enx_solicit_mgid;
218 	mcg_attr.mc_pkey = (ib_pkey_t)EIB_ADMIN_PKEY;
219 	mcg_attr.mc_qkey = (ib_qkey_t)EIB_FIP_QKEY;
220 
221 	if ((ret = ibt_query_mcg(rgid, &mcg_attr, 1, &info->ti_solicit_mcg,
222 	    &entries)) != IBT_SUCCESS) {
223 		ENX_DPRINTF_WARN("solicit mcg (gid=%llx.%llx) not found, "
224 		    "ibt_query_mcg() returned %d", enx_solicit_mgid.gid_prefix,
225 		    enx_solicit_mgid.gid_guid, ret);
226 		goto find_mgroups_fail;
227 	}
228 
229 	/*
230 	 * Make sure the multicast mtu isn't bigger than the port mtu
231 	 * and the multicast group's qkey is the same as EIB_FIP_QKEY.
232 	 */
233 	if (info->ti_solicit_mcg->mc_mtu > pi->p_mtu) {
234 		ENX_DPRINTF_WARN("solicit mcg (gid=%llx.%llx) mtu too big, "
235 		    "0x%x > 0x%x", enx_solicit_mgid.gid_prefix,
236 		    enx_solicit_mgid.gid_guid, info->ti_solicit_mcg->mc_mtu,
237 		    pi->p_mtu);
238 		goto find_mgroups_fail;
239 	}
240 	if (info->ti_solicit_mcg->mc_qkey != EIB_FIP_QKEY) {
241 		ENX_DPRINTF_WARN("solicit mcg (gid=%llx.%llx) qkey bad, "
242 		    "actual=0x%x, expected=0x%x", enx_solicit_mgid.gid_prefix,
243 		    enx_solicit_mgid.gid_guid, info->ti_solicit_mcg->mc_qkey,
244 		    EIB_FIP_QKEY);
245 		goto find_mgroups_fail;
246 	}
247 
248 	/*
249 	 * Now, locate the multicast group for receiving discover
250 	 * advertisements from the GW
251 	 */
252 	bzero(&mcg_attr, sizeof (ibt_mcg_attr_t));
253 	mcg_attr.mc_mgid = enx_advertise_mgid;
254 	mcg_attr.mc_pkey = (ib_pkey_t)EIB_ADMIN_PKEY;
255 	mcg_attr.mc_qkey = (ib_qkey_t)EIB_FIP_QKEY;
256 
257 	if ((ret = ibt_query_mcg(rgid, &mcg_attr, 1, &info->ti_advertise_mcg,
258 	    &entries)) != IBT_SUCCESS) {
259 		ENX_DPRINTF_WARN("advertise mcg (gid=%llx.%llx) not found, "
260 		    "ibt_query_mcg() returned %d",
261 		    enx_advertise_mgid.gid_prefix,
262 		    enx_advertise_mgid.gid_guid, ret);
263 		goto find_mgroups_fail;
264 	}
265 
266 	/*
267 	 * Verify the multicast group's mtu and qkey as before
268 	 */
269 	if (info->ti_advertise_mcg->mc_mtu > pi->p_mtu) {
270 		ENX_DPRINTF_WARN("advertise mcg (gid=%llx.%llx) mtu too big, "
271 		    "0x%x > 0x%x", enx_advertise_mgid.gid_prefix,
272 		    enx_advertise_mgid.gid_guid,
273 		    info->ti_advertise_mcg->mc_mtu, pi->p_mtu);
274 		goto find_mgroups_fail;
275 	}
276 	if (info->ti_advertise_mcg->mc_qkey != EIB_FIP_QKEY) {
277 		ENX_DPRINTF_WARN("advertise mcg (gid=%llx.%llx) qkey bad, "
278 		    "actual=0x%x, expected=0x%x",
279 		    enx_advertise_mgid.gid_prefix, enx_advertise_mgid.gid_guid,
280 		    info->ti_advertise_mcg->mc_qkey, EIB_FIP_QKEY);
281 		goto find_mgroups_fail;
282 	}
283 
284 	info->ti_mcg_status |= ENX_MCGS_FOUND;
285 	mutex_exit(&info->ti_mcg_lock);
286 
287 	return (ENX_E_SUCCESS);
288 
289 find_mgroups_fail:
290 	if (info->ti_advertise_mcg) {
291 		ibt_free_mcg_info(info->ti_advertise_mcg, 1);
292 		info->ti_advertise_mcg = NULL;
293 	}
294 	if (info->ti_solicit_mcg) {
295 		ibt_free_mcg_info(info->ti_solicit_mcg, 1);
296 		info->ti_solicit_mcg = NULL;
297 	}
298 	mutex_exit(&info->ti_mcg_lock);
299 
300 	return (ENX_E_FAILURE);
301 }
302 
303 /*
304  * Allocate and setup a single completion queue for tx and rx
305  */
306 int
eibnx_setup_cq(eibnx_thr_info_t * info)307 eibnx_setup_cq(eibnx_thr_info_t *info)
308 {
309 	ibt_hca_attr_t hca_attr;
310 	ibt_cq_attr_t cq_attr;
311 	ibt_status_t ret;
312 	uint_t sz;
313 
314 	/*
315 	 * Get this HCA's attributes
316 	 */
317 	ret = ibt_query_hca(info->ti_hca, &hca_attr);
318 	if (ret != IBT_SUCCESS) {
319 		ENX_DPRINTF_ERR("ibt_query_hca(hca_hdl=0x%llx) failed, ret=%d",
320 		    info->ti_hca, ret);
321 		return (ENX_E_FAILURE);
322 	}
323 
324 	/*
325 	 * Allocate a completion queue for our sends and receives
326 	 */
327 	cq_attr.cq_sched = NULL;
328 	cq_attr.cq_flags = IBT_CQ_NO_FLAGS;
329 	cq_attr.cq_size = (hca_attr.hca_max_cq_sz < ENX_CQ_SIZE) ?
330 	    hca_attr.hca_max_cq_sz : ENX_CQ_SIZE;
331 
332 	ret = ibt_alloc_cq(info->ti_hca, &cq_attr, &info->ti_cq_hdl, &sz);
333 	if (ret != IBT_SUCCESS) {
334 		ENX_DPRINTF_ERR("ibt_alloc_cq(hca_hdl=0x%llx, cq_sz=0x%lx) "
335 		    "failed, ret=%d", info->ti_hca, cq_attr.cq_size, ret);
336 		return (ENX_E_FAILURE);
337 	}
338 
339 	/*
340 	 * Set up other parameters for collecting completion information
341 	 */
342 	info->ti_cq_sz = sz;
343 	info->ti_wc = kmem_zalloc(sizeof (ibt_wc_t) * sz, KM_SLEEP);
344 
345 	return (ENX_E_SUCCESS);
346 }
347 
348 /*
349  * Allocate and setup the UD channel parameters
350  */
351 int
eibnx_setup_ud_channel(eibnx_thr_info_t * info)352 eibnx_setup_ud_channel(eibnx_thr_info_t *info)
353 {
354 	ibt_ud_chan_alloc_args_t alloc_attr;
355 	ibt_ud_chan_query_attr_t query_attr;
356 	ibt_status_t ret;
357 
358 	/*
359 	 * Protect against arbitrary additions to the chan_alloc_args
360 	 * and chan_query_attr structures (make sure the ones we don't
361 	 * use are zero'd).
362 	 */
363 	bzero(&alloc_attr, sizeof (ibt_ud_chan_alloc_args_t));
364 	bzero(&query_attr, sizeof (ibt_ud_chan_query_attr_t));
365 
366 	/*
367 	 * This ud channel is not going to be used by the nexus driver
368 	 * to send any LSO packets, so we won't need the IBT_USES_LSO flag.
369 	 */
370 	alloc_attr.ud_flags = IBT_ALL_SIGNALED;
371 	alloc_attr.ud_hca_port_num = info->ti_pi->p_port_num;
372 
373 	ret = ibt_pkey2index(info->ti_hca, info->ti_pi->p_port_num,
374 	    (ib_pkey_t)EIB_ADMIN_PKEY, &(alloc_attr.ud_pkey_ix));
375 	if (ret != IBT_SUCCESS) {
376 		ENX_DPRINTF_ERR("ibt_pkey2index(hca_hdl=0x%llx, "
377 		    "port_num=0x%x, pkey=0x%x) failed, ret=%d",
378 		    info->ti_hca, info->ti_pi->p_port_num,
379 		    EIB_ADMIN_PKEY, ret);
380 		return (ENX_E_FAILURE);
381 	}
382 
383 	alloc_attr.ud_sizes.cs_sq = ENX_NUM_SWQE;
384 	alloc_attr.ud_sizes.cs_rq = ENX_NUM_RWQE;
385 	alloc_attr.ud_sizes.cs_sq_sgl = 1;
386 	alloc_attr.ud_sizes.cs_rq_sgl = 1;
387 	alloc_attr.ud_sizes.cs_inline = 0;
388 
389 	alloc_attr.ud_qkey = EIB_FIP_QKEY;
390 	alloc_attr.ud_scq = info->ti_cq_hdl;
391 	alloc_attr.ud_rcq = info->ti_cq_hdl;
392 	alloc_attr.ud_pd = info->ti_pd;
393 
394 	ret = ibt_alloc_ud_channel(info->ti_hca, IBT_ACHAN_NO_FLAGS,
395 	    &alloc_attr, &info->ti_chan, NULL);
396 	if (ret != IBT_SUCCESS) {
397 		ENX_DPRINTF_ERR("ibt_alloc_ud_channel(hca_hdl=0x%llx, "
398 		    "cs_sq=0x%lx, cs_rq=0x%lx) failed, ret=%d",
399 		    info->ti_hca, alloc_attr.ud_sizes.cs_sq,
400 		    alloc_attr.ud_sizes.cs_rq, ret);
401 		return (ENX_E_FAILURE);
402 	}
403 
404 	ret = ibt_query_ud_channel(info->ti_chan, &query_attr);
405 	if (ret != IBT_SUCCESS) {
406 		ENX_DPRINTF_ERR("ibt_query_ud_channel(chan_hdl=0x%llx) "
407 		    "failed, ret=%d", info->ti_chan, ret);
408 		if ((ret = ibt_free_channel(info->ti_chan)) != IBT_SUCCESS) {
409 			ENX_DPRINTF_WARN("ibt_free_channel(chan_hdl=0x%llx) "
410 			    "failed, ret=%d", info->ti_chan, ret);
411 		}
412 		info->ti_chan = NULL;
413 		return (ENX_E_FAILURE);
414 	}
415 	info->ti_qpn = query_attr.ud_qpn;
416 
417 	return (ENX_E_SUCCESS);
418 }
419 
420 /*
421  * Set up the transmit buffers for communicating with the gateway. Since
422  * the EoIB Nexus driver only exchanges control messages with the
423  * gateway, we don't really need too much space.
424  */
425 static int
eibnx_setup_txbufs(eibnx_thr_info_t * info)426 eibnx_setup_txbufs(eibnx_thr_info_t *info)
427 {
428 	eibnx_tx_t *snd_p = &info->ti_snd;
429 	eibnx_wqe_t *swqe;
430 	ibt_mr_attr_t attr;
431 	ibt_mr_desc_t desc;
432 	ib_memlen_t tx_bufsz;
433 	ibt_status_t ret;
434 	ibt_ud_dest_hdl_t dest;
435 	uint8_t	*buf;
436 	uint_t mtu = (128 << info->ti_pi->p_mtu);
437 	int i;
438 
439 	/*
440 	 * Allocate for the tx buf
441 	 */
442 	tx_bufsz = ENX_NUM_SWQE * mtu;
443 	snd_p->tx_vaddr = (ib_vaddr_t)(uintptr_t)kmem_zalloc(tx_bufsz,
444 	    KM_SLEEP);
445 
446 	/*
447 	 * Register the memory region with IBTF for use
448 	 */
449 	attr.mr_vaddr = snd_p->tx_vaddr;
450 	attr.mr_len = tx_bufsz;
451 	attr.mr_as = NULL;
452 	attr.mr_flags = IBT_MR_SLEEP;
453 	if ((ret = ibt_register_mr(info->ti_hca, info->ti_pd, &attr,
454 	    &snd_p->tx_mr, &desc)) != IBT_SUCCESS) {
455 		ENX_DPRINTF_ERR("ibt_register_mr() failed for tx "
456 		    "region (0x%llx, 0x%llx) with ret=%d",
457 		    attr.mr_vaddr, attr.mr_len, ret);
458 		kmem_free((void *)(uintptr_t)(snd_p->tx_vaddr), tx_bufsz);
459 		return (ENX_E_FAILURE);
460 	}
461 	snd_p->tx_lkey = desc.md_lkey;
462 
463 	/*
464 	 * Now setup the send wqes
465 	 */
466 	buf = (uint8_t *)(uintptr_t)(snd_p->tx_vaddr);
467 	for (i = 0; i < ENX_NUM_SWQE; i++) {
468 		swqe = &snd_p->tx_wqe[i];
469 
470 		/*
471 		 * Allocate a UD destination handle
472 		 */
473 		ret = ibt_alloc_ud_dest(info->ti_hca, IBT_UD_DEST_NO_FLAGS,
474 		    info->ti_pd, &dest);
475 		if (ret != IBT_SUCCESS) {
476 			ENX_DPRINTF_ERR("ibt_alloc_ud_dest(hca_hdl=0x%llx) "
477 			    "failed, ret=%d", info->ti_hca, ret);
478 			eibnx_rb_setup_txbufs(info);
479 			return (ENX_E_FAILURE);
480 		}
481 
482 		/*
483 		 * We set up everything in the send wqes except initialize
484 		 * the UD destination and the state of the entry. The ds_len
485 		 * should also be adjusted correctly. All this should be
486 		 * done later in the appropriate routines, before posting.
487 		 */
488 		swqe->qe_type = ENX_QETYP_SWQE;
489 		swqe->qe_bufsz = mtu;
490 		swqe->qe_sgl.ds_va = (ib_vaddr_t)(uintptr_t)buf;
491 		swqe->qe_sgl.ds_key = snd_p->tx_lkey;
492 		swqe->qe_sgl.ds_len = swqe->qe_bufsz;
493 		swqe->qe_wr.send.wr_id = (ibt_wrid_t)(uintptr_t)swqe;
494 		swqe->qe_wr.send.wr_flags = IBT_WR_NO_FLAGS;
495 		swqe->qe_wr.send.wr_trans = IBT_UD_SRV;
496 		swqe->qe_wr.send.wr_opcode = IBT_WRC_SEND;
497 		swqe->qe_wr.send.wr_nds = 1;
498 		swqe->qe_wr.send.wr_sgl = &swqe->qe_sgl;
499 		swqe->qe_wr.send.wr.ud.udwr_dest = dest;
500 
501 		mutex_init(&swqe->qe_lock, NULL, MUTEX_DRIVER, NULL);
502 		swqe->qe_flags = 0;
503 
504 		buf += mtu;
505 	}
506 
507 	return (ENX_E_SUCCESS);
508 }
509 
510 /*
511  * Set up bufs for receiving gateway advertisements
512  */
513 static int
eibnx_setup_rxbufs(eibnx_thr_info_t * info)514 eibnx_setup_rxbufs(eibnx_thr_info_t *info)
515 {
516 	eibnx_rx_t *rcv_p = &info->ti_rcv;
517 	eibnx_wqe_t *rwqe;
518 	ibt_mr_attr_t attr;
519 	ibt_mr_desc_t desc;
520 	ib_memlen_t rx_bufsz;
521 	ibt_status_t ret;
522 	uint8_t	*buf;
523 	uint_t mtu = (128 << info->ti_pi->p_mtu);
524 	int i;
525 
526 	/*
527 	 * Allocate for the rx buf
528 	 */
529 	rx_bufsz = ENX_NUM_RWQE * (mtu + ENX_GRH_SZ);
530 	rcv_p->rx_vaddr = (ib_vaddr_t)(uintptr_t)kmem_zalloc(rx_bufsz,
531 	    KM_SLEEP);
532 
533 	attr.mr_vaddr = rcv_p->rx_vaddr;
534 	attr.mr_len = rx_bufsz;
535 	attr.mr_as = NULL;
536 	attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
537 	if ((ret = ibt_register_mr(info->ti_hca, info->ti_pd, &attr,
538 	    &rcv_p->rx_mr, &desc)) != IBT_SUCCESS) {
539 		ENX_DPRINTF_ERR("ibt_register_mr() failed for rx "
540 		    "region (0x%llx, 0x%llx) with ret=%d",
541 		    attr.mr_vaddr, attr.mr_len, ret);
542 		kmem_free((void *)(uintptr_t)(rcv_p->rx_vaddr), rx_bufsz);
543 		return (ENX_E_FAILURE);
544 	}
545 	rcv_p->rx_lkey = desc.md_lkey;
546 
547 	buf = (uint8_t *)(uintptr_t)(rcv_p->rx_vaddr);
548 	for (i = 0; i < ENX_NUM_RWQE; i++) {
549 		rwqe = &rcv_p->rx_wqe[i];
550 
551 		rwqe->qe_type = ENX_QETYP_RWQE;
552 		rwqe->qe_bufsz = mtu + ENX_GRH_SZ;
553 		rwqe->qe_sgl.ds_va = (ib_vaddr_t)(uintptr_t)buf;
554 		rwqe->qe_sgl.ds_key = rcv_p->rx_lkey;
555 		rwqe->qe_sgl.ds_len = rwqe->qe_bufsz;
556 		rwqe->qe_wr.recv.wr_id = (ibt_wrid_t)(uintptr_t)rwqe;
557 		rwqe->qe_wr.recv.wr_nds = 1;
558 		rwqe->qe_wr.recv.wr_sgl = &rwqe->qe_sgl;
559 
560 		mutex_init(&rwqe->qe_lock, NULL, MUTEX_DRIVER, NULL);
561 		rwqe->qe_flags = 0;
562 
563 		buf += (mtu + ENX_GRH_SZ);
564 	}
565 
566 	return (ENX_E_SUCCESS);
567 }
568 
569 /*
570  * Set up transmit and receive buffers and post the receive buffers
571  */
572 int
eibnx_setup_bufs(eibnx_thr_info_t * info)573 eibnx_setup_bufs(eibnx_thr_info_t *info)
574 {
575 	eibnx_rx_t *rcv_p = &info->ti_rcv;
576 	eibnx_wqe_t *rwqe;
577 	ibt_status_t ret;
578 	int i;
579 
580 	if (eibnx_setup_txbufs(info) != ENX_E_SUCCESS)
581 		return (ENX_E_FAILURE);
582 
583 	if (eibnx_setup_rxbufs(info) != ENX_E_SUCCESS) {
584 		eibnx_rb_setup_txbufs(info);
585 		return (ENX_E_FAILURE);
586 	}
587 
588 	for (i = 0; i < ENX_NUM_RWQE; i++) {
589 		rwqe = &rcv_p->rx_wqe[i];
590 
591 		mutex_enter(&rwqe->qe_lock);
592 
593 		rwqe->qe_flags |= (ENX_QEFL_INUSE | ENX_QEFL_POSTED);
594 		ret = ibt_post_recv(info->ti_chan, &(rwqe->qe_wr.recv), 1,
595 		    NULL);
596 
597 		mutex_exit(&rwqe->qe_lock);
598 
599 		if (ret != IBT_SUCCESS) {
600 			ENX_DPRINTF_ERR("ibt_post_recv(chan_hdl=0x%llx) "
601 			    "failed, ret=%d", info->ti_chan, ret);
602 
603 			ret = ibt_flush_channel(info->ti_chan);
604 			if (ret != IBT_SUCCESS) {
605 				ENX_DPRINTF_WARN("ibt_flush_channel"
606 				    "(chan_hdl=0x%llx) failed, ret=%d",
607 				    info->ti_chan, ret);
608 			}
609 
610 			eibnx_rb_setup_rxbufs(info);
611 			eibnx_rb_setup_txbufs(info);
612 			return (ENX_E_FAILURE);
613 		}
614 	}
615 
616 	return (ENX_E_SUCCESS);
617 }
618 
619 /*
620  * Set up the completion queue handler.  While we don't quit if  we cannot
621  * use soft interrupts, that path is really unreliable and untested.
622  */
623 int
eibnx_setup_cq_handler(eibnx_thr_info_t * info)624 eibnx_setup_cq_handler(eibnx_thr_info_t *info)
625 {
626 	eibnx_t *ss = enx_global_ss;
627 	ibt_status_t ret;
628 	int rv;
629 
630 	/*
631 	 * We'll try to use a softintr if possible.  If not, it's not
632 	 * fatal, we'll try and use the completion handler directly from
633 	 * the interrupt handler.
634 	 */
635 
636 	rv = ddi_intr_add_softint(ss->nx_dip, &info->ti_softint_hdl,
637 	    EIB_SOFTPRI_ADM, eibnx_comp_handler, info);
638 	if (rv != DDI_SUCCESS) {
639 		ENX_DPRINTF_WARN("ddi_intr_add_softint(dip=0x%llx) "
640 		    "failed, ret=%d", ss->nx_dip, rv);
641 	}
642 
643 	ibt_set_cq_handler(info->ti_cq_hdl, eibnx_comp_intr, info);
644 
645 	ret = ibt_enable_cq_notify(info->ti_cq_hdl, IBT_NEXT_COMPLETION);
646 	if (ret != IBT_SUCCESS) {
647 		ENX_DPRINTF_WARN("ibt_enable_cq_notify(cq_hdl=0x%llx) "
648 		    "failed, ret=%d", info->ti_cq_hdl, ret);
649 		if (info->ti_softint_hdl) {
650 			(void) ddi_intr_remove_softint(info->ti_softint_hdl);
651 			info->ti_softint_hdl = NULL;
652 		}
653 		return (ENX_E_FAILURE);
654 	}
655 
656 	return (ENX_E_SUCCESS);
657 }
658 
659 /*
660  * Join the solicit multicast group (All-EoIB-GWs-GID) as a full member
661  */
662 static int
eibnx_join_solicit_mcg(eibnx_thr_info_t * info)663 eibnx_join_solicit_mcg(eibnx_thr_info_t *info)
664 {
665 	ib_gid_t rgid = info->ti_pi->p_sgid_tbl[0];
666 	ibt_mcg_attr_t mcg_attr;
667 	ibt_mcg_info_t mcg_info;
668 	ibt_status_t ret;
669 
670 	bzero(&mcg_attr, sizeof (ibt_mcg_attr_t));
671 
672 	mcg_attr.mc_mgid = enx_solicit_mgid;
673 	mcg_attr.mc_qkey = (ib_qkey_t)EIB_FIP_QKEY;
674 	mcg_attr.mc_pkey = (ib_pkey_t)EIB_ADMIN_PKEY;
675 	mcg_attr.mc_join_state = IB_MC_JSTATE_FULL;
676 	mcg_attr.mc_flow = info->ti_solicit_mcg->mc_adds_vect.av_flow;
677 	mcg_attr.mc_tclass = info->ti_solicit_mcg->mc_adds_vect.av_tclass;
678 	mcg_attr.mc_sl = info->ti_solicit_mcg->mc_adds_vect.av_srvl;
679 	mcg_attr.mc_scope = IB_MC_SCOPE_SUBNET_LOCAL;
680 
681 	/*
682 	 * We only need to send to solicit mcg, so we only need to join
683 	 * the multicast group, no need to attach our qp to it
684 	 */
685 	ret = ibt_join_mcg(rgid, &mcg_attr, &mcg_info, NULL, NULL);
686 	if (ret != IBT_SUCCESS) {
687 		ENX_DPRINTF_ERR("ibt_join_mcg() failed for solicit "
688 		    "mgid=%llx.%llx, ret=%x", enx_solicit_mgid.gid_prefix,
689 		    enx_solicit_mgid.gid_guid, ret);
690 		return (ENX_E_FAILURE);
691 	}
692 
693 	/*
694 	 * We can throw away the old mcg info we got when we queried
695 	 * for the mcg and use the new one. They both should be the
696 	 * same, really.
697 	 */
698 	if (info->ti_solicit_mcg) {
699 		bcopy(&mcg_info, info->ti_solicit_mcg,
700 		    sizeof (ibt_mcg_info_t));
701 	}
702 
703 	return (ENX_E_SUCCESS);
704 }
705 
706 /*
707  * Join and attach to the advertise multicast group (All-EoIB-ENodes-GID)
708  * to receive unsolicitied advertisements from the gateways.
709  */
710 static int
eibnx_join_advertise_mcg(eibnx_thr_info_t * info)711 eibnx_join_advertise_mcg(eibnx_thr_info_t *info)
712 {
713 	ib_gid_t rgid = info->ti_pi->p_sgid_tbl[0];
714 	ibt_mcg_attr_t mcg_attr;
715 	ibt_mcg_info_t mcg_info;
716 	ibt_status_t ret;
717 
718 	if (info->ti_chan == NULL)
719 		return (ENX_E_FAILURE);
720 
721 	bzero(&mcg_attr, sizeof (ibt_mcg_attr_t));
722 
723 	mcg_attr.mc_mgid = enx_advertise_mgid;
724 	mcg_attr.mc_qkey = (ib_qkey_t)EIB_FIP_QKEY;
725 	mcg_attr.mc_pkey = (ib_pkey_t)EIB_ADMIN_PKEY;
726 	mcg_attr.mc_join_state = IB_MC_JSTATE_FULL;
727 	mcg_attr.mc_flow = info->ti_advertise_mcg->mc_adds_vect.av_flow;
728 	mcg_attr.mc_tclass = info->ti_advertise_mcg->mc_adds_vect.av_tclass;
729 	mcg_attr.mc_sl = info->ti_advertise_mcg->mc_adds_vect.av_srvl;
730 	mcg_attr.mc_scope = IB_MC_SCOPE_SUBNET_LOCAL;
731 
732 	ret = ibt_join_mcg(rgid, &mcg_attr, &mcg_info, NULL, NULL);
733 	if (ret != IBT_SUCCESS) {
734 		ENX_DPRINTF_ERR("ibt_join_mcg() failed for advertise "
735 		    "mgid=%llx.%llx, ret=%x", enx_advertise_mgid.gid_prefix,
736 		    enx_advertise_mgid.gid_guid, ret);
737 		return (ENX_E_FAILURE);
738 	}
739 
740 	/*
741 	 * We can throw away the old mcg info we got when we queried
742 	 * for the mcg and use the new one. They both should be the
743 	 * same, really.
744 	 */
745 	if (info->ti_advertise_mcg) {
746 		bcopy(&mcg_info, info->ti_advertise_mcg,
747 		    sizeof (ibt_mcg_info_t));
748 	}
749 
750 	/*
751 	 * Since we need to receive advertisements, we'll attach our qp
752 	 * to the advertise mcg
753 	 */
754 	ret = ibt_attach_mcg(info->ti_chan, info->ti_advertise_mcg);
755 	if (ret != IBT_SUCCESS) {
756 		ENX_DPRINTF_ERR("ibt_attach_mcg(chan_hdl=0x%llx, "
757 		    "advt_mcg=0x%llx) failed, ret=%d", info->ti_chan,
758 		    info->ti_advertise_mcg, ret);
759 		return (ENX_E_FAILURE);
760 	}
761 
762 	return (ENX_E_SUCCESS);
763 }
764 
765 /*
766  * Join the multicast groups we're interested in
767  */
768 int
eibnx_join_mcgs(eibnx_thr_info_t * info)769 eibnx_join_mcgs(eibnx_thr_info_t *info)
770 {
771 	mutex_enter(&info->ti_mcg_lock);
772 
773 	/*
774 	 * We should've located the mcg first
775 	 */
776 	if ((info->ti_mcg_status & ENX_MCGS_FOUND) == 0) {
777 		mutex_exit(&info->ti_mcg_lock);
778 		return (ENX_E_FAILURE);
779 	}
780 
781 	/*
782 	 * If we're already joined to the mcgs, we must leave first
783 	 */
784 	if ((info->ti_mcg_status & ENX_MCGS_JOINED) == ENX_MCGS_JOINED) {
785 		mutex_exit(&info->ti_mcg_lock);
786 		return (ENX_E_FAILURE);
787 	}
788 
789 	/*
790 	 * Join the two mcgs
791 	 */
792 	if (eibnx_join_advertise_mcg(info) != ENX_E_SUCCESS) {
793 		mutex_exit(&info->ti_mcg_lock);
794 		return (ENX_E_FAILURE);
795 	}
796 	if (eibnx_join_solicit_mcg(info) != ENX_E_SUCCESS) {
797 		eibnx_rb_join_advertise_mcg(info);
798 		mutex_exit(&info->ti_mcg_lock);
799 		return (ENX_E_FAILURE);
800 	}
801 
802 	info->ti_mcg_status |= ENX_MCGS_JOINED;
803 	mutex_exit(&info->ti_mcg_lock);
804 
805 	return (ENX_E_SUCCESS);
806 }
807 
808 int
eibnx_rejoin_mcgs(eibnx_thr_info_t * info)809 eibnx_rejoin_mcgs(eibnx_thr_info_t *info)
810 {
811 	/*
812 	 * Lookup the MCGs again and join them
813 	 */
814 	eibnx_rb_join_mcgs(info);
815 	eibnx_rb_find_mgroups(info);
816 
817 	if (eibnx_find_mgroups(info) != ENX_E_SUCCESS)
818 		return (ENX_E_FAILURE);
819 
820 	if (eibnx_join_mcgs(info) != ENX_E_SUCCESS)
821 		return (ENX_E_FAILURE);
822 
823 	return (ENX_E_SUCCESS);
824 }
825 
826 int
eibnx_ibt_fini(eibnx_t * ss)827 eibnx_ibt_fini(eibnx_t *ss)
828 {
829 	return (eibnx_rb_ibt_init(ss));
830 }
831 
832 static int
eibnx_rb_ibt_init(eibnx_t * ss)833 eibnx_rb_ibt_init(eibnx_t *ss)
834 {
835 	eibnx_hca_t *hca;
836 	eibnx_hca_t *hca_next;
837 	eibnx_hca_t *hca_list;
838 	ibt_status_t	ret;
839 
840 	/*
841 	 * Disable subnet notices callbacks
842 	 */
843 	ibt_register_subnet_notices(ss->nx_ibt_hdl, NULL, NULL);
844 
845 	/*
846 	 * Remove the hca list from the state structure
847 	 */
848 	mutex_enter(&ss->nx_lock);
849 	hca_list = ss->nx_hca;
850 	ss->nx_hca = NULL;
851 	mutex_exit(&ss->nx_lock);
852 
853 	/*
854 	 * For each HCA in the list, free up the portinfo/port structs,
855 	 * free the pd, close the hca handle and release the hca struct.
856 	 * If something goes wrong, try to put back whatever good remains
857 	 * back on the hca list and return failure.
858 	 */
859 	for (hca = hca_list; hca; hca = hca_next) {
860 		hca_next = hca->hc_next;
861 		if (eibnx_cleanup_hca(hca) != ENX_E_SUCCESS) {
862 			mutex_enter(&ss->nx_lock);
863 			ss->nx_hca = hca_next;
864 			mutex_exit(&ss->nx_lock);
865 			return (ENX_E_FAILURE);
866 		}
867 	}
868 
869 	if ((ret = ibt_detach(ss->nx_ibt_hdl)) != IBT_SUCCESS) {
870 		ENX_DPRINTF_WARN("ibt_detach(ibt_hdl=0x%llx) "
871 		    "failed, ret=%d", ss->nx_ibt_hdl, ret);
872 		return (ENX_E_FAILURE);
873 	}
874 	ss->nx_ibt_hdl = NULL;
875 
876 	eibnx_rb_state_init();
877 
878 	return (ENX_E_SUCCESS);
879 }
880 
881 static void
eibnx_rb_state_init(void)882 eibnx_rb_state_init(void)
883 {
884 	eibnx_t *ss = enx_global_ss;
885 	kt_did_t thr_id;
886 
887 	/*
888 	 * Ask the eoib node creation thread to die and wait for
889 	 * it to happen
890 	 */
891 	mutex_enter(&ss->nx_nodeq_lock);
892 
893 	thr_id = ss->nx_nodeq_kt_did;
894 	ss->nx_nodeq_thr_die = 1;
895 	ss->nx_nodeq_kt_did = 0;
896 
897 	cv_signal(&ss->nx_nodeq_cv);
898 	mutex_exit(&ss->nx_nodeq_lock);
899 
900 	if (thr_id) {
901 		thread_join(thr_id);
902 	}
903 
904 	cv_destroy(&ss->nx_busop_cv);
905 	mutex_destroy(&ss->nx_busop_lock);
906 	cv_destroy(&ss->nx_nodeq_cv);
907 	mutex_destroy(&ss->nx_nodeq_lock);
908 	mutex_destroy(&ss->nx_lock);
909 }
910 
911 void
eibnx_rb_find_mgroups(eibnx_thr_info_t * info)912 eibnx_rb_find_mgroups(eibnx_thr_info_t *info)
913 {
914 	mutex_enter(&info->ti_mcg_lock);
915 	if ((info->ti_mcg_status & ENX_MCGS_FOUND) == ENX_MCGS_FOUND) {
916 		if (info->ti_advertise_mcg) {
917 			ibt_free_mcg_info(info->ti_advertise_mcg, 1);
918 			info->ti_advertise_mcg = NULL;
919 		}
920 		if (info->ti_solicit_mcg) {
921 			ibt_free_mcg_info(info->ti_solicit_mcg, 1);
922 			info->ti_solicit_mcg = NULL;
923 		}
924 		info->ti_mcg_status &= (~ENX_MCGS_FOUND);
925 	}
926 	mutex_exit(&info->ti_mcg_lock);
927 }
928 
929 void
eibnx_rb_setup_cq(eibnx_thr_info_t * info)930 eibnx_rb_setup_cq(eibnx_thr_info_t *info)
931 {
932 	ibt_status_t ret;
933 
934 	if (info->ti_wc && info->ti_cq_sz)
935 		kmem_free(info->ti_wc, sizeof (ibt_wc_t) * info->ti_cq_sz);
936 
937 	info->ti_cq_sz = 0;
938 	info->ti_wc = NULL;
939 
940 	if (info->ti_cq_hdl) {
941 		ret = ibt_free_cq(info->ti_cq_hdl);
942 		if (ret != IBT_SUCCESS) {
943 			ENX_DPRINTF_WARN("ibt_free_cq(cq_hdl=0x%llx) "
944 			    "failed, ret=%d", info->ti_cq_hdl, ret);
945 		}
946 		info->ti_cq_hdl = NULL;
947 	}
948 }
949 
950 void
eibnx_rb_setup_ud_channel(eibnx_thr_info_t * info)951 eibnx_rb_setup_ud_channel(eibnx_thr_info_t *info)
952 {
953 	ibt_status_t ret;
954 
955 	if ((ret = ibt_free_channel(info->ti_chan)) != IBT_SUCCESS) {
956 		ENX_DPRINTF_WARN("ibt_free_channel(chan=0x%llx) "
957 		    "failed, ret=%d", info->ti_chan, ret);
958 	}
959 	info->ti_chan = NULL;
960 	info->ti_qpn = 0;
961 }
962 
963 static void
eibnx_rb_setup_txbufs(eibnx_thr_info_t * info)964 eibnx_rb_setup_txbufs(eibnx_thr_info_t *info)
965 {
966 	eibnx_tx_t *snd_p = &info->ti_snd;
967 	eibnx_wqe_t *swqe;
968 	ibt_status_t ret;
969 	int i;
970 	uint_t mtu = (128 << info->ti_pi->p_mtu);
971 
972 	/*
973 	 * Release any UD destination handle we may have allocated.  Note that
974 	 * the per swqe lock would've been initialized only if we were able to
975 	 * allocate the UD dest handle.
976 	 */
977 	for (i = 0; i < ENX_NUM_SWQE; i++) {
978 		swqe = &snd_p->tx_wqe[i];
979 
980 		if (swqe->qe_wr.send.wr.ud.udwr_dest) {
981 			mutex_destroy(&swqe->qe_lock);
982 
983 			ret =
984 			    ibt_free_ud_dest(swqe->qe_wr.send.wr.ud.udwr_dest);
985 			if (ret != IBT_SUCCESS) {
986 				ENX_DPRINTF_WARN("ibt_free_ud_dest(dest=0x%llx)"
987 				    " failed, ret=%d",
988 				    swqe->qe_wr.send.wr.ud.udwr_dest, ret);
989 			}
990 		}
991 	}
992 
993 	/*
994 	 * Clear all the workq entries
995 	 */
996 	bzero(snd_p->tx_wqe, sizeof (eibnx_wqe_t) * ENX_NUM_SWQE);
997 
998 	/*
999 	 * Clear Lkey and deregister any memory region we may have
1000 	 * registered earlier
1001 	 */
1002 	snd_p->tx_lkey = 0;
1003 	if (snd_p->tx_mr) {
1004 		if ((ret = ibt_deregister_mr(info->ti_hca,
1005 		    snd_p->tx_mr)) != IBT_SUCCESS) {
1006 			ENX_DPRINTF_WARN("ibt_deregister_TXmr(hca_hdl=0x%llx,"
1007 			    "mr=0x%llx) failed, ret=%d", info->ti_hca,
1008 			    snd_p->tx_mr, ret);
1009 		}
1010 		snd_p->tx_mr = NULL;
1011 	}
1012 
1013 	/*
1014 	 * Release any memory allocated for the tx bufs
1015 	 */
1016 	if (snd_p->tx_vaddr) {
1017 		kmem_free((void *)(uintptr_t)(snd_p->tx_vaddr),
1018 		    ENX_NUM_SWQE * mtu);
1019 		snd_p->tx_vaddr = 0;
1020 	}
1021 
1022 }
1023 
1024 static void
eibnx_rb_setup_rxbufs(eibnx_thr_info_t * info)1025 eibnx_rb_setup_rxbufs(eibnx_thr_info_t *info)
1026 {
1027 	eibnx_rx_t *rcv_p = &info->ti_rcv;
1028 	eibnx_wqe_t *rwqe;
1029 	ibt_status_t ret;
1030 	uint_t mtu = (128 << info->ti_pi->p_mtu);
1031 	int i;
1032 
1033 	for (i = 0; i < ENX_NUM_RWQE; i++) {
1034 		rwqe = &rcv_p->rx_wqe[i];
1035 		mutex_destroy(&rwqe->qe_lock);
1036 	}
1037 	bzero(rcv_p->rx_wqe, sizeof (eibnx_wqe_t) * ENX_NUM_RWQE);
1038 
1039 	rcv_p->rx_lkey = 0;
1040 
1041 	if ((ret = ibt_deregister_mr(info->ti_hca,
1042 	    rcv_p->rx_mr)) != IBT_SUCCESS) {
1043 		ENX_DPRINTF_WARN("ibt_deregister_RXmr(hca_hdl=0x%llx,"
1044 		    "mr=0x%llx) failed, ret=%d", info->ti_hca,
1045 		    rcv_p->rx_mr, ret);
1046 	}
1047 	rcv_p->rx_mr = NULL;
1048 
1049 	kmem_free((void *)(uintptr_t)(rcv_p->rx_vaddr),
1050 	    ENX_NUM_RWQE * (mtu + ENX_GRH_SZ));
1051 	rcv_p->rx_vaddr = 0;
1052 }
1053 
1054 void
eibnx_rb_setup_bufs(eibnx_thr_info_t * info)1055 eibnx_rb_setup_bufs(eibnx_thr_info_t *info)
1056 {
1057 	ibt_status_t ret;
1058 
1059 	if ((ret = ibt_flush_channel(info->ti_chan)) != IBT_SUCCESS) {
1060 		ENX_DPRINTF_WARN("ibt_flush_channel(chan_hdl=0x%llx) "
1061 		    "failed, ret=%d", info->ti_chan, ret);
1062 	}
1063 
1064 	eibnx_rb_setup_rxbufs(info);
1065 
1066 	eibnx_rb_setup_txbufs(info);
1067 }
1068 
1069 void
eibnx_rb_setup_cq_handler(eibnx_thr_info_t * info)1070 eibnx_rb_setup_cq_handler(eibnx_thr_info_t *info)
1071 {
1072 	ibt_set_cq_handler(info->ti_cq_hdl, NULL, NULL);
1073 
1074 	if (info->ti_softint_hdl) {
1075 		(void) ddi_intr_remove_softint(info->ti_softint_hdl);
1076 		info->ti_softint_hdl = NULL;
1077 	}
1078 }
1079 
1080 static void
eibnx_rb_join_solicit_mcg(eibnx_thr_info_t * info)1081 eibnx_rb_join_solicit_mcg(eibnx_thr_info_t *info)
1082 {
1083 	ib_gid_t rgid = info->ti_pi->p_sgid_tbl[0];
1084 	ib_gid_t rsvd_gid;
1085 	ibt_status_t ret;
1086 
1087 	rsvd_gid.gid_prefix = 0;
1088 	rsvd_gid.gid_guid = 0;
1089 
1090 	ret = ibt_leave_mcg(rgid, enx_solicit_mgid,
1091 	    rsvd_gid, IB_MC_JSTATE_FULL);
1092 	if (ret != IBT_SUCCESS) {
1093 		ENX_DPRINTF_WARN("ibt_leave_mcg(slct_mgid=%llx.%llx) "
1094 		    "failed, ret=%d", enx_solicit_mgid.gid_prefix,
1095 		    enx_solicit_mgid.gid_guid, ret);
1096 	}
1097 }
1098 
1099 static void
eibnx_rb_join_advertise_mcg(eibnx_thr_info_t * info)1100 eibnx_rb_join_advertise_mcg(eibnx_thr_info_t *info)
1101 {
1102 	ib_gid_t rgid = info->ti_pi->p_sgid_tbl[0];
1103 	ib_gid_t rsvd_gid;
1104 	ibt_status_t ret;
1105 
1106 	ret = ibt_detach_mcg(info->ti_chan, info->ti_advertise_mcg);
1107 	if (ret != IBT_SUCCESS) {
1108 		ENX_DPRINTF_WARN("ibt_detach_mcg(chan_hdl=0x%llx, "
1109 		    "advt_mcg=0x%llx) failed, ret=%d",
1110 		    info->ti_chan, info->ti_advertise_mcg, ret);
1111 	}
1112 
1113 	rsvd_gid.gid_prefix = 0;
1114 	rsvd_gid.gid_guid = 0;
1115 
1116 	ret = ibt_leave_mcg(rgid, enx_advertise_mgid,
1117 	    rsvd_gid, IB_MC_JSTATE_FULL);
1118 	if (ret != IBT_SUCCESS) {
1119 		ENX_DPRINTF_WARN("ibt_leave_mcg(advt_mgid=%llx.%llx) "
1120 		    "failed, ret=%d", enx_advertise_mgid.gid_prefix,
1121 		    enx_advertise_mgid.gid_guid, ret);
1122 	}
1123 }
1124 
1125 void
eibnx_rb_join_mcgs(eibnx_thr_info_t * info)1126 eibnx_rb_join_mcgs(eibnx_thr_info_t *info)
1127 {
1128 	mutex_enter(&info->ti_mcg_lock);
1129 	if ((info->ti_mcg_status & ENX_MCGS_JOINED) == ENX_MCGS_JOINED) {
1130 		eibnx_rb_join_solicit_mcg(info);
1131 		eibnx_rb_join_advertise_mcg(info);
1132 
1133 		info->ti_mcg_status &= (~ENX_MCGS_JOINED);
1134 	}
1135 	mutex_exit(&info->ti_mcg_lock);
1136 }
1137 
1138 eibnx_hca_t *
eibnx_prepare_hca(ib_guid_t hca_guid)1139 eibnx_prepare_hca(ib_guid_t hca_guid)
1140 {
1141 	eibnx_t *ss = enx_global_ss;
1142 	eibnx_hca_t *hca;
1143 	eibnx_port_t *port;
1144 	eibnx_port_t *port_tail;
1145 	ibt_hca_hdl_t hca_hdl;
1146 	ibt_pd_hdl_t pd_hdl;
1147 	ibt_hca_portinfo_t *pi;
1148 	uint_t num_pi;
1149 	uint_t size_pi;
1150 	ibt_hca_attr_t hca_attr;
1151 	ibt_status_t ret;
1152 	int i;
1153 
1154 	ret = ibt_open_hca(ss->nx_ibt_hdl, hca_guid, &hca_hdl);
1155 	if (ret != IBT_SUCCESS) {
1156 		ENX_DPRINTF_ERR("ibt_open_hca(hca_guid=0x%llx) "
1157 		    "failed, ret=%d", hca_guid, ret);
1158 		return (NULL);
1159 	}
1160 
1161 	bzero(&hca_attr, sizeof (ibt_hca_attr_t));
1162 	if ((ret = ibt_query_hca(hca_hdl, &hca_attr)) != IBT_SUCCESS) {
1163 		ENX_DPRINTF_ERR("ibt_query_hca(hca_hdl=0x%llx, "
1164 		    "hca_guid=0x%llx) failed, ret=%d",
1165 		    hca_hdl, hca_guid, ret);
1166 
1167 		if ((ret = ibt_close_hca(hca_hdl)) != IBT_SUCCESS) {
1168 			ENX_DPRINTF_WARN("ibt_close_hca(hca_hdl=0x%llx) "
1169 			    "failed, ret=%d", hca_hdl, ret);
1170 		}
1171 		return (NULL);
1172 	}
1173 
1174 	ret = ibt_alloc_pd(hca_hdl, IBT_PD_NO_FLAGS, &pd_hdl);
1175 	if (ret != IBT_SUCCESS) {
1176 		ENX_DPRINTF_ERR("ibt_alloc_pd(hca_hdl=0x%llx, "
1177 		    "hca_guid=0x%llx) failed, ret=%d",
1178 		    hca_hdl, hca_guid, ret);
1179 
1180 		if ((ret = ibt_close_hca(hca_hdl)) != IBT_SUCCESS) {
1181 			ENX_DPRINTF_WARN("ibt_close_hca(hca_hdl=0x%llx) "
1182 			    "failed, ret=%d", hca_hdl, ret);
1183 		}
1184 		return (NULL);
1185 	}
1186 
1187 	/*
1188 	 * We have all the information we want about this hca, create
1189 	 * a new struct and return it.
1190 	 */
1191 	hca = kmem_zalloc(sizeof (eibnx_hca_t), KM_SLEEP);
1192 	hca->hc_next = NULL;
1193 	hca->hc_guid = hca_guid;
1194 	hca->hc_hdl = hca_hdl;
1195 	hca->hc_pd = pd_hdl;
1196 	hca->hc_port = port_tail = NULL;
1197 
1198 	for (i = 0; i < hca_attr.hca_nports; i++) {
1199 		ret = ibt_query_hca_ports(hca_hdl, i + 1, &pi,
1200 		    &num_pi, &size_pi);
1201 		if (ret != IBT_SUCCESS) {
1202 			ENX_DPRINTF_WARN("ibt_query_hca_ports(hca_hdl=0x%llx, "
1203 			    "port=0x%x) failed, ret=%d", hca_hdl, i + 1, ret);
1204 		} else {
1205 			port = kmem_zalloc(sizeof (eibnx_port_t), KM_SLEEP);
1206 			port->po_next = NULL;
1207 			port->po_pi = pi;
1208 			port->po_pi_size = size_pi;
1209 
1210 			if (port_tail) {
1211 				port_tail->po_next = port;
1212 			} else {
1213 				hca->hc_port = port;
1214 			}
1215 			port_tail = port;
1216 		}
1217 	}
1218 
1219 	/*
1220 	 * If we couldn't query about any ports on the HCA, return failure
1221 	 */
1222 	if (hca->hc_port == NULL) {
1223 		ENX_DPRINTF_ERR("all hca port queries failed for "
1224 		    "hca_guid=0x%llx", hca_guid);
1225 		(void) eibnx_cleanup_hca(hca);
1226 		return (NULL);
1227 	}
1228 
1229 	return (hca);
1230 }
1231 
1232 int
eibnx_cleanup_hca(eibnx_hca_t * hca)1233 eibnx_cleanup_hca(eibnx_hca_t *hca)
1234 {
1235 	eibnx_port_t *port;
1236 	eibnx_port_t *port_next;
1237 	ibt_status_t ret;
1238 
1239 	for (port = hca->hc_port; port; port = port_next) {
1240 		port_next = port->po_next;
1241 
1242 		ibt_free_portinfo(port->po_pi, port->po_pi_size);
1243 		kmem_free(port, sizeof (eibnx_port_t));
1244 	}
1245 
1246 	if ((ret = ibt_free_pd(hca->hc_hdl, hca->hc_pd)) != IBT_SUCCESS) {
1247 		ENX_DPRINTF_WARN("ibt_free_pd(hca_hdl=0x%lx, pd_hd=0x%lx) "
1248 		    "failed, ret=%d", hca->hc_hdl, hca->hc_pd, ret);
1249 		return (ENX_E_FAILURE);
1250 	}
1251 
1252 	if ((ret = ibt_close_hca(hca->hc_hdl)) != IBT_SUCCESS) {
1253 		ENX_DPRINTF_WARN("ibt_close_hca(hca_hdl=0x%lx) failed, "
1254 		    "ret=%d", hca->hc_hdl, ret);
1255 		return (ENX_E_FAILURE);
1256 	}
1257 
1258 	kmem_free(hca, sizeof (eibnx_hca_t));
1259 
1260 	return (ENX_E_SUCCESS);
1261 }
1262