xref: /illumos-gate/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_ti.c (revision 015f8fff605f2fbd5fd0072e555576297804d57b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/ib/mgt/ibcm/ibcm_impl.h>
29 #include <sys/ib/ibtl/ibti.h>
30 #include <sys/ib/mgt/ibcm/ibcm_arp.h>
31 
32 /*
33  * ibcm_ti.c
34  *	These routines implement the Communication Manager's interfaces to IBTL.
35  */
36 
37 /* Argument bundle for the CM RC channel recycle task (taskq job) */
38 typedef struct ibcm_taskq_recycle_arg_s {
39 	ibt_channel_hdl_t	rc_chan;	/* channel handle */
40 	ibt_cep_flags_t		control;	/* CEP flags */
41 	uint8_t			hca_port_num;	/* HCA port number */
42 	ibt_recycle_handler_t	func;		/* recycle handler */
43 	void			*arg;	/* presumably func's arg - confirm */
44 } ibcm_taskq_recycle_arg_t;
45 
46 _NOTE(READ_ONLY_DATA(ibcm_taskq_recycle_arg_s))
47 
48 static ibt_status_t	ibcm_init_reply_addr(ibcm_hca_info_t *hcap,
49     ibcm_mad_addr_t *reply_addr, ibt_chan_open_args_t *chan_args,
50     ibt_chan_open_flags_t flags, ib_time_t *cm_pkt_lt, ib_lid_t prim_slid);
51 static void		ibcm_process_abort_via_taskq(void *args);
52 static ibt_status_t	ibcm_process_rc_recycle_ret(void *recycle_arg);
53 static ibt_status_t	ibcm_process_join_mcg(void *taskq_arg);
54 static void		ibcm_process_async_join_mcg(void *tq_arg);
55 
56 static ibt_status_t ibcm_get_node_rec(ibmf_saa_handle_t, sa_node_record_t *,
57     uint64_t c_mask, void *, size_t *);
58 
59 static ibt_status_t ibcm_close_rc_channel(ibt_channel_hdl_t channel,
60     ibcm_state_data_t *statep, ibt_execution_mode_t mode);
61 
62 /* Address Record (AR) management definitions */
63 #define	IBCM_DAPL_ATS_NAME	"DAPL Address Translation Service"
64 #define	IBCM_DAPL_ATS_SID	0x10000CE100415453ULL /* low 3 bytes: "ATS" */
65 #define	IBCM_DAPL_ATS_NBYTES	16
66 ibcm_svc_info_t *ibcm_ar_svcinfop; /* NOTE(review): AR service info? confirm */
67 ibcm_ar_t	*ibcm_ar_list; /* NOTE(review): presumably AR list head; confirm */
68 
69 /*
70  * Tunable parameter to turn off the overriding of pi_path_mtu value.
71  *	1 	By default override the path record's pi_path_mtu value to
72  *		IB_MTU_1K for all RC channels. This is done only for the
73  *		channels established on Tavor HCA and the path's pi_path_mtu
74  *		is greater than IB_MTU_1K.
75  *	0	Do not override, use pi_path_mtu by default.
76  */
77 int	ibcm_override_path_mtu = 1;
78 
79 #ifdef DEBUG
80 static void	ibcm_print_reply_addr(ibt_channel_hdl_t channel,
81 		    ibcm_mad_addr_t *cm_reply_addr);
82 #endif
83 
84 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_port_info_s::{port_ibmf_hdl}))
85 
86 /* access is controlled between ibcm_sm.c and ibcm_ti.c by CVs */
87 _NOTE(SCHEME_PROTECTS_DATA("Serialized access by CV", {ibt_rc_returns_t
88     ibt_ud_returns_t ibt_ap_returns_t ibt_ar_t}))
89 
90 /*
91  * Typically, clients initialize these args in one API call, and use them in
92  * another API call.
93  */
94 _NOTE(SCHEME_PROTECTS_DATA("Expected usage of ibtl api by client",
95     {ibt_path_info_s ibt_cep_path_s ibt_adds_vect_s ibt_mcg_info_s ib_gid_s
96     ibt_ud_dest_attr_s ibt_ud_dest_s ibt_srv_data_s ibt_redirect_info_s}))
97 
98 /*
99  * ibt_open_rc_channel()
100  *	ibt_open_rc_channel opens a communication channel on the specified
101  *	channel to the specified service. For connection service type qp's
102  *	the CM initiates the CEP to establish the connection and transitions
103  *	the QP/EEC to the "Ready to send" State modifying the QP/EEC's
104  *	attributes as necessary.
105  *	The implementation of this function assumes that alt path is different
106  *	from primary path. It is assumed that the Path functions ensure that.
107  *
108  * RETURN VALUES:
109  *	IBT_SUCCESS	on success (or respective failure on error)
110  */
111 ibt_status_t
112 ibt_open_rc_channel(ibt_channel_hdl_t channel, ibt_chan_open_flags_t flags,
113     ibt_execution_mode_t mode, ibt_chan_open_args_t *chan_args,
114     ibt_rc_returns_t *ret_args)
115 {
116 	/* all fields that are related to REQ MAD formation */
117 
118 	ib_pkey_t		prim_pkey;
119 	ib_lid_t		primary_slid, alternate_slid;
120 	ib_qpn_t		local_qpn = 0;
121 	ib_guid_t		hca_guid;
122 	ib_qkey_t		local_qkey = 0;
123 	ib_eecn_t		local_eecn = 0;
124 	ib_eecn_t		remote_eecn = 0;
125 	boolean_t		primary_grh;
126 	boolean_t		alternate_grh = B_FALSE;
127 	ib_lid_t		base_lid;
128 	ib_com_id_t		local_comid;
129 	ibmf_msg_t		*ibmf_msg, *ibmf_msg_dreq;
130 	ibcm_req_msg_t		*req_msgp;
131 
132 	uint8_t			rdma_in, rdma_out;
133 	uint8_t			cm_retries;
134 	uint64_t		local_cm_proc_time;	/* In usec */
135 	uint8_t			local_cm_resp_time;	/* IB time */
136 	uint64_t		remote_cm_resp_time;	/* In usec */
137 	uint32_t		starting_psn = 0;
138 
139 	/* CM path related fields */
140 	ibmf_handle_t		ibmf_hdl;
141 	ibcm_qp_list_t		*cm_qp_entry;
142 	ibcm_mad_addr_t		cm_reply_addr;
143 
144 	uint8_t			cm_pkt_lt;
145 
146 	/* Local args for ibtl/internal CM functions called within */
147 	ibt_status_t		status;
148 	ibcm_status_t		lkup_status;
149 	ibt_qp_query_attr_t	qp_query_attr;
150 
151 	/* Other misc local args */
152 	ibt_priv_data_len_t	len;
153 	ibcm_hca_info_t		*hcap;
154 	ibcm_state_data_t	*statep;
155 	uint8_t			port_no;
156 
157 	IBTF_DPRINTF_L3(cmlog, "ibt_open_rc_channel(chan %p, %X, %x, %p, %p)",
158 	    channel, flags, mode, chan_args, ret_args);
159 
160 	if (IBCM_INVALID_CHANNEL(channel)) {
161 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: invalid channel");
162 		return (IBT_CHAN_HDL_INVALID);
163 	}
164 
165 	/* cm handler should always be specified */
166 	if (chan_args->oc_cm_handler == NULL) {
167 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
168 		    "CM handler is not be specified", channel);
169 		return (IBT_INVALID_PARAM);
170 	}
171 
172 	if (mode == IBT_NONBLOCKING) {
173 		if (ret_args != NULL) {
174 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
175 			    " ret_args should be NULL when called in "
176 			    "non-blocking mode", channel);
177 			return (IBT_INVALID_PARAM);
178 		}
179 	} else if (mode == IBT_BLOCKING) {
180 		if (ret_args == NULL) {
181 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
182 			    " ret_args should be Non-NULL when called in "
183 			    "blocking mode", channel);
184 			return (IBT_INVALID_PARAM);
185 		}
186 		if (ret_args->rc_priv_data_len > IBT_REP_PRIV_DATA_SZ) {
187 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
188 			    " private data length is too large", channel);
189 			return (IBT_INVALID_PARAM);
190 		}
191 		if ((ret_args->rc_priv_data_len > 0) &&
192 		    (ret_args->rc_priv_data == NULL)) {
193 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
194 			    " rc_priv_data_len > 0, but rc_priv_data NULL",
195 			    channel);
196 			return (IBT_INVALID_PARAM);
197 		}
198 	} else { /* any other mode is not valid for ibt_open_rc_channel */
199 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
200 		    "invalid mode %x specified", channel, mode);
201 		return (IBT_INVALID_PARAM);
202 	}
203 
204 	/*
205 	 * XXX: no support yet for ibt_chan_open_flags_t - IBT_OCHAN_DUP
206 	 */
207 	if (flags & IBT_OCHAN_DUP) {
208 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
209 		    "Unsupported Flags specified: 0x%X", channel, flags);
210 		return (IBT_INVALID_PARAM);
211 	}
212 
213 	if ((flags & IBT_OCHAN_REDIRECTED) &&
214 	    (flags & IBT_OCHAN_PORT_REDIRECTED)) {
215 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
216 		    "Illegal to specify IBT_OCHAN_REDIRECTED and "
217 		    "IBT_OCHAN_PORT_REDIRECTED flags together", channel);
218 		return (IBT_INVALID_PARAM);
219 	}
220 
221 	if (((flags & IBT_OCHAN_REDIRECTED) &&
222 	    (chan_args->oc_cm_redirect_info == NULL)) ||
223 	    ((flags & IBT_OCHAN_PORT_REDIRECTED) &&
224 	    (chan_args->oc_cm_cep_path == NULL))) {
225 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
226 		    "Redirect flag specified, but respective arg is NULL",
227 		    channel);
228 		return (IBT_INVALID_PARAM);
229 	}
230 
231 	if ((flags & IBT_OCHAN_REDIRECTED) &&
232 	    (chan_args->oc_cm_redirect_info->rdi_dlid == 0) &&
233 	    (chan_args->oc_cm_redirect_info->rdi_gid.gid_guid == 0)) {
234 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
235 		    "Either rdi_dlid or rdi_gid must be specified for"
236 		    " IBT_OCHAN_REDIRECTED", channel);
237 		return (IBT_INVALID_PARAM);
238 	}
239 
240 	/* primary dlid and hca_port_num should never be zero */
241 	port_no = IBCM_PRIM_CEP_PATH(chan_args).cep_hca_port_num;
242 
243 	if ((IBCM_PRIM_ADDS_VECT(chan_args).av_dlid == 0) && (port_no == 0)) {
244 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
245 		    "Primary Path's information is not valid", channel);
246 		return (IBT_INVALID_PARAM);
247 	}
248 
249 	/* validate SID */
250 	if (chan_args->oc_path->pi_sid == 0) {
251 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
252 		    "ERROR: Service ID in path information is 0", channel);
253 		return (IBT_INVALID_PARAM);
254 	}
255 
256 	/* validate rnr_retry_cnt (enum has more than 3 bits) */
257 	if ((uint_t)chan_args->oc_path_rnr_retry_cnt > IBT_RNR_INFINITE_RETRY) {
258 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
259 		    "ERROR: oc_path_rnr_retry_cnt(%d) is out of range",
260 		    channel, chan_args->oc_path_rnr_retry_cnt);
261 		return (IBT_INVALID_PARAM);
262 	}
263 
264 	/*
265 	 * Ensure that client is not re-using a QP that is still associated
266 	 * with a statep
267 	 */
268 	IBCM_GET_CHAN_PRIVATE(channel, statep);
269 	if (statep != NULL) {
270 		IBCM_RELEASE_CHAN_PRIVATE(channel);
271 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
272 		    "Channel being re-used on active side", channel);
273 		return (IBT_CHAN_IN_USE);
274 	}
275 
276 	/* Get GUID from Channel */
277 	hca_guid = ibt_channel_to_hca_guid(channel);
278 
279 	/* validate QP's hca guid with that from primary path  */
280 	if (hca_guid != chan_args->oc_path->pi_hca_guid) {
281 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
282 		    "GUID from Channel and primary path don't match", channel);
283 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
284 		    "Channel GUID %llX primary path GUID %llX", channel,
285 		    hca_guid, chan_args->oc_path->pi_hca_guid);
286 		return (IBT_CHAN_HDL_INVALID);
287 	}
288 
289 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
290 	    "Local HCA GUID %llX", channel, hca_guid);
291 
292 	status = ibt_query_qp(channel, &qp_query_attr);
293 	if (status != IBT_SUCCESS) {
294 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
295 		    "ibt_query_qp failed %d", channel, status);
296 		return (status);
297 	}
298 
299 	/* If client specified "no port change on QP" */
300 	if ((qp_query_attr.qp_info.qp_transport.rc.rc_path.cep_hca_port_num !=
301 	    port_no) && (flags & IBT_OCHAN_PORT_FIXED)) {
302 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
303 		    "chan port %d and path port %d does not match", channel,
304 		    qp_query_attr.qp_info.qp_transport.rc.rc_path. \
305 		    cep_hca_port_num, port_no);
306 		return (IBT_INVALID_PARAM);
307 	}
308 
309 	if (qp_query_attr.qp_info.qp_trans != IBT_RC_SRV) {
310 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
311 		    "Invalid Channel type: Applicable only to RC Channel",
312 		    channel);
313 		return (IBT_CHAN_SRV_TYPE_INVALID);
314 	}
315 
316 	/* Check if QP is in INIT state or not */
317 	if (qp_query_attr.qp_info.qp_state != IBT_STATE_INIT) {
318 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
319 		    "QP is not in INIT state %x", channel,
320 		    qp_query_attr.qp_info.qp_state);
321 		return (IBT_CHAN_STATE_INVALID);
322 	}
323 
324 	local_qpn = qp_query_attr.qp_qpn;
325 
326 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p Active QPN 0x%x",
327 	    channel, local_qpn);
328 
329 #ifdef	NO_EEC_SUPPORT_YET
330 
331 	if (flags & IBT_OCHAN_RDC_EXISTS) {
332 		ibt_eec_query_attr_t	eec_query_attr;
333 
334 		local_qkey = qp_query_attr.qp_info.qp_transport.rd_qkey;
335 
336 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: RD");
337 
338 		status = ibt_query_eec(channel, &eec_query_attr);
339 		if (status != IBT_SUCCESS) {
340 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
341 			    " ibt_query_eec failed %d", channel, status);
342 			return (status);
343 		}
344 		local_eecn = eec_query_attr.eec_eecn;
345 	}
346 
347 #endif
348 
349 	/* If no HCA found return failure */
350 	if ((hcap = ibcm_find_hca_entry(hca_guid)) == NULL) {
351 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
352 		    "hcap is NULL. Probably hca is not in active state",
353 		    channel);
354 		return (IBT_CHAN_HDL_INVALID);
355 	}
356 
357 	rdma_out = chan_args->oc_rdma_ra_out;
358 	rdma_in = chan_args->oc_rdma_ra_in;
359 
360 	if ((rdma_in > hcap->hca_max_rdma_in_qp) ||
361 	    (rdma_out > hcap->hca_max_rdma_out_qp)) {
362 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
363 		    "rdma in %d/out %d values exceed hca limits", channel,
364 		    rdma_in, rdma_out);
365 		ibcm_dec_hca_acc_cnt(hcap);
366 		return (IBT_INVALID_PARAM);
367 	}
368 
369 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
370 	    "rdma_in %d rdma_out %d", channel, rdma_in, rdma_out);
371 
372 	if (chan_args->oc_path->pi_prim_pkt_lt > ibcm_max_ib_pkt_lt) {
373 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
374 		    "Huge Primary Pkt lt %d", channel,
375 		    chan_args->oc_path->pi_prim_pkt_lt);
376 		ibcm_dec_hca_acc_cnt(hcap);
377 		return (IBT_PATH_PKT_LT_TOO_HIGH);
378 	}
379 
380 	status = ibt_get_port_state_byguid(hcap->hca_guid, port_no,
381 	    NULL, &base_lid);
382 	if (status != IBT_SUCCESS) {
383 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
384 		    "primary port_num %d not active", channel, port_no);
385 		ibcm_dec_hca_acc_cnt(hcap);
386 		return (status);
387 	}
388 
389 	/* Validate P_KEY Index */
390 	status = ibt_index2pkey_byguid(hcap->hca_guid, port_no,
391 	    IBCM_PRIM_CEP_PATH(chan_args).cep_pkey_ix, &prim_pkey);
392 	if (status != IBT_SUCCESS) {
393 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
394 		    "Invalid Primary PKeyIx %x", channel,
395 		    IBCM_PRIM_CEP_PATH(chan_args).cep_pkey_ix);
396 		ibcm_dec_hca_acc_cnt(hcap);
397 		return (status);
398 	}
399 
400 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
401 	    "primary_port_num %d primary_pkey 0x%x", channel, port_no,
402 	    prim_pkey);
403 
404 	if ((hcap->hca_port_info[port_no - 1].port_ibmf_hdl == NULL) &&
405 	    ((status = ibcm_hca_reinit_port(hcap, port_no - 1))
406 	    != IBT_SUCCESS)) {
407 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
408 		    "ibmf reg or callback setup failed during re-initialize",
409 		    channel);
410 		ibcm_dec_hca_acc_cnt(hcap);
411 		return (status);
412 	}
413 
414 	ibmf_hdl = hcap->hca_port_info[port_no - 1].port_ibmf_hdl;
415 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
416 	    "primary ibmf_hdl = 0x%p", channel, ibmf_hdl);
417 
418 
419 	primary_slid = base_lid + IBCM_PRIM_ADDS_VECT(chan_args).av_src_path;
420 
421 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: channel 0x%p "
422 	    "primary SLID = %x", channel, primary_slid);
423 
424 	/* check first if alternate path exists or not as it is OPTIONAL */
425 	if (IBCM_ALT_CEP_PATH(chan_args).cep_hca_port_num != 0) {
426 		uint8_t	alt_port_no;
427 
428 		alt_port_no = IBCM_ALT_CEP_PATH(chan_args).cep_hca_port_num;
429 
430 		if (chan_args->oc_path->pi_alt_pkt_lt > ibcm_max_ib_pkt_lt) {
431 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
432 			    "Huge Alt Pkt lt %d", channel,
433 			    chan_args->oc_path->pi_alt_pkt_lt);
434 			ibcm_dec_hca_acc_cnt(hcap);
435 			return (IBT_PATH_PKT_LT_TOO_HIGH);
436 		}
437 
438 		if (port_no != alt_port_no) {
439 
440 			status = ibt_get_port_state_byguid(hcap->hca_guid,
441 			    alt_port_no, NULL, &base_lid);
442 			if (status != IBT_SUCCESS) {
443 
444 				IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: "
445 				    "chan 0x%p alt_port_num %d inactive %d",
446 				    channel, alt_port_no, status);
447 				ibcm_dec_hca_acc_cnt(hcap);
448 				return (status);
449 			}
450 
451 		}
452 		alternate_slid =
453 		    base_lid + IBCM_ALT_ADDS_VECT(chan_args).av_src_path;
454 
455 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan %0xp "
456 		    "alternate SLID = %x", channel, alternate_slid);
457 	}
458 
459 	/*
460 	 * only pkey needs to be zero'ed, because all other fields are set in
461 	 * ibcm_init_reply_addr. But, let's bzero the complete struct for
462 	 * any future modifications.
463 	 */
464 	bzero(&cm_reply_addr, sizeof (cm_reply_addr));
465 
466 	/* Initialize the MAD destination address in stored_reply_addr */
467 	if ((status = ibcm_init_reply_addr(hcap, &cm_reply_addr, chan_args,
468 	    flags, &cm_pkt_lt, primary_slid)) != IBT_SUCCESS) {
469 
470 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
471 		    "ibcm_init_reply_addr failed status %d ", channel, status);
472 		ibcm_dec_hca_acc_cnt(hcap);
473 		return (status);
474 	}
475 
476 
477 	/* Initialize the pkey for CM MAD communication */
478 	if (cm_reply_addr.rcvd_addr.ia_p_key == 0)
479 		cm_reply_addr.rcvd_addr.ia_p_key = prim_pkey;
480 
481 #ifdef DEBUG
482 	ibcm_print_reply_addr(channel, &cm_reply_addr);
483 #endif
484 
485 	/* Retrieve an ibmf qp for sending CM MADs */
486 	if ((cm_qp_entry = ibcm_find_qp(hcap, port_no,
487 	    cm_reply_addr.rcvd_addr.ia_p_key)) == NULL) {
488 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
489 		    "unable to allocate ibmf qp for CM MADs", channel);
490 		ibcm_dec_hca_acc_cnt(hcap);
491 		return (IBT_INSUFF_RESOURCE);
492 	}
493 
494 
495 	if (ibcm_alloc_comid(hcap, &local_comid) != IBCM_SUCCESS) {
496 		ibcm_release_qp(cm_qp_entry);
497 		ibcm_dec_hca_acc_cnt(hcap);
498 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
499 		    " Unable to allocate comid", channel);
500 		return (IBT_INSUFF_KERNEL_RESOURCE);
501 	}
502 
503 	/* allocate an IBMF mad buffer (REQ) */
504 	if ((status = ibcm_alloc_out_msg(ibmf_hdl, &ibmf_msg,
505 	    MAD_METHOD_SEND)) != IBT_SUCCESS) {
506 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: "
507 		    "chan 0x%p ibcm_alloc_out_msg failed", channel);
508 		ibcm_release_qp(cm_qp_entry);
509 		ibcm_free_comid(hcap, local_comid);
510 		ibcm_dec_hca_acc_cnt(hcap);
511 		return (status);
512 	}
513 
514 	/* allocate an IBMF mad buffer (DREQ) */
515 	if ((status = ibcm_alloc_out_msg(ibmf_hdl, &ibmf_msg_dreq,
516 	    MAD_METHOD_SEND)) != IBT_SUCCESS) {
517 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: "
518 		    "chan 0x%p ibcm_alloc_out_msg failed", channel);
519 		(void) ibcm_free_out_msg(ibmf_hdl, &ibmf_msg);
520 		ibcm_release_qp(cm_qp_entry);
521 		ibcm_free_comid(hcap, local_comid);
522 		ibcm_dec_hca_acc_cnt(hcap);
523 		return (status);
524 	}
525 
526 	/* Init to Init, if QP's port does not match with path information */
527 	if (qp_query_attr.qp_info.qp_transport.rc.rc_path.cep_hca_port_num !=
528 	    IBCM_PRIM_CEP_PATH(chan_args).cep_hca_port_num) {
529 
530 		ibt_qp_info_t		qp_info;
531 		ibt_cep_modify_flags_t	cep_flags;
532 
533 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: "
534 		    "chan 0x%p chan port %d", channel,
535 		    qp_query_attr.qp_info.qp_transport.rc.rc_path.\
536 		    cep_hca_port_num);
537 
538 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: "
539 		    "chan 0x%p path port %d", channel, port_no);
540 
541 		bzero(&qp_info, sizeof (qp_info));
542 		/* For now, set it to RC type */
543 
544 		qp_info.qp_trans = IBT_RC_SRV;
545 		qp_info.qp_state = IBT_STATE_INIT;
546 		qp_info.qp_transport.rc.rc_path.cep_hca_port_num = port_no;
547 
548 		cep_flags = IBT_CEP_SET_STATE | IBT_CEP_SET_PORT;
549 
550 		status = ibt_modify_qp(channel, cep_flags, &qp_info, NULL);
551 
552 		if (status != IBT_SUCCESS) {
553 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: "
554 			    "chan 0x%p ibt_modify_qp() = %d", channel, status);
555 			ibcm_release_qp(cm_qp_entry);
556 			ibcm_free_comid(hcap, local_comid);
557 			ibcm_dec_hca_acc_cnt(hcap);
558 			(void) ibcm_free_out_msg(ibmf_hdl, &ibmf_msg);
559 			(void) ibcm_free_out_msg(ibmf_hdl, &ibmf_msg_dreq);
560 			return (status);
561 		} else
562 			IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: "
563 			    "chan 0x%p ibt_modify_qp() = %d", channel, status);
564 	}
565 
566 	/* allocate ibcm_state_data_t before grabbing the WRITER lock */
567 	statep = kmem_zalloc(sizeof (ibcm_state_data_t), KM_SLEEP);
568 	rw_enter(&hcap->hca_state_rwlock, RW_WRITER);
569 	lkup_status = ibcm_lookup_msg(IBCM_OUTGOING_REQ, local_comid, 0, 0,
570 	    hcap, &statep);
571 	rw_exit(&hcap->hca_state_rwlock);
572 
573 	/* CM should be seeing this for the first time */
574 	ASSERT(lkup_status == IBCM_LOOKUP_NEW);
575 
576 	/* Increment the hca's resource count */
577 	ibcm_inc_hca_res_cnt(hcap);
578 
579 	/* Once a resource created on hca, no need to hold the acc cnt */
580 	ibcm_dec_hca_acc_cnt(hcap);
581 
582 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*statep))
583 
584 	statep->timerid = 0;
585 	statep->local_hca_guid = hca_guid;
586 	statep->local_qpn = local_qpn;
587 	statep->stored_reply_addr.cm_qp_entry = cm_qp_entry;
588 	statep->prim_port = IBCM_PRIM_CEP_PATH(chan_args).cep_hca_port_num;
589 	statep->alt_port = IBCM_ALT_CEP_PATH(chan_args).cep_hca_port_num;
590 
591 
592 	/* Save "statep" as channel's CM private data.  */
593 	statep->channel = channel;
594 	IBCM_SET_CHAN_PRIVATE(statep->channel, statep);
595 
596 	statep->stored_msg = ibmf_msg;
597 	statep->dreq_msg = ibmf_msg_dreq;
598 
599 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*req_msgp))
600 
601 	/* Start filling in the REQ MAD */
602 	req_msgp = (ibcm_req_msg_t *)IBCM_OUT_MSGP(statep->stored_msg);
603 	req_msgp->req_local_comm_id = h2b32(local_comid);
604 	req_msgp->req_svc_id = h2b64(chan_args->oc_path->pi_sid);
605 	req_msgp->req_local_ca_guid = h2b64(hca_guid);
606 	req_msgp->req_local_qkey = h2b32(local_qkey);	/* for EEC/RD */
607 
608 	/* Bytes 32-35 are req_local_qpn and req_off_resp_resources */
609 	req_msgp->req_local_qpn_plus = h2b32(local_qpn << 8 | rdma_in);
610 
611 	/* Bytes 36-39 are req_local_eec_no and req_off_initiator_depth */
612 	req_msgp->req_local_eec_no_plus = h2b32(local_eecn << 8 | rdma_out);
613 
614 	if (flags & IBT_OCHAN_REMOTE_CM_TM)
615 		remote_cm_resp_time = chan_args->oc_remote_cm_time;
616 	else
617 		remote_cm_resp_time = ibcm_remote_response_time;
618 
619 	/*
620 	 * Bytes 40-43 - remote_eecn, remote_cm_resp_time, tran_type,
621 	 * IBT_CM_FLOW_CONTROL is always set by default.
622 	 */
623 	req_msgp->req_remote_eecn_plus = h2b32(
624 	    remote_eecn << 8 | (ibt_usec2ib(remote_cm_resp_time) & 0x1f) << 3 |
625 	    IBT_RC_SRV << 1 | IBT_CM_FLOW_CONTROL);
626 
627 	if (flags & IBT_OCHAN_LOCAL_CM_TM)
628 		local_cm_proc_time = chan_args->oc_local_cm_time;
629 	else
630 		local_cm_proc_time = ibcm_local_processing_time;
631 
632 	local_cm_resp_time = ibt_usec2ib(local_cm_proc_time +
633 	    2 * ibt_ib2usec(chan_args->oc_path->pi_prim_pkt_lt) +
634 	    ibcm_sw_delay);
635 
636 	/* save retry count */
637 	statep->cep_retry_cnt = chan_args->oc_path_retry_cnt;
638 
639 	if (flags & IBT_OCHAN_STARTING_PSN)
640 		starting_psn = chan_args->oc_starting_psn;
641 
642 	if (local_cm_resp_time > 0x1f)
643 		local_cm_resp_time = 0x1f;
644 
645 	/* Bytes 44-47 are req_starting_psn, local_cm_resp_time and retry_cnt */
646 	req_msgp->req_starting_psn_plus = h2b32(starting_psn << 8 |
647 	    local_cm_resp_time << 3 | statep->cep_retry_cnt);
648 
649 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
650 	    "Prim Pkt lt (IB time) 0x%x", channel,
651 	    chan_args->oc_path->pi_prim_pkt_lt);
652 
653 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
654 	    "local_cm_proc_time(usec) %d ", channel, local_cm_proc_time);
655 
656 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
657 	    "local_cm_resp_time(ib_time) %d", channel, local_cm_resp_time);
658 
659 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
660 	    "remote_cm_resp_time (usec) %d", channel, remote_cm_resp_time);
661 
662 	statep->starting_psn = starting_psn;
663 
664 	/* Pkey - bytes 48-49 */
665 	req_msgp->req_part_key = h2b16(prim_pkey);
666 
667 	if (flags & IBT_OCHAN_CM_RETRY)
668 		cm_retries = chan_args->oc_cm_retry_cnt;
669 	else
670 		cm_retries = ibcm_max_retries;
671 
672 	statep->max_cm_retries = statep->remaining_retry_cnt = cm_retries;
673 	req_msgp->req_max_cm_retries_plus = statep->max_cm_retries << 4;
674 
675 	/*
676 	 * Check whether SRQ is associated with this Channel, if yes, then
677 	 * set the SRQ Exists bit in the REQ.
678 	 */
679 	if (qp_query_attr.qp_srq != NULL) {
680 		req_msgp->req_max_cm_retries_plus |= (1 << 3);
681 	}
682 
683 	/*
684 	 * By default on Tavor, we override the PathMTU to 1K.
685 	 * To turn this off, set ibcm_override_path_mtu = 0.
686 	 */
687 	if (ibcm_override_path_mtu && IBCM_IS_HCA_TAVOR(hcap) &&
688 	    (chan_args->oc_path->pi_path_mtu > IB_MTU_1K)) {
689 		req_msgp->req_mtu_plus = IB_MTU_1K << 4 |
690 		    chan_args->oc_path_rnr_retry_cnt;
691 		IBTF_DPRINTF_L3(cmlog, "ibt_open_rc_channel: chan 0x%p PathMTU"
692 		    " overidden to IB_MTU_1K(%d) from %d", channel, IB_MTU_1K,
693 		    chan_args->oc_path->pi_path_mtu);
694 	} else
695 		req_msgp->req_mtu_plus = chan_args->oc_path->pi_path_mtu << 4 |
696 		    chan_args->oc_path_rnr_retry_cnt;
697 
698 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p CM retry cnt %d"
699 	    " staring PSN %x", channel, cm_retries, starting_psn);
700 
701 
702 #ifdef	NO_EEC_SUPPORT_YET
703 	if (flags & IBT_OCHAN_RDC_EXISTS)
704 		req_msgp->req_mtu_plus |= 8;
705 #endif
706 
707 	/* Initialize the "primary" port stuff next - bytes 52-95 */
708 	req_msgp->req_primary_l_port_lid = h2b16(primary_slid);
709 	req_msgp->req_primary_r_port_lid =
710 	    h2b16(IBCM_PRIM_ADDS_VECT(chan_args).av_dlid);
711 	req_msgp->req_primary_l_port_gid.gid_prefix =
712 	    h2b64(IBCM_PRIM_ADDS_VECT(chan_args).av_sgid.gid_prefix);
713 	req_msgp->req_primary_l_port_gid.gid_guid =
714 	    h2b64(IBCM_PRIM_ADDS_VECT(chan_args).av_sgid.gid_guid);
715 	req_msgp->req_primary_r_port_gid.gid_prefix =
716 	    h2b64(IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_prefix);
717 	req_msgp->req_primary_r_port_gid.gid_guid =
718 	    h2b64(IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_guid);
719 	primary_grh = IBCM_PRIM_ADDS_VECT(chan_args).av_send_grh;
720 
721 	statep->remote_hca_guid = /* not correct, but helpful for debugging */
722 	    IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_guid;
723 
724 	/* Bytes 88-91 - primary_flowlbl, and primary_srate */
725 	req_msgp->req_primary_flow_label_plus =
726 	    h2b32(((primary_grh == B_TRUE) ?
727 	    (IBCM_PRIM_ADDS_VECT(chan_args).av_flow << 12) : 0) |
728 	    IBCM_PRIM_ADDS_VECT(chan_args).av_srate);
729 	req_msgp->req_primary_traffic_class = (primary_grh == B_TRUE) ?
730 	    IBCM_PRIM_ADDS_VECT(chan_args).av_tclass : 0;
731 	req_msgp->req_primary_hop_limit = (primary_grh == B_TRUE) ?
732 	    IBCM_PRIM_ADDS_VECT(chan_args).av_hop : 0xff;
733 	req_msgp->req_primary_sl_plus =
734 	    IBCM_PRIM_ADDS_VECT(chan_args).av_srvl << 4 |
735 	    ((primary_grh == B_TRUE) ? 0 : 8);
736 
737 	req_msgp->req_primary_localtime_plus =
738 	    ibt_usec2ib((2 * ibt_ib2usec(chan_args->oc_path->pi_prim_pkt_lt)) +
739 	    ibt_ib2usec(hcap->hca_ack_delay)) << 3;
740 
741 	IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan %p statep %p",
742 	    channel, statep);
743 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
744 	    "active hca_ack_delay (usec) %d", channel,
745 	    req_msgp->req_primary_localtime_plus);
746 
747 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
748 	    "Sent primary cep timeout (IB Time) %d", channel,
749 	    hcap->hca_ack_delay);
750 
751 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p prim_dlid %x ",
752 	    channel, IBCM_PRIM_ADDS_VECT(chan_args).av_dlid);
753 
754 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
755 	    "prim GID %llX:%llX", channel,
756 	    IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_prefix,
757 	    IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_guid);
758 
759 	/* Initialize the "alternate" port stuff - optional */
760 	if (chan_args->oc_path->pi_alt_cep_path.cep_hca_port_num != 0) {
761 		ib_gid_t	tmp_gid;
762 
763 		req_msgp->req_alt_l_port_lid = h2b16(alternate_slid);
764 		req_msgp->req_alt_r_port_lid =
765 		    h2b16(IBCM_ALT_ADDS_VECT(chan_args).av_dlid);
766 		/*
767 		 * doing all this as req_alt_r/l_port_gid is at offset
768 		 * 100, 116 which is not divisible by 8
769 		 */
770 
771 		tmp_gid.gid_prefix =
772 		    h2b64(IBCM_ALT_ADDS_VECT(chan_args).av_dgid.gid_prefix);
773 		tmp_gid.gid_guid =
774 		    h2b64(IBCM_ALT_ADDS_VECT(chan_args).av_dgid.gid_guid);
775 		bcopy(&tmp_gid, &req_msgp->req_alt_r_port_gid[0],
776 		    sizeof (ib_gid_t));
777 		tmp_gid.gid_prefix =
778 		    h2b64(IBCM_ALT_ADDS_VECT(chan_args).av_sgid.gid_prefix);
779 		tmp_gid.gid_guid =
780 		    h2b64(IBCM_ALT_ADDS_VECT(chan_args).av_sgid.gid_guid);
781 
782 		bcopy(&tmp_gid, &req_msgp->req_alt_l_port_gid[0],
783 		    sizeof (ib_gid_t));
784 		alternate_grh = IBCM_ALT_ADDS_VECT(chan_args).av_send_grh;
785 
786 		/* Bytes 132-135 - alternate_flow_label, and alternate srate */
787 		req_msgp->req_alt_flow_label_plus = h2b32(
788 		    (((alternate_grh == B_TRUE) ?
789 		    (IBCM_ALT_ADDS_VECT(chan_args).av_flow << 12) : 0) |
790 		    IBCM_ALT_ADDS_VECT(chan_args).av_srate));
791 		req_msgp->req_alt_traffic_class = (alternate_grh == B_TRUE) ?
792 		    IBCM_ALT_ADDS_VECT(chan_args).av_tclass : 0;
793 		req_msgp->req_alt_hop_limit = (alternate_grh == B_TRUE) ?
794 		    IBCM_ALT_ADDS_VECT(chan_args).av_hop : 0xff;
795 		req_msgp->req_alt_sl_plus =
796 		    IBCM_ALT_ADDS_VECT(chan_args).av_srvl << 4 |
797 		    ((alternate_grh == B_TRUE) ? 0 : 8);
798 		req_msgp->req_alt_localtime_plus = ibt_usec2ib((2 *
799 		    ibt_ib2usec(chan_args->oc_path->pi_alt_pkt_lt)) +
800 		    ibt_ib2usec(hcap->hca_ack_delay)) << 3;
801 
802 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
803 		    "alt_dlid %x ", channel,
804 		    IBCM_ALT_ADDS_VECT(chan_args).av_dlid);
805 
806 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
807 		    "alt GID %llX:%llX", channel,
808 		    IBCM_ALT_ADDS_VECT(chan_args).av_dgid.gid_prefix,
809 		    IBCM_ALT_ADDS_VECT(chan_args).av_dgid.gid_guid);
810 	}
811 
812 	len = min(chan_args->oc_priv_data_len, IBT_REQ_PRIV_DATA_SZ);
813 	if ((len > 0) && chan_args->oc_priv_data)
814 		bcopy(chan_args->oc_priv_data, req_msgp->req_private_data, len);
815 
816 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*req_msgp))
817 
818 	/* return_data is filled up in the state machine code */
819 	if (ret_args != NULL) {
820 		statep->open_return_data = ret_args;
821 	}
822 
823 	/* initialize some statep fields here */
824 	statep->mode = IBCM_ACTIVE_MODE;
825 	statep->hcap = hcap;
826 
827 	statep->cm_handler = chan_args->oc_cm_handler;
828 	statep->state_cm_private = chan_args->oc_cm_clnt_private;
829 
830 	statep->pkt_life_time =
831 	    ibt_ib2usec(chan_args->oc_path->pi_prim_pkt_lt);
832 
833 	statep->timer_value = ibt_ib2usec(ibt_usec2ib(
834 	    2 * ibt_ib2usec(cm_pkt_lt) + remote_cm_resp_time));
835 
836 	/* Initialize statep->stored_reply_addr */
837 	statep->stored_reply_addr.ibmf_hdl = ibmf_hdl;
838 
839 	/* Initialize stored reply addr fields */
840 	statep->stored_reply_addr.grh_hdr = cm_reply_addr.grh_hdr;
841 	statep->stored_reply_addr.rcvd_addr = cm_reply_addr.rcvd_addr;
842 	statep->stored_reply_addr.grh_exists = cm_reply_addr.grh_exists;
843 	statep->stored_reply_addr.port_num = cm_reply_addr.port_num;
844 
845 	/*
846 	 * The IPD on local/active side is calculated by path functions,
847 	 * hence available in the args of ibt_open_rc_channel
848 	 */
849 	statep->local_srate = IBCM_PRIM_ADDS_VECT(chan_args).av_srate;
850 	statep->local_alt_srate = IBCM_ALT_ADDS_VECT(chan_args).av_srate;
851 
852 	/* Store the source path bits for primary and alt paths */
853 	statep->prim_src_path_bits = IBCM_PRIM_ADDS_VECT(chan_args).av_src_path;
854 	statep->alt_src_path_bits = IBCM_ALT_ADDS_VECT(chan_args).av_src_path;
855 
856 	statep->open_flow = 1;
857 	statep->open_done = B_FALSE;
858 	statep->state = statep->timer_stored_state = IBCM_STATE_REQ_SENT;
859 	IBCM_REF_CNT_INCR(statep);	/* Decremented before return */
860 	IBCM_REF_CNT_INCR(statep);	/* Decremented after REQ is posted */
861 	statep->send_mad_flags |= IBCM_REQ_POST_BUSY;
862 
863 	IBCM_OUT_HDRP(statep->stored_msg)->AttributeID =
864 	    h2b16(IBCM_INCOMING_REQ + IBCM_ATTR_BASE_ID);
865 
866 	IBCM_OUT_HDRP(statep->stored_msg)->TransactionID =
867 	    h2b64(ibcm_generate_tranid(IBCM_INCOMING_REQ, statep->local_comid,
868 	    0));
869 
870 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*statep))
871 
872 	ibcm_open_enqueue(statep);
873 
874 	mutex_enter(&statep->state_mutex);
875 
876 	if (mode == IBT_BLOCKING) {
877 
878 		/* wait for REQ/REP/RTU */
879 		while (statep->open_done != B_TRUE) {
880 			cv_wait(&statep->block_client_cv, &statep->state_mutex);
881 		}
882 
883 		/*
884 		 * In the case that open_channel() fails because of a
885 		 * REJ or timeout, change retval to IBT_CM_FAILURE
886 		 */
887 		if (statep->open_return_data->rc_status != IBT_CM_ACCEPT)
888 			status = IBT_CM_FAILURE;
889 
890 		IBTF_DPRINTF_L3(cmlog, "ibt_open_rc_channel: chan 0x%p "
891 		    "ret status %d cm status %d", channel, status,
892 		    statep->open_return_data->rc_status);
893 	}
894 
895 	/* decrement the ref-count before leaving here */
896 	IBCM_REF_CNT_DECR(statep);
897 
898 	mutex_exit(&statep->state_mutex);
899 
900 	IBTF_DPRINTF_L4(cmlog, "ibt_open_rc_channel: chan 0x%p done", channel);
901 	return (status);
902 }
903 
904 /*
905  * ibcm_init_reply_addr:
906  *
907  * The brief description of functionality below.
908  *
909  * For IBT_OCHAN_PORT_REDIRECTED (ie., port redirected case):
910  *	Build CM path from chan_args->oc_cm_cep_path
911  *	Set CM pkt lt (ie.,life time) to chan_args->oc_cm_pkt_lt
912  *
913  * For IBT_OCHAN_REDIRECTED (ie., port and CM redirected case):
914  *	If Redirect LID is specified,
915  *		If Redirect GID is not specified or specified to be on the same
916  *		    subnet, then
917  *			Build CM path from chan_args->oc_cm_redirect_info
918  *			Set CM pkt lt to subnet timeout
919  *		Else (ie., GID specified, but on a different subnet)
920  *			Do a path lookup to build CM Path and set CM pkt lt
921  *
922  */
923 static ibt_status_t
924 ibcm_init_reply_addr(ibcm_hca_info_t *hcap, ibcm_mad_addr_t *reply_addr,
925     ibt_chan_open_args_t *chan_args, ibt_chan_open_flags_t flags,
926     ib_time_t *cm_pkt_lt, ib_lid_t prim_slid)
927 {
928 	ibt_adds_vect_t	*cm_adds;
929 	ibt_path_info_t	path;
930 	boolean_t	cm_grh;
931 	ibt_status_t	status;
932 
933 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_reply_addr:");
934 
935 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*reply_addr))
936 
937 	/*
938 	 * sending side CM lid/gid/port num are not based on any redirect
939 	 * params. These values are set to primary RC path lid/gid/port num.
940 	 * In the future, these values can be set based on framework policy
941 	 * decisions ensuring reachability.
942 	 */
943 	reply_addr->grh_hdr.ig_sender_gid =
944 	    IBCM_PRIM_ADDS_VECT(chan_args).av_sgid;
945 	reply_addr->rcvd_addr.ia_local_lid = prim_slid;
946 	reply_addr->port_num = IBCM_PRIM_CEP_PATH(chan_args).cep_hca_port_num;
947 
948 	if (flags & IBT_OCHAN_PORT_REDIRECTED) {
949 		IBTF_DPRINTF_L4(cmlog, "ibcm_init_rely_addr: "
950 		    "IBT_OCHAN_PORT_REDIRECTED specified");
951 
952 		status = ibt_index2pkey_byguid(hcap->hca_guid,
953 		    chan_args->oc_cm_cep_path->cep_hca_port_num,
954 		    chan_args->oc_cm_cep_path->cep_pkey_ix,
955 		    &reply_addr->rcvd_addr.ia_p_key);
956 
957 		if (status != IBT_SUCCESS) {
958 			IBTF_DPRINTF_L2(cmlog, "ibcm_init_rely_addr: Invalid "
959 			    "CM PKeyIx %x port_num %x",
960 			    chan_args->oc_cm_cep_path->cep_pkey_ix,
961 			    chan_args->oc_cm_cep_path->cep_hca_port_num);
962 			return (status);
963 		}
964 
965 		cm_adds = &(chan_args->oc_cm_cep_path->cep_adds_vect);
966 		IBTF_DPRINTF_L4(cmlog, "ibcm_init_rely_addr: dlid = %x",
967 		    cm_adds->av_dlid);
968 
969 		reply_addr->rcvd_addr.ia_q_key = IB_GSI_QKEY;
970 		reply_addr->rcvd_addr.ia_remote_qno = 1;
971 		*cm_pkt_lt = chan_args->oc_cm_pkt_lt;
972 
973 	} else if (flags & IBT_OCHAN_REDIRECTED) {
974 		ibt_redirect_info_t	*redirect_info;
975 		ibt_hca_portinfo_t	*port_infop;
976 		uint_t			psize, nports;
977 
978 		IBTF_DPRINTF_L4(cmlog, "ibcm_init_rely_addr: "
979 		    "IBT_OCHAN_REDIRECTED specified");
980 
981 		redirect_info = chan_args->oc_cm_redirect_info;
982 
983 		if ((redirect_info->rdi_gid.gid_prefix == 0) ||
984 		    (redirect_info->rdi_gid.gid_guid == 0)) {
985 			IBTF_DPRINTF_L2(cmlog, "ibcm_init_reply_addr: "
986 			    "ERROR: Re-direct GID value NOT Provided.");
987 			return (IBT_INVALID_PARAM);
988 		}
989 
990 		/* As per spec definition 1.1, it's always IB_GSI_QKEY */
991 		reply_addr->rcvd_addr.ia_q_key = redirect_info->rdi_qkey;
992 		reply_addr->rcvd_addr.ia_remote_qno = redirect_info->rdi_qpn;
993 		reply_addr->rcvd_addr.ia_p_key = redirect_info->rdi_pkey;
994 
995 		/*
996 		 * if LID is non-zero in classportinfo then use classportinfo
997 		 * fields to form CM MAD destination address.
998 		 */
999 		if (redirect_info->rdi_dlid != 0) {
1000 			status = ibtl_cm_query_hca_ports_byguid(hcap->hca_guid,
1001 			    reply_addr->port_num, &port_infop, &nports, &psize);
1002 			if ((status != IBT_SUCCESS) || (nports == 0)) {
1003 				IBTF_DPRINTF_L2(cmlog, "ibcm_init_reply_addr: "
1004 				    "Query Ports Failed: %d", status);
1005 				return (status);
1006 			} else if (port_infop->p_subnet_timeout >
1007 			    IBCM_MAX_IB_PKT_LT) {
1008 				IBTF_DPRINTF_L2(cmlog, "ibcm_init_reply_addr: "
1009 				    "large subnet timeout %x port_no %x",
1010 				    port_infop->p_subnet_timeout,
1011 				    reply_addr->port_num);
1012 				ibt_free_portinfo(port_infop, psize);
1013 				return (IBT_PATH_PKT_LT_TOO_HIGH);
1014 			} else {
1015 				IBTF_DPRINTF_L3(cmlog, "ibcm_init_reply_addr: "
1016 				    "subnet timeout %x port_no %x",
1017 				    port_infop->p_subnet_timeout,
1018 				    reply_addr->port_num);
1019 
1020 				*cm_pkt_lt =
1021 				    ibt_ib2usec(min(ibcm_max_ib_mad_pkt_lt,
1022 				    port_infop->p_subnet_timeout));
1023 
1024 				ibt_free_portinfo(port_infop, psize);
1025 			}
1026 
1027 			reply_addr->rcvd_addr.ia_remote_lid =
1028 			    redirect_info->rdi_dlid;
1029 			reply_addr->rcvd_addr.ia_service_level =
1030 			    redirect_info->rdi_sl;
1031 			reply_addr->grh_exists = B_TRUE;
1032 			reply_addr->grh_hdr.ig_recver_gid =
1033 			    redirect_info->rdi_gid;
1034 			reply_addr->grh_hdr.ig_tclass =
1035 			    redirect_info->rdi_tclass;
1036 			reply_addr->grh_hdr.ig_flow_label =
1037 			    redirect_info->rdi_flow;
1038 
1039 			/* Classportinfo doesn't have hoplimit field */
1040 			reply_addr->grh_hdr.ig_hop_limit = 0xff;
1041 			return (IBT_SUCCESS);
1042 
1043 		} else {
1044 			ibt_path_attr_t	path_attr;
1045 			ib_gid_t	path_dgid[1];
1046 
1047 			/*
1048 			 * If GID is specified, and LID is zero in classportinfo
1049 			 * do a path lookup using specified GID, Pkey,
1050 			 * in classportinfo
1051 			 */
1052 
1053 			bzero(&path_attr, sizeof (path_attr));
1054 
1055 			path_attr.pa_dgids = &path_dgid[0];
1056 			path_attr.pa_dgids[0] = redirect_info->rdi_gid;
1057 
1058 			/*
1059 			 * use reply_addr below, as sender_gid in reply_addr
1060 			 * may have been set above based on some policy decision
1061 			 * for originating end point for CM MADs above
1062 			 */
1063 			path_attr.pa_sgid = reply_addr->grh_hdr.ig_sender_gid;
1064 			path_attr.pa_num_dgids = 1;
1065 			path_attr.pa_pkey = redirect_info->rdi_pkey;
1066 
1067 			if ((status = ibt_get_paths(ibcm_ibt_handle,
1068 			    IBT_PATH_PKEY, &path_attr, 1, &path, NULL)) !=
1069 			    IBT_SUCCESS)
1070 				return (status);
1071 
1072 			/* Initialize cm_adds */
1073 			cm_adds = &path.pi_prim_cep_path.cep_adds_vect;
1074 			*cm_pkt_lt = path.pi_prim_pkt_lt;
1075 		}
1076 
1077 	} else	{ /* cm_pkey initialized in ibt_open_rc_channel */
1078 		reply_addr->rcvd_addr.ia_q_key = IB_GSI_QKEY;
1079 		reply_addr->rcvd_addr.ia_remote_qno = 1;
1080 		*cm_pkt_lt = chan_args->oc_path->pi_prim_pkt_lt;
1081 		cm_adds = &(IBCM_PRIM_ADDS_VECT(chan_args));
1082 	}
1083 
1084 
1085 	cm_grh = cm_adds->av_send_grh;
1086 	reply_addr->grh_exists = cm_grh;
1087 
1088 	reply_addr->rcvd_addr.ia_remote_lid =
1089 	    cm_adds->av_dlid;
1090 	reply_addr->grh_hdr.ig_recver_gid =
1091 	    cm_adds->av_dgid;
1092 	reply_addr->grh_hdr.ig_flow_label =
1093 	    cm_adds->av_flow & IB_GRH_FLOW_LABEL_MASK;
1094 	reply_addr->grh_hdr.ig_tclass =
1095 	    (cm_grh == B_TRUE) ? cm_adds->av_tclass : 0;
1096 	reply_addr->grh_hdr.ig_hop_limit =
1097 	    (cm_grh == B_TRUE) ? cm_adds->av_hop : 0xff;
1098 	reply_addr->rcvd_addr.ia_service_level =
1099 	    cm_adds->av_srvl;
1100 
1101 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*reply_addr))
1102 
1103 	return (IBT_SUCCESS);
1104 }
1105 
1106 
1107 /*
1108  * ibt_prime_close_rc_channel()
1109  *	It allocates resources required for close channel operation, so
1110  *	ibt_close_rc_channel can be called from interrupt routine.
1111  *
1112  * INPUTS:
1113  *	channel			The address of an ibt_channel_t struct that
1114  *				specifies the channel to open.
1115  *
1116  * RETURN VALUES:
1117  *	IBT_SUCCESS	on success(or respective failure on error)
1118  *
1119  * Clients are typically expected to call this function in established state
1120  */
1121 ibt_status_t
1122 ibt_prime_close_rc_channel(ibt_channel_hdl_t channel)
1123 {
1124 	ibcm_state_data_t	*statep;
1125 	ibt_status_t		status = IBT_SUCCESS;
1126 
1127 	IBTF_DPRINTF_L3(cmlog, "ibt_prime_close_rc_channel(%p)", channel);
1128 
1129 	/* validate channel, first */
1130 	if (IBCM_INVALID_CHANNEL(channel)) {
1131 		IBTF_DPRINTF_L2(cmlog, "ibt_prime_close_rc_channel: chan 0x%p "
1132 		    "invalid channel", channel);
1133 		return (IBT_CHAN_HDL_INVALID);
1134 	}
1135 
1136 	if (ibtl_cm_get_chan_type(channel) != IBT_RC_SRV) {
1137 		IBTF_DPRINTF_L2(cmlog, "ibt_prime_close_rc_channel: chan 0x%p "
1138 		    "Invalid Channel type: Applicable only to RC Channel",
1139 		    channel);
1140 		return (IBT_CHAN_SRV_TYPE_INVALID);
1141 	}
1142 
1143 	/* get the statep */
1144 	IBCM_GET_CHAN_PRIVATE(channel, statep);
1145 
1146 	/*
1147 	 * This can happen, if the statep is already gone by a DREQ from
1148 	 * the remote side
1149 	 */
1150 
1151 	if (statep == NULL) {
1152 		IBTF_DPRINTF_L2(cmlog, "ibt_prime_close_rc_channel: chan 0x%p "
1153 		    "statep NULL", channel);
1154 		return (IBT_SUCCESS);
1155 	}
1156 
1157 	mutex_enter(&statep->state_mutex);
1158 	IBCM_RELEASE_CHAN_PRIVATE(channel);
1159 	if (statep->state != IBCM_STATE_ESTABLISHED) {
1160 		mutex_exit(&statep->state_mutex);
1161 		return (IBT_CHAN_STATE_INVALID);
1162 	}
1163 	IBCM_REF_CNT_INCR(statep);
1164 	IBTF_DPRINTF_L4(cmlog, "ibt_prime_close_rc_channel: chan 0x%p statep %p"
1165 	    " state %x", channel, statep, statep->state);
1166 	mutex_exit(&statep->state_mutex);
1167 
1168 	/* clients could pre-allocate dreq mad, even before connection est */
1169 	if (statep->dreq_msg == NULL)
1170 		status = ibcm_alloc_out_msg(statep->stored_reply_addr.ibmf_hdl,
1171 		    &statep->dreq_msg, MAD_METHOD_SEND);
1172 
1173 	mutex_enter(&statep->state_mutex);
1174 	IBCM_REF_CNT_DECR(statep);
1175 	mutex_exit(&statep->state_mutex);
1176 
1177 	if (status != IBT_SUCCESS) {
1178 		IBTF_DPRINTF_L2(cmlog, "ibt_prime_close_rc_channel: chan 0x%p "
1179 		    "ibcm_alloc_out_msg failed ", channel);
1180 		return (status);
1181 	}
1182 
1183 	/* If this message isn't seen then ibt_prime_close_rc_channel failed */
1184 	IBTF_DPRINTF_L5(cmlog, "ibt_prime_close_rc_channel: chan 0x%p done",
1185 	    channel);
1186 
1187 	return (IBT_SUCCESS);
1188 }
1189 
1190 /*
1191  * ibt_close_rc_channel()
1192  *	It closes an established channel.
1193  *
1194  * RETURN VALUES:
1195  *	IBT_SUCCESS	on success(or respective failure on error)
1196  */
1197 ibt_status_t
1198 ibt_close_rc_channel(ibt_channel_hdl_t channel, ibt_execution_mode_t mode,
1199     void *priv_data, ibt_priv_data_len_t priv_data_len, uint8_t *ret_status,
1200     void *ret_priv_data, ibt_priv_data_len_t *ret_priv_data_len_p)
1201 {
1202 	ibcm_state_data_t	*statep;
1203 
1204 	IBTF_DPRINTF_L3(cmlog, "ibt_close_rc_channel(%p, %x, %p, %d, %p)",
1205 	    channel, mode, priv_data, priv_data_len,
1206 	    (ret_priv_data_len_p == NULL) ? 0 : *ret_priv_data_len_p);
1207 
1208 	/* validate channel, first */
1209 	if (IBCM_INVALID_CHANNEL(channel)) {
1210 		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
1211 		    "invalid channel", channel);
1212 		return (IBT_CHAN_HDL_INVALID);
1213 	}
1214 
1215 	if (ibtl_cm_get_chan_type(channel) != IBT_RC_SRV) {
1216 		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
1217 		    "Invalid Channel type: Applicable only to RC Channel",
1218 		    channel);
1219 		return (IBT_CHAN_SRV_TYPE_INVALID);
1220 	}
1221 
1222 	if (mode == IBT_BLOCKING) {
1223 		/* valid only for BLOCKING MODE */
1224 		if ((ret_priv_data_len_p != NULL) &&
1225 		    (*ret_priv_data_len_p > IBT_DREP_PRIV_DATA_SZ)) {
1226 			IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p"
1227 			    " private data len %d is too large", channel,
1228 			    *ret_priv_data_len_p);
1229 			return (IBT_INVALID_PARAM);
1230 		}
1231 	} else if ((mode != IBT_NONBLOCKING) && (mode != IBT_NOCALLBACKS)) {
1232 		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
1233 		    "invalid mode %x specified", channel, mode);
1234 		return (IBT_INVALID_PARAM);
1235 	}
1236 
1237 	if (ibtl_cm_is_chan_closing(channel) ||
1238 	    ibtl_cm_is_chan_closed(channel)) {
1239 		if (ret_status)
1240 			*ret_status = IBT_CM_CLOSED_ALREADY;
1241 
1242 		/* No private data to return to the client */
1243 		if (ret_priv_data_len_p != NULL)
1244 			*ret_priv_data_len_p = 0;
1245 
1246 		IBTF_DPRINTF_L3(cmlog, "ibt_close_rc_channel: chan 0x%p "
1247 		    "already marked for closing", channel);
1248 
1249 		return (IBT_SUCCESS);
1250 	}
1251 
1252 	/* get the statep */
1253 	IBCM_GET_CHAN_PRIVATE(channel, statep);
1254 	if (statep == NULL) {
1255 		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
1256 		    "statep NULL", channel);
1257 		return (IBT_CHAN_STATE_INVALID);
1258 	}
1259 
1260 	mutex_enter(&statep->state_mutex);
1261 
1262 	if (statep->dreq_msg == NULL) {
1263 		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
1264 		    "Fatal Error: dreq_msg is NULL", channel);
1265 		IBCM_RELEASE_CHAN_PRIVATE(channel);
1266 		mutex_exit(&statep->state_mutex);
1267 		return (IBT_CHAN_STATE_INVALID);
1268 	}
1269 
1270 	if ((ret_priv_data == NULL) || (ret_priv_data_len_p == NULL)) {
1271 		statep->close_ret_priv_data = NULL;
1272 		statep->close_ret_priv_data_len = NULL;
1273 	} else {
1274 		statep->close_ret_priv_data = ret_priv_data;
1275 		statep->close_ret_priv_data_len = ret_priv_data_len_p;
1276 	}
1277 
1278 	priv_data_len = min(priv_data_len, IBT_DREQ_PRIV_DATA_SZ);
1279 	if ((priv_data != NULL) && (priv_data_len > 0)) {
1280 		bcopy(priv_data, ((ibcm_dreq_msg_t *)
1281 		    IBCM_OUT_MSGP(statep->dreq_msg))->dreq_private_data,
1282 		    priv_data_len);
1283 	}
1284 	statep->close_ret_status = ret_status;
1285 
1286 	IBCM_RELEASE_CHAN_PRIVATE(channel);
1287 	IBCM_REF_CNT_INCR(statep);
1288 
1289 	if (mode != IBT_NONBLOCKING) {
1290 		return (ibcm_close_rc_channel(channel, statep, mode));
1291 	}
1292 
1293 	/* IBT_NONBLOCKING */
1294 	ibcm_close_enqueue(statep);
1295 	mutex_exit(&statep->state_mutex);
1296 
1297 	return (IBT_SUCCESS);
1298 }
1299 
/*
 * ibcm_close_start()
 *	Starts the close of statep's channel in IBT_NONBLOCKING mode.
 *
 *	The state mutex is acquired here because ibcm_close_rc_channel()
 *	must be entered with it held; that routine releases the mutex on
 *	all of its return paths, so there is no mutex_exit() in this function.
 *	The status returned by ibcm_close_rc_channel() is discarded.
 *
 *	NOTE(review): presumably this is what later runs the closes queued by
 *	ibt_close_rc_channel() through ibcm_close_enqueue() -- confirm.
 */
1300 void
1301 ibcm_close_start(ibcm_state_data_t *statep)
1302 {
1303 	mutex_enter(&statep->state_mutex);
1304 	(void) ibcm_close_rc_channel(statep->channel, statep, IBT_NONBLOCKING);
1305 }
1306 
/*
 * ibcm_close_rc_channel()
 *	Performs the close of an RC channel on behalf of
 *	ibt_close_rc_channel() and ibcm_close_start().
 *
 *	Must be entered with statep->state_mutex held; the mutex is released
 *	on every return path. The reference on statep held by the caller is
 *	dropped here as well, except when a pending connection attempt is
 *	aborted through the taskq: in that case ibcm_process_abort_via_taskq()
 *	drops it.
 *
 *	What is done depends on the connection state found:
 *	- established: a DREQ is posted; IBT_BLOCKING waits for close_done
 *	- REQ_SENT/REP_SENT/REP_WAIT/MRA_REP_RCVD: the connection attempt is
 *	  aborted via the taskq and the state data is deleted
 *	- REQ_RCVD/REP_RCVD/MRA_SENT/MRA_REP_SENT: the abort flag is set and
 *	  the state machine is left to finish the teardown
 *	- TIMEWAIT/DELETE: nothing to do, reported as already closed
 *	- DREQ_RCVD/DREQ_SENT/DREP_RCVD, or timed out while in DREQ_SENT:
 *	  a teardown is already in progress; wait for it when the mode asks
 *	- TIMED_OUT (other cases): wait for close_done when the mode asks
 *
 * INPUTS:
 *	channel		Channel being closed (used for tracing).
 *	statep		CM state of the connection.
 *	mode		IBT_BLOCKING, IBT_NONBLOCKING or IBT_NOCALLBACKS.
 *
 * RETURN VALUES:
 *	IBT_SUCCESS			close handled for the state found
 *	IBT_CHAN_HDL_INVALID		HCA of the channel is not accessible
 *	IBT_INSUFF_KERNEL_RESOURCE	IBT_NONBLOCKING abort could not be
 *					dispatched to the taskq
 *	IBT_CM_FAILURE			connection is in none of the states
 *					handled above
 */
1307 static
1308 ibt_status_t
1309 ibcm_close_rc_channel(ibt_channel_hdl_t channel, ibcm_state_data_t *statep,
1310     ibt_execution_mode_t mode)
1311 {
1312 	ibcm_hca_info_t		*hcap;
1313 
1314 	_NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&statep->state_mutex));
1315 	ASSERT(MUTEX_HELD(&statep->state_mutex));
1316 
1317 	IBTF_DPRINTF_L3(cmlog, "ibcm_close_rc_channel: chan 0x%p statep %p",
1318 	    channel, statep);
1319 
1320 	hcap = statep->hcap;
1321 
1322 	/* HCA must have been in active state. If not, it's a client bug */
1323 	if (!IBCM_ACCESS_HCA_OK(hcap)) {
1324 		IBTF_DPRINTF_L2(cmlog, "ibcm_close_rc_channel: chan 0x%p "
1325 		    "hcap 0x%p not active", channel, hcap);
1326 		IBCM_REF_CNT_DECR(statep);
1327 		mutex_exit(&statep->state_mutex);
1328 		return (IBT_CHAN_HDL_INVALID);
1329 	}
1330 
	/* Let the transient states settle before looking at the state */
1331 	if (statep->state == IBCM_STATE_TRANSIENT_ESTABLISHED) {
1332 		while (statep->cep_in_rts == IBCM_BLOCK)
1333 			cv_wait(&statep->block_mad_cv, &statep->state_mutex);
1334 	}
1335 
1336 	/* Do TRANSIENT_DREQ check after TRANSIENT_ESTABLISHED check */
1337 	while (statep->state == IBCM_STATE_TRANSIENT_DREQ_SENT)
1338 		cv_wait(&statep->block_mad_cv, &statep->state_mutex);
1339 
1340 	IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: chan 0x%p "
1341 	    "connection state is %x", channel, statep->state);
1342 
1343 	/* If state is in pre-established states, abort the connection est */
1344 	if (statep->state != IBCM_STATE_ESTABLISHED) {
1345 		statep->cm_retries++;	/* ensure connection trace is dumped */
1346 
1347 		/* No DREP private data possible */
1348 		if (statep->close_ret_priv_data_len != NULL)
1349 			*statep->close_ret_priv_data_len = 0;
1350 
1351 		/*
1352 		 * If waiting for a response mad, then cancel the timer,
1353 		 * and delete the connection
1354 		 */
1355 		if (statep->state == IBCM_STATE_REQ_SENT ||
1356 		    statep->state == IBCM_STATE_REP_SENT ||
1357 		    statep->state == IBCM_STATE_REP_WAIT ||
1358 		    statep->state == IBCM_STATE_MRA_REP_RCVD) {
1359 			timeout_id_t		timer_val = statep->timerid;
1360 			ibcm_conn_state_t	old_state;
1361 
1362 			IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: "
1363 			    "chan 0x%p connection aborted in state %x", channel,
1364 			    statep->state);
1365 
1366 			old_state = statep->state;
1367 			statep->state = IBCM_STATE_DELETE;
1368 
1369 			if (mode == IBT_NONBLOCKING) {
1370 				if (taskq_dispatch(ibcm_taskq,
1371 				    ibcm_process_abort_via_taskq, statep,
1372 				    TQ_NOSLEEP) == 0) {
1373 
					/* dispatch failed: undo and give up */
1374 					IBCM_REF_CNT_DECR(statep);
1375 					statep->state = old_state;
1376 					mutex_exit(&statep->state_mutex);
1377 					return (IBT_INSUFF_KERNEL_RESOURCE);
1378 				}	/* if taskq_dispatch succeeds */
1379 				/* Cancel the timer */
1380 				statep->timerid = 0;
1381 				mutex_exit(&statep->state_mutex);
1382 			} else {
1383 				/* Cancel the timer */
1384 				statep->timerid = 0;
1385 				mutex_exit(&statep->state_mutex);
1386 				(void) taskq_dispatch(ibcm_taskq,
1387 				    ibcm_process_abort_via_taskq, statep,
1388 				    TQ_SLEEP);
1389 			}
1390 
1391 			/* cancel the currently running timer */
1392 			if (timer_val != 0)
1393 				(void) untimeout(timer_val);
1394 
1395 			/* wait until cm handler returns for BLOCKING cases */
1396 			mutex_enter(&statep->state_mutex);
1397 			if ((mode == IBT_BLOCKING) ||
1398 			    (mode == IBT_NOCALLBACKS)) {
1399 				while (statep->close_done != B_TRUE)
1400 					cv_wait(&statep->block_client_cv,
1401 					    &statep->state_mutex);
1402 			}
1403 
1404 			if (statep->close_ret_status)
1405 				*statep->close_ret_status = IBT_CM_CLOSED_ABORT;
1406 			mutex_exit(&statep->state_mutex);
1407 
1408 			/*
1409 			 * It would be ideal to post a REJ MAD, but that would
1410 			 * be non-conformance to spec. Hence, delete the state
1411 			 * data. Assuming that happens quickly, any retransmits
1412 			 * from the remote are replied by CM with reject
1413 			 * reason " no valid com id". That would stop remote
1414 			 * sending any more MADs.
1415 			 */
1416 			ibcm_delete_state_data(statep);
1417 			return (IBT_SUCCESS);
1418 
1419 		/* if CM busy in cm handler, wait until cm handler returns */
1420 		} else if (statep->state == IBCM_STATE_REQ_RCVD ||
1421 		    statep->state == IBCM_STATE_REP_RCVD ||
1422 		    statep->state == IBCM_STATE_MRA_SENT ||
1423 		    statep->state == IBCM_STATE_MRA_REP_SENT) {
1424 
1425 			/* take control of statep */
1426 			statep->abort_flag |= IBCM_ABORT_CLIENT;
1427 
1428 			IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: "
1429 			    "chan 0x%p connection aborted in state = %x",
1430 			    channel, statep->state);
1431 
1432 			/*
1433 			 * wait until state machine modifies qp state to error,
1434 			 * including disassociating statep and QP
1435 			 */
1436 			if ((mode == IBT_BLOCKING) || (mode == IBT_NOCALLBACKS))
1437 				while (statep->close_done != B_TRUE)
1438 					cv_wait(&statep->block_client_cv,
1439 					    &statep->state_mutex);
1440 
1441 			/* a sanity setting */
1442 			if (mode == IBT_NOCALLBACKS)
1443 				statep->cm_handler = NULL;
1444 			IBCM_REF_CNT_DECR(statep);
1445 
1446 			/*
1447 			 * In rare situations, connection attempt could be
1448 			 * terminated for some other reason, before abort is
1449 			 * processed, but CM still returns ret_status as abort
1450 			 */
1451 			if (statep->close_ret_status)
1452 				*statep->close_ret_status = IBT_CM_CLOSED_ABORT;
1453 			mutex_exit(&statep->state_mutex);
1454 
1455 			/*
1456 			 * REJ MAD is posted by the CM state machine for this
1457 			 * case, hence state structure is deleted in the
1458 			 * state machine processing.
1459 			 */
1460 			return (IBT_SUCCESS);
1461 
1462 		} else if ((statep->state == IBCM_STATE_TIMEWAIT) ||
1463 		    (statep->state == IBCM_STATE_DELETE)) {
1464 
1465 			/* State already in timewait, so no return priv data */
1466 			IBCM_REF_CNT_DECR(statep);
1467 
1468 			/* The teardown has already been done */
1469 			if (statep->close_ret_status)
1470 				*statep->close_ret_status =
1471 				    IBT_CM_CLOSED_ALREADY;
1472 			mutex_exit(&statep->state_mutex);
1473 
1474 			return (IBT_SUCCESS);
1475 
1476 		} else if ((statep->state == IBCM_STATE_DREQ_RCVD) ||
1477 		    (statep->state == IBCM_STATE_DREQ_SENT) ||
1478 		    (statep->state == IBCM_STATE_DREP_RCVD) ||
1479 		    ((statep->state == IBCM_STATE_TIMED_OUT) &&
1480 		    (statep->timedout_state == IBCM_STATE_DREQ_SENT))) {
1481 
1482 			/*
1483 			 * Either the remote or local client has already
1484 			 * initiated the teardown.  IBCM_STATE_DREP_RCVD is
1485 			 * possible, if CM initiated teardown without client's
1486 			 * knowledge, for stale handling, etc.,
1487 			 */
1488 			if (mode == IBT_NOCALLBACKS) {
1489 				if (statep->close_nocb_state == IBCM_UNBLOCK) {
1490 					statep->close_nocb_state = IBCM_FAIL;
1491 					/* enable free qp after return */
1492 					ibtl_cm_chan_is_closing(
1493 					    statep->channel);
1494 				} else while (statep->close_nocb_state ==
1495 				    IBCM_BLOCK)
1496 					cv_wait(&statep->block_client_cv,
1497 					    &statep->state_mutex);
1498 				statep->cm_handler = NULL; /* sanity setting */
1499 				if (statep->close_ret_status)
1500 					*statep->close_ret_status =
1501 					    IBT_CM_CLOSED_ALREADY;
1502 			} else if (mode == IBT_BLOCKING) {
1503 				/* wait until state is moved to timewait */
1504 				while (statep->close_done != B_TRUE)
1505 					cv_wait(&statep->block_client_cv,
1506 					    &statep->state_mutex);
1507 			}
1508 
1509 			IBCM_REF_CNT_DECR(statep);
1510 			mutex_exit(&statep->state_mutex);
1511 
1512 			/* ret_status is set in state machine code */
1513 			return (IBT_SUCCESS);
1514 
1515 		} else if (statep->state == IBCM_STATE_TIMED_OUT) {
1516 
1517 			if ((mode == IBT_BLOCKING) ||
1518 			    (mode == IBT_NOCALLBACKS)) {
1519 
1520 				/*
1521 				 * wait until cm handler invocation and
1522 				 * disassociation between statep and channel
1523 				 * is complete
1524 				 */
1525 				while (statep->close_done != B_TRUE)
1526 					cv_wait(&statep->block_client_cv,
1527 					    &statep->state_mutex);
1528 			}
1529 
1530 			if (statep->close_ret_status)
1531 				*statep->close_ret_status = IBT_CM_CLOSED_ABORT;
1532 			IBCM_REF_CNT_DECR(statep);
1533 			mutex_exit(&statep->state_mutex);
1534 
1535 			return (IBT_SUCCESS);
1536 		} else {
			/* none of the states handled above */
1537 			IBCM_REF_CNT_DECR(statep);
1538 			mutex_exit(&statep->state_mutex);
1539 
1540 			return (IBT_CM_FAILURE);
1541 		}
1542 	}
1543 
	/* From here on the connection was found in established state */
1544 	ASSERT(statep->close_nocb_state != IBCM_BLOCK);
1545 
1546 	if (mode == IBT_NOCALLBACKS) {
1547 		statep->close_nocb_state = IBCM_FAIL;
1548 		statep->cm_handler = NULL;
1549 		ibtl_cm_chan_is_closing(statep->channel);
1550 		IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: "
1551 		    "NOCALLBACKS on in statep = %p", statep);
1552 	}
1553 
	/*
	 * NOTE(review): every branch of the non-established case above
	 * returns, and no cv_wait() happens in between, so this re-check
	 * looks redundant -- confirm before relying on it.
	 */
1554 	if (statep->state != IBCM_STATE_ESTABLISHED) {
1555 		goto lost_race;
1556 	}
1557 
1558 	/*
1559 	 * Cancel/wait for any pending ibt_set_alt_path, and
1560 	 * release state mutex
1561 	 */
1562 	ibcm_sync_lapr_idle(statep);
1563 
1564 	ibcm_close_enter();
1565 
	/* The mutex was dropped above, so the state has to be re-checked */
1566 	mutex_enter(&statep->state_mutex);
1567 	if (statep->state != IBCM_STATE_ESTABLISHED) {
1568 		ibcm_close_exit();
1569 		goto lost_race;
1570 	}
1571 
1572 	statep->state = IBCM_STATE_TRANSIENT_DREQ_SENT;
1573 	statep->timerid = 0;
1574 	statep->close_done = B_FALSE;
1575 	statep->close_flow = 1;
1576 	mutex_exit(&statep->state_mutex);
1577 
1578 	ibcm_post_dreq_mad(statep);
1579 
1580 	mutex_enter(&statep->state_mutex);
1581 
1582 lost_race:
1583 	if (mode == IBT_BLOCKING) {
1584 
1585 		/* wait for DREP */
1586 		while (statep->close_done != B_TRUE)
1587 			cv_wait(&statep->block_client_cv,
1588 			    &statep->state_mutex);
1589 
1590 		IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: chan 0x%p "
1591 		    "done blocking", channel);
1592 	}
1593 
1594 	IBCM_REF_CNT_DECR(statep);
1595 	mutex_exit(&statep->state_mutex);
1596 
1597 	/* If this message isn't seen then ibt_close_rc_channel failed */
1598 	IBTF_DPRINTF_L5(cmlog, "ibcm_close_rc_channel: chan 0x%p done",
1599 	    channel);
1600 
1601 	return (IBT_SUCCESS);
1602 }
1603 
1604 ibt_status_t
1605 ibt_recycle_rc(ibt_channel_hdl_t rc_chan, ibt_cep_flags_t control,
1606     uint8_t hca_port_num, ibt_recycle_handler_t func, void *arg)
1607 {
1608 	ibcm_state_data_t		*statep;
1609 	ibcm_taskq_recycle_arg_t	*ibcm_tq_recycle_arg;
1610 	ibt_qp_query_attr_t		qp_attr;
1611 	ibt_status_t			retval;
1612 
1613 	IBTF_DPRINTF_L3(cmlog, "ibt_recycle_rc (%p, 0x%X, %d, %p, %p)", rc_chan,
1614 	    control, hca_port_num, func, arg);
1615 
1616 	if (IBCM_INVALID_CHANNEL(rc_chan)) {
1617 		IBTF_DPRINTF_L2(cmlog, "ibt_recycle_rc: invalid channel");
1618 		return (IBT_CHAN_HDL_INVALID);
1619 	}
1620 
1621 	/* check qp state */
1622 	retval = ibt_query_qp(rc_chan, &qp_attr);
1623 
1624 	if (retval != IBT_SUCCESS)
1625 		return (retval);
1626 
1627 	if (qp_attr.qp_info.qp_trans != IBT_RC_SRV)
1628 		return (IBT_CHAN_SRV_TYPE_INVALID);
1629 
1630 	if (qp_attr.qp_info.qp_state != IBT_STATE_ERROR)
1631 		return (IBT_CHAN_STATE_INVALID);
1632 
1633 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ibcm_tq_recycle_arg))
1634 
1635 	ibcm_tq_recycle_arg = kmem_alloc(sizeof (ibcm_taskq_recycle_arg_t),
1636 	    KM_SLEEP);
1637 
1638 	ibcm_tq_recycle_arg->rc_chan		= rc_chan;
1639 	ibcm_tq_recycle_arg->control		= control;
1640 	ibcm_tq_recycle_arg->hca_port_num	= hca_port_num;
1641 	ibcm_tq_recycle_arg->func		= func;
1642 	ibcm_tq_recycle_arg->arg		= arg;
1643 
1644 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*ibcm_tq_recycle_arg))
1645 
1646 	IBCM_GET_CHAN_PRIVATE(rc_chan, statep);
1647 
1648 	/*
1649 	 * If non-blocking ie., func specified and channel has not yet completed
1650 	 * the timewait, then schedule the work for later
1651 	 */
1652 	if ((func != NULL) && (statep != NULL)) {
1653 		IBCM_RELEASE_CHAN_PRIVATE(rc_chan);
1654 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(statep->recycle_arg))
1655 		statep->recycle_arg = ibcm_tq_recycle_arg;
1656 		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(statep->recycle_arg))
1657 		return (IBT_SUCCESS);
1658 	}
1659 
1660 	/*
1661 	 * if blocking ie., func specified, and channel has not yet completed
1662 	 * the timewait, then block until the channel completes the timewait
1663 	 */
1664 	if (statep != NULL)
1665 		IBCM_RELEASE_CHAN_PRIVATE(rc_chan);
1666 	IBCM_WAIT_CHAN_PRIVATE(rc_chan);
1667 
1668 	if (func) {	/* NON BLOCKING case. Taskq for QP state change */
1669 		(void) taskq_dispatch(ibcm_taskq, ibcm_process_rc_recycle,
1670 		    ibcm_tq_recycle_arg, TQ_SLEEP);
1671 		return (IBT_SUCCESS);
1672 	} else	/* BLOCKING case */
1673 		return (ibcm_process_rc_recycle_ret(ibcm_tq_recycle_arg));
1674 }
1675 
/*
 * ibcm_process_rc_recycle()
 *	void variant of ibcm_process_rc_recycle_ret(), which makes it usable
 *	as the function that ibt_recycle_rc() dispatches to the taskq.
 *
 *	recycle_arg is the ibcm_taskq_recycle_arg_t describing the request.
 *	The returned status is dropped here; ibcm_process_rc_recycle_ret()
 *	hands it to the recycle handler stored in the argument, if any.
 */
1676 void
1677 ibcm_process_rc_recycle(void *recycle_arg)
1678 {
1679 	(void) ibcm_process_rc_recycle_ret(recycle_arg);
1680 }
1681 
1682 static ibt_status_t
1683 ibcm_process_rc_recycle_ret(void *recycle_arg)
1684 {
1685 	ibt_qp_info_t			qp_info;
1686 	ibt_status_t			ibt_status = IBT_SUCCESS;
1687 	ibt_cep_modify_flags_t		cep_flags;
1688 	ibt_qp_query_attr_t		qp_attr;
1689 	ibcm_taskq_recycle_arg_t	*ibcm_tq_recycle_arg =
1690 	    (ibcm_taskq_recycle_arg_t *)recycle_arg;
1691 
1692 	/* QP must have been in error state */
1693 	ibt_status = ibt_query_qp(ibcm_tq_recycle_arg->rc_chan, &qp_attr);
1694 	if (ibt_status != IBT_SUCCESS)
1695 		IBTF_DPRINTF_L2(cmlog, "ibcm_process_rc_recycle_ret: "
1696 		    "chanp %p ibt_query_qp() = %d",
1697 		    ibcm_tq_recycle_arg->rc_chan, ibt_status);
1698 	else {
1699 		/* perform the QP state change from ERROR to RESET */
1700 		bzero(&qp_info, sizeof (qp_info));
1701 
1702 		qp_info.qp_trans = IBT_RC_SRV;
1703 		qp_info.qp_state = IBT_STATE_RESET;
1704 
1705 		/* Call modify_qp to move to RESET state */
1706 		ibt_status = ibt_modify_qp(ibcm_tq_recycle_arg->rc_chan,
1707 		    IBT_CEP_SET_STATE, &qp_info, NULL);
1708 
1709 		if (ibt_status != IBT_SUCCESS)
1710 			IBTF_DPRINTF_L2(cmlog, "ibcm_process_rc_recycle_ret: "
1711 			    "chanp %p ibt_modify_qp() = %d for ERROR to RESET",
1712 			    ibcm_tq_recycle_arg->rc_chan, ibt_status);
1713 	}
1714 
1715 	if (ibt_status == IBT_SUCCESS) {
1716 
1717 		qp_info.qp_state = IBT_STATE_INIT;
1718 
1719 		/* set flags for all mandatory args from RESET to INIT */
1720 		cep_flags = IBT_CEP_SET_STATE | IBT_CEP_SET_PORT;
1721 		cep_flags |= IBT_CEP_SET_RDMA_R | IBT_CEP_SET_RDMA_W;
1722 		cep_flags |= IBT_CEP_SET_ATOMIC;
1723 
1724 		qp_info.qp_transport.rc.rc_path.cep_hca_port_num =
1725 		    ibcm_tq_recycle_arg->hca_port_num;
1726 		qp_info.qp_flags |=
1727 		    ibcm_tq_recycle_arg->control & IBT_CEP_RDMA_RD;
1728 		qp_info.qp_flags |=
1729 		    ibcm_tq_recycle_arg->control & IBT_CEP_RDMA_WR;
1730 		qp_info.qp_flags |=
1731 		    ibcm_tq_recycle_arg->control & IBT_CEP_ATOMIC;
1732 
1733 		/* Always use the existing pkey */
1734 		qp_info.qp_transport.rc.rc_path.cep_pkey_ix =
1735 		    qp_attr. qp_info.qp_transport.rc.rc_path.cep_pkey_ix;
1736 
1737 		/* Call modify_qp to move to INIT state */
1738 		ibt_status = ibt_modify_qp(ibcm_tq_recycle_arg->rc_chan,
1739 		    cep_flags, &qp_info, NULL);
1740 
1741 		if (ibt_status != IBT_SUCCESS)
1742 			IBTF_DPRINTF_L2(cmlog, "ibcm_process_rc_recycle_ret: "
1743 			    "chanp %p ibt_modify_qp() = %d for RESET to INIT",
1744 			    ibcm_tq_recycle_arg->rc_chan, ibt_status);
1745 	}
1746 
1747 	/* Change the QP CM state to indicate QP being re-used */
1748 	if (ibt_status == IBT_SUCCESS)
1749 		ibtl_cm_chan_is_reused(ibcm_tq_recycle_arg->rc_chan);
1750 
1751 	/* Call func, if defined */
1752 	if (ibcm_tq_recycle_arg->func)
1753 		(*(ibcm_tq_recycle_arg->func))(ibt_status,
1754 		    ibcm_tq_recycle_arg->arg);
1755 
1756 	kmem_free(ibcm_tq_recycle_arg, sizeof (ibcm_taskq_recycle_arg_t));
1757 
1758 	return (ibt_status);
1759 }
1760 
1761 static void
1762 ibcm_process_abort_via_taskq(void *args)
1763 {
1764 	ibcm_state_data_t	*statep = (ibcm_state_data_t *)args;
1765 
1766 	ibcm_process_abort(statep);
1767 	mutex_enter(&statep->state_mutex);
1768 	IBCM_REF_CNT_DECR(statep);
1769 	mutex_exit(&statep->state_mutex);
1770 }
1771 
/*
 * Local UD CM Handler's private data, used during ibt_request_ud_dest() in
 * Non-Blocking mode operations.
 *
 * An instance is allocated by ibt_request_ud_dest() for a non-blocking
 * request and handed to ibcm_local_cm_handler() as its private argument;
 * that handler frees it after processing the SIDR REP event.
 */
typedef struct ibcm_local_handler_s {
	/* client's own UD CM handler, invoked from the local handler */
	ibt_cm_ud_handler_t	actual_cm_handler;
	/* client's own private argument, passed to actual_cm_handler */
	void			*actual_cm_private;
	/* UD destination whose QPN and QKey are filled in from the SIDR REP */
	ibt_ud_dest_t		*dest_hdl;
} ibcm_local_handler_t;

_NOTE(READ_ONLY_DATA(ibcm_local_handler_s))
1783 
1784 /*
1785  * Local UD CM Handler, used when ibt_alloc_ud_dest() is issued in
1786  * NON-Blocking mode.
1787  *
1788  * Out here, we update the UD Destination handle with
1789  * the obtained DQPN and QKey (from SIDR REP) and invokes actual client
1790  * handler that was specified by the client.
1791  */
1792 static ibt_cm_status_t
1793 ibcm_local_cm_handler(void *priv, ibt_cm_ud_event_t *event,
1794     ibt_cm_ud_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
1795 {
1796 	ibcm_local_handler_t	*handler_priv = (ibcm_local_handler_t *)priv;
1797 
1798 	IBTF_DPRINTF_L4(cmlog, "ibcm_local_cm_handler: event %d",
1799 	    event->cm_type);
1800 
1801 	ASSERT(handler_priv != NULL);
1802 
1803 	switch (event->cm_type) {
1804 	case IBT_CM_UD_EVENT_SIDR_REP:
1805 		/* Update QPN & QKey from event into destination handle. */
1806 		if (handler_priv->dest_hdl != NULL) {
1807 			handler_priv->dest_hdl->ud_dst_qpn =
1808 			    event->cm_event.sidr_rep.srep_remote_qpn;
1809 			handler_priv->dest_hdl->ud_qkey =
1810 			    event->cm_event.sidr_rep.srep_remote_qkey;
1811 		}
1812 
1813 		/* Invoke the client handler - inform only, so ignore retval */
1814 		(void) handler_priv->actual_cm_handler(
1815 		    handler_priv->actual_cm_private, event, ret_args, priv_data,
1816 		    len);
1817 
1818 		/* Free memory allocated for local handler's private data. */
1819 		if (handler_priv != NULL)
1820 			kmem_free(handler_priv, sizeof (*handler_priv));
1821 
1822 		break;
1823 	default:
1824 		IBTF_DPRINTF_L2(cmlog, "ibcm_local_cm_handler: ERROR");
1825 		break;
1826 	}
1827 
1828 	return (IBT_CM_ACCEPT);
1829 }
1830 
1831 
1832 /* Validate the input UD destination attributes.  */
1833 static ibt_status_t
1834 ibcm_validate_dqpn_data(ibt_ud_dest_attr_t *attr, ibt_execution_mode_t mode,
1835     ibt_ud_returns_t *ret_args)
1836 {
1837 	/* cm handler must always be specified */
1838 	if (mode == IBT_NONBLOCKING && attr->ud_cm_handler == NULL) {
1839 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1840 		    "CM handler is not specified ");
1841 		return (IBT_INVALID_PARAM);
1842 	}
1843 
1844 	if (mode == IBT_NONBLOCKING) {
1845 		if (ret_args != NULL) {
1846 			IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1847 			    "ret_args should be NULL when called in "
1848 			    "non-blocking mode");
1849 			return (IBT_INVALID_PARAM);
1850 		}
1851 	} else if (mode == IBT_BLOCKING) {
1852 		if (ret_args == NULL) {
1853 			IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1854 			    "ret_args should be Non-NULL when called in "
1855 			    "blocking mode");
1856 			return (IBT_INVALID_PARAM);
1857 		}
1858 	} else {
1859 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1860 		    "invalid mode %x specified ", mode);
1861 		return (IBT_INVALID_PARAM);
1862 	}
1863 
1864 	if (attr->ud_sid == 0) {
1865 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1866 		    "ServiceID must be specified. ");
1867 		return (IBT_INVALID_PARAM);
1868 	}
1869 
1870 	if (attr->ud_addr == NULL) {
1871 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1872 		    "Address Info NULL");
1873 		return (IBT_INVALID_PARAM);
1874 	}
1875 
1876 	/* Validate SGID */
1877 	if ((attr->ud_addr->av_sgid.gid_prefix == 0) ||
1878 	    (attr->ud_addr->av_sgid.gid_guid == 0)) {
1879 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: Invalid SGID");
1880 		return (IBT_INVALID_PARAM);
1881 	}
1882 	IBTF_DPRINTF_L3(cmlog, "ibcm_validate_dqpn_data: SGID<%llX:%llX>",
1883 	    attr->ud_addr->av_sgid.gid_prefix,
1884 	    attr->ud_addr->av_sgid.gid_guid);
1885 
1886 	/* Validate DGID */
1887 	if ((attr->ud_addr->av_dgid.gid_prefix == 0) ||
1888 	    (attr->ud_addr->av_dgid.gid_guid == 0)) {
1889 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: Invalid DGID");
1890 		return (IBT_INVALID_PARAM);
1891 	}
1892 	IBTF_DPRINTF_L3(cmlog, "ibcm_validate_dqpn_data: DGID<%llX:%llX>",
1893 	    attr->ud_addr->av_dgid.gid_prefix,
1894 	    attr->ud_addr->av_dgid.gid_guid);
1895 
1896 	return (IBT_SUCCESS);
1897 }
1898 
1899 
1900 /* Perform SIDR to retrieve DQPN and QKey.  */
1901 static ibt_status_t
1902 ibcm_ud_get_dqpn(ibt_ud_dest_attr_t *attr, ibt_execution_mode_t mode,
1903     ibt_ud_returns_t *ret_args)
1904 {
1905 	ibt_status_t		retval;
1906 	ib_pkey_t		ud_pkey;
1907 	ibmf_handle_t		ibmf_hdl;
1908 	ibmf_msg_t		*ibmf_msg;
1909 	ibcm_hca_info_t		*hcap;
1910 	ibcm_sidr_req_msg_t	*sidr_req_msgp;
1911 	ibcm_ud_state_data_t	*ud_statep;
1912 	ibtl_cm_hca_port_t	port;
1913 	ibcm_sidr_srch_t	sidr_entry;
1914 	ibcm_qp_list_t		*cm_qp_entry;
1915 
1916 	/* Retrieve HCA GUID value from the available SGID info. */
1917 	retval = ibtl_cm_get_hca_port(attr->ud_addr->av_sgid, 0, &port);
1918 	if ((retval != IBT_SUCCESS) || (port.hp_port == 0)) {
1919 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: "
1920 		    "ibtl_cm_get_hca_port failed: %d", retval);
1921 		return (retval);
1922 	}
1923 
1924 	IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: "
1925 	    "HCA GUID:%llX, port_num:%d", port.hp_hca_guid, port.hp_port);
1926 
1927 	/* Lookup the HCA info for this GUID */
1928 	if ((hcap = ibcm_find_hca_entry(port.hp_hca_guid)) == NULL) {
1929 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: hcap is NULL");
1930 		return (IBT_HCA_INVALID);
1931 	}
1932 
1933 	/* Return failure if the HCA device or Port is not operational */
1934 
1935 	if ((retval = ibt_get_port_state_byguid(port.hp_hca_guid, port.hp_port,
1936 	    NULL, NULL)) != IBT_SUCCESS) {
1937 		/* Device Port is not in good state, don't use it. */
1938 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: Invalid "
1939 		    "port specified or port not active");
1940 		ibcm_dec_hca_acc_cnt(hcap);
1941 		return (retval);
1942 	}
1943 
1944 	retval = ibt_index2pkey_byguid(port.hp_hca_guid, port.hp_port,
1945 	    attr->ud_pkey_ix, &ud_pkey);
1946 	if (retval != IBT_SUCCESS) {
1947 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: "
1948 		    "Failed to convert index2pkey: %d", retval);
1949 		ibcm_dec_hca_acc_cnt(hcap);
1950 		return (retval);
1951 	}
1952 
1953 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(sidr_entry))
1954 
1955 	/* Allocate a new request id */
1956 	if (ibcm_alloc_reqid(hcap, &sidr_entry.srch_req_id) == IBCM_FAILURE) {
1957 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: "
1958 		    "no req id available");
1959 		ibcm_dec_hca_acc_cnt(hcap);
1960 		return (IBT_INSUFF_KERNEL_RESOURCE);
1961 	}
1962 
1963 	if ((hcap->hca_port_info[port.hp_port - 1].port_ibmf_hdl == NULL) &&
1964 	    ((retval = ibcm_hca_reinit_port(hcap, port.hp_port - 1))
1965 	    != IBT_SUCCESS)) {
1966 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: "
1967 		    "ibmf reg or callback setup failed during re-initialize");
1968 		return (retval);
1969 	}
1970 
1971 	ibmf_hdl = hcap->hca_port_info[port.hp_port - 1].port_ibmf_hdl;
1972 
1973 	/* find the ibmf QP to post the SIDR REQ */
1974 	if ((cm_qp_entry = ibcm_find_qp(hcap, port.hp_port, ud_pkey)) ==
1975 	    NULL) {
1976 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: IBMF QP allocation"
1977 		    " failed");
1978 		ibcm_dec_hca_acc_cnt(hcap);
1979 		return (IBT_INSUFF_RESOURCE);
1980 	}
1981 
1982 	if ((retval = ibcm_alloc_out_msg(ibmf_hdl, &ibmf_msg, MAD_METHOD_SEND))
1983 	    != IBT_SUCCESS) {
1984 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: IBMF MSG allocation"
1985 		    " failed");
1986 		ibcm_release_qp(cm_qp_entry);
1987 		ibcm_dec_hca_acc_cnt(hcap);
1988 		return (retval);
1989 	}
1990 
1991 	sidr_entry.srch_lid = port.hp_base_lid;
1992 	sidr_entry.srch_gid = attr->ud_addr->av_sgid;
1993 	sidr_entry.srch_grh_exists = attr->ud_addr->av_send_grh;
1994 	sidr_entry.srch_mode = IBCM_ACTIVE_MODE;
1995 
1996 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(sidr_entry))
1997 
1998 	/* do various allocations needed here */
1999 	rw_enter(&hcap->hca_sidr_list_lock, RW_WRITER);
2000 
2001 	(void) ibcm_find_sidr_entry(&sidr_entry, hcap, &ud_statep,
2002 	    IBCM_FLAG_ADD);
2003 	rw_exit(&hcap->hca_sidr_list_lock);
2004 
2005 	/* Increment hca's resource count */
2006 	ibcm_inc_hca_res_cnt(hcap);
2007 
2008 	/* After a resource created on hca, no need to hold the acc cnt */
2009 	ibcm_dec_hca_acc_cnt(hcap);
2010 
2011 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ud_statep))
2012 
2013 	/* Initialize some ud_statep fields */
2014 	ud_statep->ud_stored_msg = ibmf_msg;
2015 	ud_statep->ud_svc_id = attr->ud_sid;
2016 	ud_statep->ud_pkt_life_time =
2017 	    ibt_ib2usec(attr->ud_pkt_lt);
2018 	ud_statep->ud_stored_reply_addr.cm_qp_entry = cm_qp_entry;
2019 
2020 	/* set remaining retry cnt */
2021 	ud_statep->ud_remaining_retry_cnt = ud_statep->ud_max_cm_retries;
2022 
2023 	/*
2024 	 * Get UD handler and corresponding args which is pass it back
2025 	 * as first argument for the handler.
2026 	 */
2027 	ud_statep->ud_state_cm_private = attr->ud_cm_private;
2028 
2029 	if (mode == IBT_BLOCKING)
2030 		ud_statep->ud_return_data = ret_args;
2031 	else
2032 		ud_statep->ud_cm_handler = attr->ud_cm_handler;
2033 
2034 	/* Initialize the fields of ud_statep->ud_stored_reply_addr */
2035 	ud_statep->ud_stored_reply_addr.grh_exists = attr->ud_addr->av_send_grh;
2036 	ud_statep->ud_stored_reply_addr.ibmf_hdl = ibmf_hdl;
2037 	ud_statep->ud_stored_reply_addr.grh_hdr.ig_hop_limit =
2038 	    attr->ud_addr->av_hop;
2039 	ud_statep->ud_stored_reply_addr.grh_hdr.ig_sender_gid =
2040 	    attr->ud_addr->av_sgid;
2041 	ud_statep->ud_stored_reply_addr.grh_hdr.ig_recver_gid =
2042 	    attr->ud_addr->av_dgid;
2043 	ud_statep->ud_stored_reply_addr.grh_hdr.ig_tclass =
2044 	    attr->ud_addr->av_tclass;
2045 	ud_statep->ud_stored_reply_addr.grh_hdr.ig_flow_label =
2046 	    attr->ud_addr->av_flow & IB_GRH_FLOW_LABEL_MASK;
2047 
2048 	/* needs to be derived based on the base LID and path bits */
2049 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_local_lid =
2050 	    port.hp_base_lid;
2051 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_remote_lid =
2052 	    attr->ud_addr->av_dlid;
2053 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_p_key = ud_pkey;
2054 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_q_key = IB_GSI_QKEY;
2055 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_service_level =
2056 	    attr->ud_addr->av_srvl;
2057 
2058 	/*
2059 	 * This may be enchanced later, to use a remote qno based on past
2060 	 * redirect rej mad responses. This would be the place to specify
2061 	 * appropriate remote qno
2062 	 */
2063 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_remote_qno = 1;
2064 
2065 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sidr_req_msgp))
2066 
2067 	/* Initialize the SIDR REQ message fields */
2068 	sidr_req_msgp =
2069 	    (ibcm_sidr_req_msg_t *)IBCM_OUT_MSGP(ud_statep->ud_stored_msg);
2070 
2071 	sidr_req_msgp->sidr_req_request_id = h2b32(ud_statep->ud_req_id);
2072 	sidr_req_msgp->sidr_req_service_id = h2b64(attr->ud_sid);
2073 	sidr_req_msgp->sidr_req_pkey = h2b16(ud_pkey);
2074 	IBCM_OUT_HDRP(ud_statep->ud_stored_msg)->AttributeID =
2075 	    h2b16(IBCM_INCOMING_SIDR_REQ + IBCM_ATTR_BASE_ID);
2076 
2077 	if ((attr->ud_priv_data != NULL) && (attr->ud_priv_data_len > 0)) {
2078 		bcopy(attr->ud_priv_data, sidr_req_msgp->sidr_req_private_data,
2079 		    min(attr->ud_priv_data_len, IBT_SIDR_REQ_PRIV_DATA_SZ));
2080 	}
2081 
2082 	/* Send out the SIDR REQ message */
2083 	ud_statep->ud_state = IBCM_STATE_SIDR_REQ_SENT;
2084 	ud_statep->ud_timer_stored_state = IBCM_STATE_SIDR_REQ_SENT;
2085 	IBCM_UD_REF_CNT_INCR(ud_statep); /* for non-blocking SIDR REQ post */
2086 	ud_statep->ud_timer_value = ibt_ib2usec(ibcm_max_sidr_rep_proctime) +
2087 	    (ud_statep->ud_pkt_life_time * 2);
2088 
2089 	IBCM_OUT_HDRP(ud_statep->ud_stored_msg)->TransactionID =
2090 	    h2b64(ibcm_generate_tranid(IBCM_INCOMING_SIDR_REQ,
2091 	    ud_statep->ud_req_id, 0));
2092 
2093 	IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: timer_value in HZ = %x",
2094 	    ud_statep->ud_timer_value);
2095 
2096 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*ud_statep))
2097 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*sidr_req_msgp))
2098 
2099 	ibcm_post_ud_mad(ud_statep, ud_statep->ud_stored_msg,
2100 	    ibcm_post_sidr_req_complete, ud_statep);
2101 
2102 	mutex_enter(&ud_statep->ud_state_mutex);
2103 
2104 	/* Wait for SIDR_REP */
2105 	if (mode == IBT_BLOCKING) {
2106 		IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: blocking");
2107 
2108 		while (ud_statep->ud_blocking_done != B_TRUE) {
2109 			cv_wait(&ud_statep->ud_block_client_cv,
2110 			    &ud_statep->ud_state_mutex);
2111 		}
2112 
2113 		IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: finished blocking");
2114 
2115 		if (ret_args->ud_status == IBT_CM_SREP_QPN_VALID) {
2116 			IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: DQPN = %x, "
2117 			    "status = %x, QKey = %x", ret_args->ud_dqpn,
2118 			    ret_args->ud_status, ret_args->ud_qkey);
2119 
2120 		} else {
2121 			IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: Status<%x>",
2122 			    ret_args->ud_status);
2123 			retval = IBT_CM_FAILURE;
2124 		}
2125 	}
2126 
2127 	IBCM_UD_REF_CNT_DECR(ud_statep);
2128 	mutex_exit(&ud_statep->ud_state_mutex);
2129 
2130 	IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: done");
2131 
2132 	return (retval);
2133 }
2134 
2135 
2136 /*
2137  * Function:
2138  *	ibt_request_ud_dest
2139  * Input:
2140  *	ud_dest		A previously allocated UD destination handle.
2141  *	mode		This function can execute in blocking or non blocking
2142  *			modes.
2143  *	attr		UD destination attributes to be modified.
2144  * Output:
2145  *	ud_ret_args	If the function is called in blocking mode, ud_ret_args
2146  *			should be a pointer to an ibt_ud_returns_t struct.
2147  * Returns:
2148  *	IBT_SUCCESS
2149  * Description:
2150  *	Modify a previously allocated UD destination handle based on the
2151  *	results of doing the SIDR protocol.
2152  */
2153 ibt_status_t
2154 ibt_request_ud_dest(ibt_ud_dest_hdl_t ud_dest, ibt_execution_mode_t mode,
2155     ibt_ud_dest_attr_t *attr, ibt_ud_returns_t *ud_ret_args)
2156 {
2157 	ibt_status_t		retval;
2158 	ibt_ud_dest_t		*ud_destp;
2159 	ibcm_local_handler_t	*local_handler_priv = NULL;
2160 
2161 	IBTF_DPRINTF_L3(cmlog, "ibt_request_ud_dest(%p, %x, %p, %p)",
2162 	    ud_dest, mode, attr, ud_ret_args);
2163 
2164 	retval = ibcm_validate_dqpn_data(attr, mode, ud_ret_args);
2165 	if (retval != IBT_SUCCESS) {
2166 		return (retval);
2167 	}
2168 
2169 	ud_destp = ud_dest;
2170 
2171 	/* Allocate an Address handle. */
2172 	retval = ibt_modify_ah(ud_destp->ud_dest_hca, ud_destp->ud_ah,
2173 	    attr->ud_addr);
2174 	if (retval != IBT_SUCCESS) {
2175 		IBTF_DPRINTF_L2(cmlog, "ibt_request_ud_dest: "
2176 		    "Address Handle Modification failed: %d", retval);
2177 		return (retval);
2178 	}
2179 
2180 	if (mode == IBT_NONBLOCKING) {
2181 		/*
2182 		 * In NON-BLOCKING mode, and we need to update the destination
2183 		 * handle with the DQPN and QKey that are obtained from
2184 		 * SIDR REP, hook-up our own handler, so that we can catch
2185 		 * the event, and we ourselves call the actual client's
2186 		 * ud_cm_handler, in our handler.
2187 		 */
2188 
2189 		/* Allocate memory for local handler's private data. */
2190 		local_handler_priv =
2191 		    kmem_alloc(sizeof (*local_handler_priv), KM_SLEEP);
2192 
2193 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*local_handler_priv))
2194 
2195 		local_handler_priv->actual_cm_handler = attr->ud_cm_handler;
2196 		local_handler_priv->actual_cm_private = attr->ud_cm_private;
2197 		local_handler_priv->dest_hdl = ud_destp;
2198 
2199 		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*local_handler_priv))
2200 
2201 		attr->ud_cm_handler = ibcm_local_cm_handler;
2202 		attr->ud_cm_private = local_handler_priv;
2203 	}
2204 
2205 	/* In order to get DQPN and Destination QKey, perform SIDR */
2206 	retval = ibcm_ud_get_dqpn(attr, mode, ud_ret_args);
2207 	if (retval != IBT_SUCCESS) {
2208 		IBTF_DPRINTF_L2(cmlog, "ibt_request_ud_dest: "
2209 		    "Failed to get DQPN: %d", retval);
2210 
2211 		/* Free memory allocated for local handler's private data. */
2212 		if (local_handler_priv != NULL)
2213 			kmem_free(local_handler_priv,
2214 			    sizeof (*local_handler_priv));
2215 		return (retval);
2216 	}
2217 
2218 	/*
2219 	 * Fill in the dqpn and dqkey as obtained from ud_ret_args,
2220 	 * values will be valid only on BLOCKING mode.
2221 	 */
2222 	if (mode == IBT_BLOCKING) {
2223 		ud_destp->ud_dst_qpn = ud_ret_args->ud_dqpn;
2224 		ud_destp->ud_qkey = ud_ret_args->ud_qkey;
2225 	}
2226 
2227 	return (retval);
2228 }
2229 
2230 /*
2231  * Function:
2232  *	ibt_ud_get_dqpn
2233  * Input:
2234  *	attr		A pointer to an ibt_ud_dest_attr_t struct that are
2235  *			required for SIDR REQ message. Not specified attributes
2236  *			should be set to "NULL" or "0".
2237  *			ud_sid, ud_addr and ud_pkt_lt must be specified.
2238  *	mode		This function can execute in blocking or non blocking
2239  *			modes.
2240  * Output:
2241  *	returns		If the function is called in blocking mode, returns
2242  *			should be a pointer to an ibt_ud_returns_t struct.
2243  * Return:
2244  *	IBT_SUCCESS	on success or respective failure on error.
2245  * Description:
2246  *	Finds the destination QPN at the specified destination that the
2247  *	specified service can be reached on. The IBTF CM initiates the
2248  *	service ID resolution protocol (SIDR) to determine a destination QPN.
2249  *
2250  * NOTE: SIDR_REQ is initiated from active side.
2251  */
2252 ibt_status_t
2253 ibt_ud_get_dqpn(ibt_ud_dest_attr_t *attr, ibt_execution_mode_t mode,
2254     ibt_ud_returns_t *returns)
2255 {
2256 	ibt_status_t		retval;
2257 
2258 	IBTF_DPRINTF_L3(cmlog, "ibt_ud_get_dqpn(%p, %x, %p)",
2259 	    attr, mode, returns);
2260 
2261 	retval = ibcm_validate_dqpn_data(attr, mode, returns);
2262 	if (retval != IBT_SUCCESS) {
2263 		return (retval);
2264 	}
2265 
2266 	return (ibcm_ud_get_dqpn(attr, mode, returns));
2267 }
2268 
2269 
2270 /*
2271  * ibt_cm_delay:
2272  *	A client CM handler function can call this function
2273  *	to extend its response time to a CM event.
2274  * INPUTS:
2275  *	flags		Indicates what CM message processing is being delayed
2276  *			by the CM handler, valid values are:
2277  *				IBT_CM_DELAY_REQ
2278  *				IBT_CM_DELAY_REP
2279  *				IBT_CM_DELAY_LAP
2280  *	cm_session_id	The session ID that was passed to client srv_handler
2281  *			by the CM
2282  *	service_time	The extended service time
2283  *	priv_data	Vendor specific data to be sent in the CM generated
2284  *			MRA message. Should be NULL if not specified.
2285  *	len		The number of bytes of data specified by priv_data.
2286  *
2287  * RETURN VALUES:
2288  *	IBT_SUCCESS	on success (or respective failure on error)
2289  */
2290 ibt_status_t
2291 ibt_cm_delay(ibt_cmdelay_flags_t flags, void *cm_session_id,
2292     clock_t service_time, void *priv_data, ibt_priv_data_len_t len)
2293 {
2294 	uint8_t			msg_typ = 0;
2295 	ibcm_mra_msg_t		*mra_msgp;
2296 	ibcm_state_data_t	*statep;
2297 	ibt_status_t		status;
2298 
2299 	IBTF_DPRINTF_L3(cmlog, "ibt_cm_delay(0x%x, %p, 0x%x)",
2300 	    flags, cm_session_id, service_time);
2301 
2302 	/*
2303 	 * Make sure channel is associated with a statep
2304 	 */
2305 	statep = (ibcm_state_data_t *)cm_session_id;
2306 
2307 	if (statep == NULL) {
2308 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_delay: statep NULL");
2309 		return (IBT_INVALID_PARAM);
2310 	}
2311 
2312 	IBTF_DPRINTF_L4(cmlog, "ibt_cm_delay: statep %p", statep);
2313 
2314 	/* Allocate an ibmf msg for mra, if not allocated yet */
2315 	if (statep->mra_msg == NULL) {
2316 		if ((status = ibcm_alloc_out_msg(
2317 		    statep->stored_reply_addr.ibmf_hdl, &statep->mra_msg,
2318 		    MAD_METHOD_SEND)) != IBT_SUCCESS) {
2319 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_delay: chan 0x%p"
2320 			    "IBMF MSG allocation failed", statep->channel);
2321 			return (status);
2322 		}
2323 	}
2324 
2325 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mra_msgp))
2326 
2327 	mra_msgp = (ibcm_mra_msg_t *)IBCM_OUT_MSGP(statep->mra_msg);
2328 	mra_msgp->mra_local_comm_id = h2b32(statep->local_comid);
2329 	mra_msgp->mra_remote_comm_id = h2b32(statep->remote_comid);
2330 
2331 	/* fill in rest of MRA's fields - Message MRAed and Service Timeout */
2332 	if (flags == IBT_CM_DELAY_REQ) {
2333 		msg_typ = IBT_CM_MRA_TYPE_REQ;
2334 	} else if (flags == IBT_CM_DELAY_REP) {
2335 		msg_typ = IBT_CM_MRA_TYPE_REP;
2336 	} else if (flags == IBT_CM_DELAY_LAP) {
2337 		msg_typ = IBT_CM_MRA_TYPE_LAP;
2338 	}
2339 
2340 	mra_msgp->mra_message_type_plus = msg_typ << 6;
2341 	mra_msgp->mra_service_timeout_plus = ibt_usec2ib(service_time) << 3;
2342 
2343 	len = min(len, IBT_MRA_PRIV_DATA_SZ);
2344 	if (priv_data && (len > 0))
2345 		bcopy(priv_data, mra_msgp->mra_private_data, len);
2346 
2347 	IBCM_OUT_HDRP(statep->mra_msg)->AttributeID =
2348 	    h2b16(IBCM_INCOMING_MRA + IBCM_ATTR_BASE_ID);
2349 
2350 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mra_msgp))
2351 
2352 	mutex_enter(&statep->state_mutex);
2353 
2354 	if ((statep->mode == IBCM_ACTIVE_MODE) &&
2355 	    (statep->state == IBCM_STATE_REP_RCVD)) {
2356 		statep->state = IBCM_STATE_MRA_REP_SENT;
2357 	} else if (statep->mode == IBCM_PASSIVE_MODE) {
2358 		if (statep->state == IBCM_STATE_REQ_RCVD) {
2359 			statep->state = IBCM_STATE_MRA_SENT;
2360 		} else if (statep->ap_state == IBCM_AP_STATE_LAP_RCVD) {
2361 			statep->ap_state = IBCM_AP_STATE_MRA_LAP_RCVD;
2362 		} else {
2363 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_delay: invalid state "
2364 			    "/ap_state/mode %x, %x, %x", statep->state,
2365 			    statep->ap_state, statep->mode);
2366 			mutex_exit(&statep->state_mutex);
2367 			return (IBT_CHAN_STATE_INVALID);
2368 		}
2369 	} else {
2370 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_delay: invalid state "
2371 		    "/ap_state/mode %x, %x, %x", statep->state,
2372 		    statep->ap_state, statep->mode);
2373 		mutex_exit(&statep->state_mutex);
2374 
2375 		return (IBT_CHAN_STATE_INVALID);
2376 	}
2377 	/* service time is usecs, stale_clock is nsecs */
2378 	statep->stale_clock = gethrtime() +
2379 	    (hrtime_t)ibt_ib2usec(ibt_usec2ib(service_time)) * (1000 *
2380 	    statep->max_cm_retries);
2381 
2382 	statep->send_mad_flags |= IBCM_MRA_POST_BUSY;
2383 	IBCM_REF_CNT_INCR(statep);	/* for ibcm_post_mra_complete */
2384 	mutex_exit(&statep->state_mutex);
2385 
2386 	IBCM_OUT_HDRP(statep->mra_msg)->TransactionID =
2387 	    IBCM_OUT_HDRP(statep->stored_msg)->TransactionID;
2388 
2389 	/* post the MRA mad in blocking mode, as no timers involved */
2390 	ibcm_post_rc_mad(statep, statep->mra_msg, ibcm_post_mra_complete,
2391 	    statep);
2392 	ibcm_insert_trace(statep, IBCM_TRACE_OUTGOING_MRA);
2393 	/* If this message isn't seen then ibt_cm_delay failed */
2394 	IBTF_DPRINTF_L3(cmlog, "ibt_cm_delay: done !!");
2395 
2396 	return (IBT_SUCCESS);
2397 }
2398 
2399 
2400 /*
2401  * ibt_register_service()
2402  *	Register a service with the IBCM
2403  *
2404  * INPUTS:
2405  *	ibt_hdl		The IBT client handle returned to the client
2406  *			on an ibt_attach() call.
2407  *
2408  *	srv		The address of a ibt_srv_desc_t that describes
2409  *			the service, containing the following:
2410  *
2411  *		sd_ud_handler	The Service CM UD event Handler.
2412  *		sd_handler	The Service CM RC/UC/RD event Handler.
2413  *		sd_flags	Service flags (peer-to-peer, or not).
2414  *
2415  *	sid		This tells CM if the service is local (sid is 0) or
2416  *			wellknown (sid is the starting service id of the range).
2417  *
2418  *	num_sids	The number of contiguous service-ids to reserve.
2419  *
2420  *	srv_hdl		The address of a service identification handle, used
2421  *			to deregister a service, and to bind GIDs to.
2422  *
2423  *	ret_sid		The address to store the Service ID return value.
2424  *			If num_sids > 1, ret_sid is the first Service ID
2425  *			in the range.
2426  *
2427  * ibt_register_service() returns:
2428  *	IBT_SUCCESS		- added a service successfully.
2429  *	IBT_INVALID_PARAM	- invalid input parameter.
2430  *	IBT_CM_FAILURE		- failed to add the service.
2431  *	IBT_CM_SERVICE_EXISTS	- service already exists.
2432  *	IBT_INSUFF_KERNEL_RESOURCE - ran out of local service ids (should
2433  *				     never happen).
2434  */
2435 ibt_status_t
2436 ibt_register_service(ibt_clnt_hdl_t ibt_hdl, ibt_srv_desc_t *srv,
2437     ib_svc_id_t sid, int num_sids, ibt_srv_hdl_t *srv_hdl, ib_svc_id_t *ret_sid)
2438 {
2439 	ibcm_svc_info_t		*svcinfop;
2440 
2441 	IBTF_DPRINTF_L2(cmlog, "ibt_register_service(%p, %p, %llx, %d)",
2442 	    ibt_hdl, srv, (longlong_t)sid, num_sids);
2443 
2444 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*svcinfop))
2445 
2446 	*srv_hdl = NULL;
2447 
2448 	if (num_sids <= 0) {
2449 		IBTF_DPRINTF_L2(cmlog, "ibt_register_service: "
2450 		    "Invalid number of service-ids specified (%d)", num_sids);
2451 		return (IBT_INVALID_PARAM);
2452 	}
2453 
2454 	if (sid == 0) {
2455 		if (ret_sid == NULL)
2456 			return (IBT_INVALID_PARAM);
2457 		sid = ibcm_alloc_local_sids(num_sids);
2458 		if (sid == 0)
2459 			return (IBT_INSUFF_KERNEL_RESOURCE);
2460 
2461 	/* Make sure that the ServiceId specified is not of LOCAL AGN type. */
2462 	} else if ((sid & IB_SID_AGN_MASK) == IB_SID_AGN_LOCAL) {
2463 		IBTF_DPRINTF_L2(cmlog, "ibt_register_service: "
2464 		    "Invalid non-LOCAL SID specified: 0x%llX",
2465 		    (longlong_t)sid);
2466 		return (IBT_INVALID_PARAM);
2467 	}
2468 
2469 	svcinfop = ibcm_create_svc_entry(sid, num_sids);
2470 
2471 	if (svcinfop == NULL) {
2472 		IBTF_DPRINTF_L2(cmlog, "ibt_register_service: "
2473 		    "Service-ID 0x%llx already registered", (longlong_t)sid);
2474 		return (IBT_CM_SERVICE_EXISTS);
2475 	}
2476 
2477 	/*
2478 	 * 'sid' and 'num_sids' are filled in ibcm_create_svc_entry()
2479 	 */
2480 	svcinfop->svc_flags = srv->sd_flags;
2481 	svcinfop->svc_rc_handler = srv->sd_handler;
2482 	svcinfop->svc_ud_handler = srv->sd_ud_handler;
2483 
2484 	if (ret_sid != NULL)
2485 		*ret_sid = sid;
2486 
2487 	*srv_hdl = svcinfop;
2488 
2489 	ibtl_cm_change_service_cnt(ibt_hdl, num_sids);
2490 
2491 	/* If this message isn't seen, then ibt_register_service failed. */
2492 	IBTF_DPRINTF_L2(cmlog, "ibt_register_service: done (%p, %llX)",
2493 	    svcinfop, sid);
2494 
2495 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*svcinfop))
2496 
2497 	return (IBT_SUCCESS);
2498 }
2499 
2500 
2501 static ibt_status_t
2502 ibcm_write_service_record(ibmf_saa_handle_t saa_handle,
2503     sa_service_record_t *srv_recp, ibmf_saa_access_type_t saa_type)
2504 {
2505 	int	rval;
2506 	int	retry;
2507 
2508 	ibcm_sa_access_enter();
2509 	for (retry = 0; retry < ibcm_max_sa_retries; retry++) {
2510 		rval = ibmf_saa_update_service_record(
2511 		    saa_handle, srv_recp, saa_type, 0);
2512 		if (rval != IBMF_TRANS_TIMEOUT) {
2513 			break;
2514 		}
2515 		IBTF_DPRINTF_L2(cmlog, "ibcm_write_service_record: "
2516 		    "ibmf_saa_update_service_record timed out"
2517 		    " SID = %llX, rval = %d, saa_type = %d",
2518 		    (longlong_t)srv_recp->ServiceID, rval, saa_type);
2519 		delay(ibcm_sa_timeout_delay);
2520 	}
2521 	ibcm_sa_access_exit();
2522 
2523 	if (rval != IBMF_SUCCESS) {
2524 		IBTF_DPRINTF_L2(cmlog, "ibcm_write_service_record: "
2525 		    "ibmf_saa_update_service_record() : Failed - %d", rval);
2526 		return (ibcm_ibmf_analyze_error(rval));
2527 	} else
2528 		return (IBT_SUCCESS);
2529 }
2530 
2531 
2532 static void
2533 ibcm_rem_stale_srec(ibmf_saa_handle_t saa_handle, sa_service_record_t *srec)
2534 {
2535 	ibt_status_t		retval;
2536 	uint_t			num_found;
2537 	size_t			length;
2538 	sa_service_record_t	*srv_resp;
2539 	void			*results_p;
2540 	uint_t			i;
2541 	uint64_t		component_mask;
2542 	ibmf_saa_access_args_t	access_args;
2543 
2544 	component_mask =
2545 	    SA_SR_COMPMASK_PKEY | SA_SR_COMPMASK_NAME | SA_SR_COMPMASK_GID;
2546 
2547 	/* Call in SA Access retrieve routine to get Service Records. */
2548 	access_args.sq_attr_id = SA_SERVICERECORD_ATTRID;
2549 	access_args.sq_access_type = IBMF_SAA_RETRIEVE;
2550 	access_args.sq_component_mask = component_mask;
2551 	access_args.sq_template = srec;
2552 	access_args.sq_template_length = sizeof (sa_service_record_t);
2553 	access_args.sq_callback = NULL;
2554 	access_args.sq_callback_arg = NULL;
2555 
2556 	retval = ibcm_contact_sa_access(saa_handle, &access_args, &length,
2557 	    &results_p);
2558 	if (retval != IBT_SUCCESS) {
2559 		IBTF_DPRINTF_L2(cmlog, "ibcm_rem_stale_srec: "
2560 		    "SA Access Failure");
2561 		return;
2562 	}
2563 
2564 	num_found = length / sizeof (sa_service_record_t);
2565 
2566 	if (num_found)
2567 		IBTF_DPRINTF_L3(cmlog, "ibcm_rem_stale_srec: "
2568 		    "Found %d matching Service Records.", num_found);
2569 
2570 	/* Validate the returned number of records. */
2571 	if ((results_p != NULL) && (num_found > 0)) {
2572 
2573 		/* Remove all the records. */
2574 		for (i = 0; i < num_found; i++) {
2575 
2576 			srv_resp = (sa_service_record_t *)
2577 			    ((uchar_t *)results_p +
2578 			    i * sizeof (sa_service_record_t));
2579 
2580 			/*
2581 			 * Found some matching records, but check out whether
2582 			 * this Record is really stale or just happens to match
2583 			 * the current session records. If yes, don't remove it.
2584 			 */
2585 			mutex_enter(&ibcm_svc_info_lock);
2586 			if (ibcm_find_svc_entry(srv_resp->ServiceID) != NULL) {
2587 				/* This record is NOT STALE. */
2588 				mutex_exit(&ibcm_svc_info_lock);
2589 				IBTF_DPRINTF_L3(cmlog, "ibcm_rem_stale_srec: "
2590 				    "This is not Stale, it's an active record");
2591 				continue;
2592 			}
2593 			mutex_exit(&ibcm_svc_info_lock);
2594 
2595 			IBTF_DPRINTF_L2(cmlog, "ibcm_rem_stale_srec: "
2596 			    "Removing Stale Rec: %s, %llX",
2597 			    srv_resp->ServiceName, srv_resp->ServiceID);
2598 
2599 			IBCM_DUMP_SERVICE_REC(srv_resp);
2600 
2601 			/*
2602 			 * Remove the Service Record Entry from SA.
2603 			 *
2604 			 * Get ServiceID info from Response Buf, other
2605 			 * attributes are already filled-in.
2606 			 */
2607 
2608 			 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(srec->ServiceID))
2609 
2610 			srec->ServiceID = srv_resp->ServiceID;
2611 
2612 			 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(srec->ServiceID))
2613 
2614 			(void) ibcm_write_service_record(saa_handle, srec,
2615 			    IBMF_SAA_DELETE);
2616 		}
2617 
2618 		/* Deallocate the memory for results_p. */
2619 		kmem_free(results_p, length);
2620 	}
2621 }
2622 
2623 
2624 
2625 /*
2626  * ibt_bind_service()
 *	Bind a previously registered service to a GID
2628  *
2629  * INPUTS:
2630  *	srv_hdl		The service id handle returned to the client
 *			on an ibt_register_service() call.
2632  *
2633  *	gid		The GID to which to bind the service.
2634  *
2635  *	srv_bind	The address of a ibt_srv_bind_t that describes
2636  *			the service record.  This should be NULL if there
2637  *			is to be no service record.  This contains:
2638  *
2639  *		sb_lease	Lease period
2640  *		sb_pkey		Partition
2641  *		sb_name		pointer to ASCII string Service Name,
2642  *				NULL terminated.
2643  *		sb_key[]	Key to secure the service record.
2644  *		sb_data		Service Data structure (64-byte)
2645  *
2646  *	cm_private	First argument of Service handler.
2647  *
2648  *	sb_hdl_p	The address of a service bind handle, used
2649  *			to undo the service binding.
2650  *
2651  * ibt_bind_service() returns:
2652  *	IBT_SUCCESS		- added a service successfully.
2653  *	IBT_INVALID_PARAM	- invalid input parameter.
2654  *	IBT_CM_FAILURE		- failed to add the service.
2655  *	IBT_CM_SERVICE_EXISTS	- service already exists.
2656  */
2657 ibt_status_t
2658 ibt_bind_service(ibt_srv_hdl_t srv_hdl, ib_gid_t gid, ibt_srv_bind_t *srv_bind,
2659     void *cm_private, ibt_sbind_hdl_t *sb_hdl_p)
2660 {
2661 	ibt_status_t		status;
2662 	ibtl_cm_hca_port_t	port;
2663 	ibcm_svc_bind_t		*sbindp, *sbp;
2664 	ibcm_hca_info_t		*hcap;
2665 	ib_svc_id_t		sid, start_sid, end_sid;
2666 	ibmf_saa_handle_t	saa_handle;
2667 	sa_service_record_t	srv_rec;
2668 	uint16_t		pkey_ix;
2669 
2670 	if (sb_hdl_p != NULL)
2671 		*sb_hdl_p = NULL;	/* return value for error cases */
2672 
2673 	IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: srv_hdl %p, gid (%llX:%llX)",
2674 	    srv_hdl, (longlong_t)gid.gid_prefix, (longlong_t)gid.gid_guid);
2675 
2676 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sbindp))
2677 
2678 	/* Call ibtl_cm_get_hca_port to get the port number and the HCA GUID. */
2679 	if ((status = ibtl_cm_get_hca_port(gid, 0, &port)) != IBT_SUCCESS) {
2680 		IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2681 		    "ibtl_cm_get_hca_port failed: %d", status);
2682 		return (status);
2683 	}
2684 	IBTF_DPRINTF_L4(cmlog, "ibt_bind_service: Port:%d HCA GUID:%llX",
2685 	    port.hp_port, port.hp_hca_guid);
2686 
2687 	hcap = ibcm_find_hca_entry(port.hp_hca_guid);
2688 	if (hcap == NULL) {
2689 		IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: NO HCA found");
2690 		return (IBT_HCA_BUSY_DETACHING);
2691 	}
2692 	IBTF_DPRINTF_L4(cmlog, "ibt_bind_service: hcap = %p", hcap);
2693 
2694 	if (srv_bind != NULL) {
2695 		saa_handle = ibcm_get_saa_handle(hcap, port.hp_port);
2696 		if (saa_handle == NULL) {
2697 			IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2698 			    "saa_handle is NULL");
2699 			ibcm_dec_hca_acc_cnt(hcap);
2700 			return (IBT_HCA_PORT_NOT_ACTIVE);
2701 		}
2702 		if (srv_bind->sb_pkey == 0) {
2703 			IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2704 			    "P_Key must not be 0");
2705 			ibcm_dec_hca_acc_cnt(hcap);
2706 			return (IBT_INVALID_PARAM);
2707 		}
2708 		if (strlen(srv_bind->sb_name) >= IB_SVC_NAME_LEN) {
2709 			IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2710 			    "Service Name is too long");
2711 			ibcm_dec_hca_acc_cnt(hcap);
2712 			return (IBT_INVALID_PARAM);
2713 		} else
2714 			IBTF_DPRINTF_L3(cmlog, "ibt_bind_service: "
2715 			    "Service Name='%s'", srv_bind->sb_name);
2716 		status = ibt_pkey2index_byguid(port.hp_hca_guid,
2717 		    port.hp_port, srv_bind->sb_pkey, &pkey_ix);
2718 		if (status != IBT_SUCCESS) {
2719 			IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2720 			    "P_Key 0x%x not found in P_Key_Table",
2721 			    srv_bind->sb_pkey);
2722 			ibcm_dec_hca_acc_cnt(hcap);
2723 			return (status);
2724 		}
2725 	}
2726 
2727 	/* assume success - allocate before locking */
2728 	sbindp = kmem_zalloc(sizeof (*sbindp), KM_SLEEP);
2729 	sbindp->sbind_cm_private = cm_private;
2730 	sbindp->sbind_gid = gid;
2731 	sbindp->sbind_hcaguid = port.hp_hca_guid;
2732 	sbindp->sbind_port = port.hp_port;
2733 
2734 	mutex_enter(&ibcm_svc_info_lock);
2735 
2736 	sbp = srv_hdl->svc_bind_list;
2737 	while (sbp != NULL) {
2738 		if (sbp->sbind_gid.gid_guid == gid.gid_guid &&
2739 		    sbp->sbind_gid.gid_prefix == gid.gid_prefix) {
2740 			if (srv_bind == NULL ||
2741 			    srv_bind->sb_pkey == sbp->sbind_pkey) {
2742 				IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2743 				    "failed: GID %llX:%llX and PKEY %x is "
2744 				    "already bound", gid.gid_guid,
2745 				    gid.gid_prefix, sbp->sbind_pkey);
2746 				mutex_exit(&ibcm_svc_info_lock);
2747 				ibcm_dec_hca_acc_cnt(hcap);
2748 				kmem_free(sbindp, sizeof (*sbindp));
2749 				return (IBT_CM_SERVICE_EXISTS);
2750 			}
2751 		}
2752 		sbp = sbp->sbind_link;
2753 	}
2754 	/* no entry found */
2755 
2756 	sbindp->sbind_link = srv_hdl->svc_bind_list;
2757 	srv_hdl->svc_bind_list = sbindp;
2758 
2759 	mutex_exit(&ibcm_svc_info_lock);
2760 
2761 	if (srv_bind != NULL) {
2762 		bzero(&srv_rec, sizeof (srv_rec));
2763 
2764 		srv_rec.ServiceLease =
2765 		    sbindp->sbind_lease = srv_bind->sb_lease;
2766 		srv_rec.ServiceP_Key =
2767 		    sbindp->sbind_pkey = srv_bind->sb_pkey;
2768 		srv_rec.ServiceKey_hi =
2769 		    sbindp->sbind_key[0] = srv_bind->sb_key[0];
2770 		srv_rec.ServiceKey_lo =
2771 		    sbindp->sbind_key[1] = srv_bind->sb_key[1];
2772 		(void) strcpy(sbindp->sbind_name, srv_bind->sb_name);
2773 		(void) strcpy((char *)srv_rec.ServiceName, srv_bind->sb_name);
2774 		srv_rec.ServiceGID = gid;
2775 
2776 		/*
2777 		 * Find out whether we have any stale Local Service records
2778 		 * matching the current attributes.  If yes, we shall try to
2779 		 * remove them from SA using the current request's ServiceKey.
2780 		 *
2781 		 * We will perform this operation only for Local Services, as
2782 		 * it is handled by SA automatically for WellKnown Services.
2783 		 *
2784 		 * Ofcourse, clients can specify NOT to do this clean-up by
2785 		 * setting IBT_SBIND_NO_CLEANUP flag (srv_bind->sb_flag).
2786 		 */
2787 		if ((srv_hdl->svc_id & IB_SID_AGN_LOCAL) &&
2788 		    (!(srv_bind->sb_flag & IBT_SBIND_NO_CLEANUP))) {
2789 			ibcm_rem_stale_srec(saa_handle, &srv_rec);
2790 		}
2791 
2792 		/* Handle endianess for service data. */
2793 		ibcm_swizzle_from_srv(&srv_bind->sb_data, sbindp->sbind_data);
2794 
2795 		bcopy(sbindp->sbind_data, srv_rec.ServiceData, IB_SVC_DATA_LEN);
2796 
2797 		/* insert srv record into the SA */
2798 		start_sid = srv_hdl->svc_id;
2799 		end_sid = start_sid + srv_hdl->svc_num_sids - 1;
2800 		for (sid = start_sid; sid <= end_sid; sid++) {
2801 
2802 			srv_rec.ServiceID = sid;
2803 
2804 			IBCM_DUMP_SERVICE_REC(&srv_rec);
2805 
2806 			IBTF_DPRINTF_L4(cmlog, "ibt_bind_service: "
2807 			    "ibmf_saa_write_service_record, SvcId = %llX",
2808 			    (longlong_t)sid);
2809 
2810 			status = ibcm_write_service_record(saa_handle, &srv_rec,
2811 			    IBMF_SAA_UPDATE);
2812 			if (status != IBT_SUCCESS) {
2813 				IBTF_DPRINTF_L2(cmlog, "ibt_bind_service:"
2814 				    " ibcm_write_service_record fails %d, "
2815 				    "sid %llX", status, (longlong_t)sid);
2816 
2817 				if (sid != start_sid) {
2818 					/*
2819 					 * Bind failed while bind SID other than
2820 					 * first in the sid_range.  So we need
2821 					 * to unbind those, which are passed.
2822 					 *
2823 					 * Need to increment svc count to
2824 					 * compensate for ibt_unbind_service().
2825 					 */
2826 					ibcm_inc_hca_svc_cnt(hcap);
2827 					ibcm_dec_hca_acc_cnt(hcap);
2828 
2829 					(void) ibt_unbind_service(srv_hdl,
2830 					    sbindp);
2831 				} else {
2832 					ibcm_svc_bind_t		**sbpp;
2833 
2834 					/*
2835 					 * Bind failed for the first SID or the
2836 					 * only SID in question, then no need
2837 					 * to unbind, just free memory and
2838 					 * return error.
2839 					 */
2840 					mutex_enter(&ibcm_svc_info_lock);
2841 
2842 					sbpp = &srv_hdl->svc_bind_list;
2843 					sbp = *sbpp;
2844 					while (sbp != NULL) {
2845 						if (sbp == sbindp) {
2846 							*sbpp = sbp->sbind_link;
2847 							break;
2848 						}
2849 						sbpp = &sbp->sbind_link;
2850 						sbp = *sbpp;
2851 					}
2852 					mutex_exit(&ibcm_svc_info_lock);
2853 					ibcm_dec_hca_acc_cnt(hcap);
2854 
2855 					kmem_free(sbindp, sizeof (*sbindp));
2856 				}
2857 				return (status);
2858 			}
2859 		}
2860 	}
2861 	ibcm_inc_hca_svc_cnt(hcap);
2862 	ibcm_dec_hca_acc_cnt(hcap);
2863 
2864 	/* If this message isn't seen then ibt_bind_service failed */
2865 	IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: DONE (%p, %llX:%llX)",
2866 	    srv_hdl, gid.gid_prefix, gid.gid_guid);
2867 
2868 	if (sb_hdl_p != NULL)
2869 		*sb_hdl_p = sbindp;
2870 
2871 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*sbindp))
2872 
2873 	return (IBT_SUCCESS);
2874 }
2875 
2876 ibt_status_t
2877 ibt_unbind_service(ibt_srv_hdl_t srv_hdl, ibt_sbind_hdl_t sbindp)
2878 {
2879 	ib_svc_id_t	sid, end_sid;
2880 	ibt_status_t	rval;
2881 	ibcm_hca_info_t	*hcap;
2882 	ibcm_svc_bind_t	*sbp, **sbpp;
2883 
2884 	IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service(%p, %p)",
2885 	    srv_hdl, sbindp);
2886 
2887 	hcap = ibcm_find_hca_entry(sbindp->sbind_hcaguid);
2888 
2889 	/* If there is a service on hca, respective hcap cannot go away */
2890 	ASSERT(hcap != NULL);
2891 
2892 	mutex_enter(&ibcm_svc_info_lock);
2893 
2894 	sbpp = &srv_hdl->svc_bind_list;
2895 	sbp = *sbpp;
2896 	while (sbp != NULL) {
2897 		if (sbp == sbindp) {
2898 			*sbpp = sbp->sbind_link;
2899 			break;
2900 		}
2901 		sbpp = &sbp->sbind_link;
2902 		sbp = *sbpp;
2903 	}
2904 	sid = srv_hdl->svc_id;
2905 	end_sid = srv_hdl->svc_id + srv_hdl->svc_num_sids - 1;
2906 	if (sbp != NULL)
2907 		while (sbp->sbind_rewrite_state == IBCM_REWRITE_BUSY)
2908 			cv_wait(&ibcm_svc_info_cv, &ibcm_svc_info_lock);
2909 	mutex_exit(&ibcm_svc_info_lock);
2910 
2911 	if (sbp == NULL) {
2912 		IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: "
2913 		    "service binding not found: srv_hdl %p, srv_bind %p",
2914 		    srv_hdl, sbindp);
2915 		ibcm_dec_hca_acc_cnt(hcap);
2916 		return (IBT_INVALID_PARAM);
2917 	}
2918 
2919 	if (sbindp->sbind_pkey != 0) {	/* Are there service records? */
2920 		ibtl_cm_hca_port_t	port;
2921 		sa_service_record_t	srv_rec;
2922 		ibmf_saa_handle_t	saa_handle;
2923 		ibt_status_t		status;
2924 
2925 		/* get the default SGID of the port */
2926 		if ((status = ibtl_cm_get_hca_port(sbindp->sbind_gid, 0, &port))
2927 		    != IBT_SUCCESS) {
2928 			IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: "
2929 			    "ibtl_cm_get_hca_port failed: %d", status);
2930 			/* we're done, but there may be stale service records */
2931 			goto done;
2932 		}
2933 
2934 		saa_handle = ibcm_get_saa_handle(hcap, port.hp_port);
2935 		if (saa_handle == NULL) {
2936 			IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: "
2937 			    "saa_handle is NULL");
2938 			/* we're done, but there may be stale service records */
2939 			goto done;
2940 		}
2941 
2942 		/* Fill in fields of srv_rec */
2943 		bzero(&srv_rec, sizeof (srv_rec));
2944 
2945 		srv_rec.ServiceP_Key = sbindp->sbind_pkey;
2946 		srv_rec.ServiceKey_hi = sbindp->sbind_key[0];
2947 		srv_rec.ServiceKey_lo = sbindp->sbind_key[1];
2948 		srv_rec.ServiceGID = sbindp->sbind_gid;
2949 		(void) strcpy((char *)srv_rec.ServiceName, sbindp->sbind_name);
2950 
2951 		while (sid <= end_sid) {
2952 
2953 			srv_rec.ServiceID = sid;
2954 			IBCM_DUMP_SERVICE_REC(&srv_rec);
2955 
2956 			rval = ibcm_write_service_record(saa_handle, &srv_rec,
2957 			    IBMF_SAA_DELETE);
2958 
2959 			IBTF_DPRINTF_L4(cmlog, "ibt_unbind_service: "
2960 			    "ibcm_write_service_record rval = %d, SID %llx",
2961 			    rval, sid);
2962 			if (rval != IBT_SUCCESS) {
2963 				/* this is not considered a reason to fail */
2964 				IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: "
2965 				    "ibcm_write_service_record fails %d, "
2966 				    "sid %llx", rval, sid);
2967 			}
2968 			sid++;
2969 		}
2970 	}
2971 done:
2972 	ibcm_dec_hca_svc_cnt(hcap);
2973 	ibcm_dec_hca_acc_cnt(hcap);
2974 	kmem_free(sbindp, sizeof (*sbindp));
2975 
2976 	/* If this message isn't seen then ibt_unbind_service failed */
2977 	IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: done !!");
2978 
2979 	return (IBT_SUCCESS);
2980 }
2981 
2982 /*
2983  * Simply pull off each binding from the list and unbind it.
2984  * If any of the unbind calls fail, we fail.
2985  */
2986 ibt_status_t
2987 ibt_unbind_all_services(ibt_srv_hdl_t srv_hdl)
2988 {
2989 	ibt_status_t	status;
2990 	ibcm_svc_bind_t	*sbp;
2991 
2992 	mutex_enter(&ibcm_svc_info_lock);
2993 	sbp = NULL;
2994 
2995 	/* this compare keeps the loop from being infinite */
2996 	while (sbp != srv_hdl->svc_bind_list) {
2997 		sbp = srv_hdl->svc_bind_list;
2998 		mutex_exit(&ibcm_svc_info_lock);
2999 		status = ibt_unbind_service(srv_hdl, sbp);
3000 		if (status != IBT_SUCCESS)
3001 			return (status);
3002 		mutex_enter(&ibcm_svc_info_lock);
3003 		if (srv_hdl->svc_bind_list == NULL)
3004 			break;
3005 	}
3006 	mutex_exit(&ibcm_svc_info_lock);
3007 	return (IBT_SUCCESS);
3008 }
3009 
3010 /*
3011  * ibt_deregister_service()
3012  *	Deregister a service with the IBCM
3013  *
3014  * INPUTS:
3015  *	ibt_hdl		The IBT client handle returned to the client
3016  *			on an ibt_attach() call.
3017  *
3018  *	srv_hdl		The address of a service identification handle, used
3019  *			to de-register a service.
3020  * RETURN VALUES:
3021  *	IBT_SUCCESS	on success (or respective failure on error)
3022  */
3023 ibt_status_t
3024 ibt_deregister_service(ibt_clnt_hdl_t ibt_hdl, ibt_srv_hdl_t srv_hdl)
3025 {
3026 	ibcm_svc_info_t		*svcp;
3027 	ibcm_svc_lookup_t	svc;
3028 
3029 	IBTF_DPRINTF_L2(cmlog, "ibt_deregister_service(%p, %p)",
3030 	    ibt_hdl, srv_hdl);
3031 
3032 	mutex_enter(&ibcm_svc_info_lock);
3033 
3034 	if (srv_hdl->svc_bind_list != NULL) {
3035 		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_service:"
3036 		    " srv_hdl %p still has bindings", srv_hdl);
3037 		mutex_exit(&ibcm_svc_info_lock);
3038 		return (IBT_CM_SERVICE_BUSY);
3039 	}
3040 	svc.sid = srv_hdl->svc_id;
3041 	svc.num_sids = 1;
3042 	IBTF_DPRINTF_L3(cmlog, "ibt_deregister_service: SID 0x%llX, numsids %d",
3043 	    srv_hdl->svc_id, srv_hdl->svc_num_sids);
3044 
3045 #ifdef __lock_lint
3046 	ibcm_svc_compare(NULL, NULL);
3047 #endif
3048 	svcp = avl_find(&ibcm_svc_avl_tree, &svc, NULL);
3049 	if (svcp != srv_hdl) {
3050 		mutex_exit(&ibcm_svc_info_lock);
3051 		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_service(): "
3052 		    "srv_hdl %p not found", srv_hdl);
3053 		return (IBT_INVALID_PARAM);
3054 	}
3055 	avl_remove(&ibcm_svc_avl_tree, svcp);
3056 
3057 	/* wait for active REQ/SREQ handling to be done */
3058 	svcp->svc_to_delete = 1;
3059 	while (svcp->svc_ref_cnt != 0)
3060 		cv_wait(&ibcm_svc_info_cv, &ibcm_svc_info_lock);
3061 
3062 	mutex_exit(&ibcm_svc_info_lock);
3063 
3064 	if ((srv_hdl->svc_id & IB_SID_AGN_MASK) == IB_SID_AGN_LOCAL)
3065 		ibcm_free_local_sids(srv_hdl->svc_id, srv_hdl->svc_num_sids);
3066 
3067 	ibtl_cm_change_service_cnt(ibt_hdl, -srv_hdl->svc_num_sids);
3068 	kmem_free(srv_hdl, sizeof (*srv_hdl));
3069 
3070 	/* If this message isn't seen then ibt_deregister_service failed */
3071 	IBTF_DPRINTF_L2(cmlog, "ibt_deregister_service: done !!");
3072 
3073 	return (IBT_SUCCESS);
3074 }
3075 
3076 ibcm_status_t
3077 ibcm_ar_init(void)
3078 {
3079 	ib_svc_id_t	sid = IBCM_DAPL_ATS_SID;
3080 	ibcm_svc_info_t *tmp_svcp;
3081 
3082 	IBTF_DPRINTF_L3(cmlog, "ibcm_ar_init()");
3083 
3084 	/* remove this special SID from the pool of available SIDs */
3085 	if ((tmp_svcp = ibcm_create_svc_entry(sid, 1)) == NULL) {
3086 		IBTF_DPRINTF_L2(cmlog, "ibcm_ar_init: "
3087 		    "DAPL ATS SID 0x%llx already registered", (longlong_t)sid);
3088 		return (IBCM_FAILURE);
3089 	}
3090 	mutex_enter(&ibcm_svc_info_lock);
3091 	ibcm_ar_svcinfop = tmp_svcp;
3092 	ibcm_ar_list = NULL;	/* no address records registered yet */
3093 	mutex_exit(&ibcm_svc_info_lock);
3094 	return (IBCM_SUCCESS);
3095 }
3096 
3097 ibcm_status_t
3098 ibcm_ar_fini(void)
3099 {
3100 	ibcm_ar_t	*ar_list;
3101 	ibcm_svc_info_t	*tmp_svcp;
3102 
3103 	mutex_enter(&ibcm_svc_info_lock);
3104 	ar_list = ibcm_ar_list;
3105 
3106 	if (ar_list == NULL &&
3107 	    avl_numnodes(&ibcm_svc_avl_tree) == 1 &&
3108 	    avl_first(&ibcm_svc_avl_tree) == ibcm_ar_svcinfop) {
3109 		avl_remove(&ibcm_svc_avl_tree, ibcm_ar_svcinfop);
3110 		tmp_svcp = ibcm_ar_svcinfop;
3111 		mutex_exit(&ibcm_svc_info_lock);
3112 		kmem_free(tmp_svcp, sizeof (*ibcm_ar_svcinfop));
3113 		return (IBCM_SUCCESS);
3114 	}
3115 	mutex_exit(&ibcm_svc_info_lock);
3116 	return (IBCM_FAILURE);
3117 }
3118 
3119 
3120 /*
3121  * Return to the caller:
3122  *	IBT_SUCCESS		Found a perfect match.
3123  *				*arpp is set to the record.
3124  *	IBT_INCONSISTENT_AR	Found a record that's inconsistent.
3125  *	IBT_AR_NOT_REGISTERED	Found no record with same GID/pkey and
3126  *				found no record with same data.
3127  */
3128 static ibt_status_t
3129 ibcm_search_ar(ibt_ar_t *arp, ibcm_ar_t **arpp)
3130 {
3131 	ibcm_ar_t	*tmp;
3132 	int		i;
3133 
3134 	ASSERT(MUTEX_HELD(&ibcm_svc_info_lock));
3135 	tmp = ibcm_ar_list;
3136 	while (tmp != NULL) {
3137 		if (tmp->ar.ar_gid.gid_prefix == arp->ar_gid.gid_prefix &&
3138 		    tmp->ar.ar_gid.gid_guid == arp->ar_gid.gid_guid &&
3139 		    tmp->ar.ar_pkey == arp->ar_pkey) {
3140 			for (i = 0; i < IBCM_DAPL_ATS_NBYTES; i++)
3141 				if (tmp->ar.ar_data[i] != arp->ar_data[i])
3142 					return (IBT_INCONSISTENT_AR);
3143 			*arpp = tmp;
3144 			return (IBT_SUCCESS);
3145 		} else {
3146 			/* if all the data bytes match, we have inconsistency */
3147 			for (i = 0; i < IBCM_DAPL_ATS_NBYTES; i++)
3148 				if (tmp->ar.ar_data[i] != arp->ar_data[i])
3149 					break;
3150 			if (i == IBCM_DAPL_ATS_NBYTES)
3151 				return (IBT_INCONSISTENT_AR);
3152 			/* try next address record */
3153 		}
3154 		tmp = tmp->ar_link;
3155 	}
3156 	return (IBT_AR_NOT_REGISTERED);
3157 }
3158 
3159 ibt_status_t
3160 ibt_register_ar(ibt_clnt_hdl_t ibt_hdl, ibt_ar_t *arp)
3161 {
3162 	ibcm_ar_t		*found;
3163 	ibcm_ar_t		*tmp;
3164 	ibt_status_t		status;
3165 	ibt_status_t		s1, s2;
3166 	char			*s;
3167 	ibcm_ar_ref_t		*hdlp;
3168 	ibcm_ar_t		*new;
3169 	ibcm_ar_t		**linkp;
3170 	ibtl_cm_hca_port_t	cm_port;
3171 	uint16_t		pkey_ix;
3172 	ibcm_hca_info_t		*hcap;
3173 	ibmf_saa_handle_t	saa_handle;
3174 	sa_service_record_t	*srv_recp;
3175 	uint64_t		gid_ored;
3176 
3177 	IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: PKey 0x%X GID %llX:%llX",
3178 	    arp->ar_pkey, (longlong_t)arp->ar_gid.gid_prefix,
3179 	    (longlong_t)arp->ar_gid.gid_guid);
3180 
3181 	/*
3182 	 * If P_Key is 0, but GID is not, this query is invalid.
3183 	 * If GID is 0, but P_Key is not, this query is invalid.
3184 	 */
3185 	gid_ored = arp->ar_gid.gid_guid | arp->ar_gid.gid_prefix;
3186 	if ((arp->ar_pkey == 0 && gid_ored != 0ULL) ||
3187 	    (arp->ar_pkey != 0 && gid_ored == 0ULL)) {
3188 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: "
3189 		    "GID/P_Key is not valid");
3190 		return (IBT_INVALID_PARAM);
3191 	}
3192 
3193 	/* assume success, so these might be needed */
3194 	hdlp = kmem_alloc(sizeof (*hdlp), KM_SLEEP);
3195 	new = kmem_zalloc(sizeof (*new), KM_SLEEP);
3196 
3197 	mutex_enter(&ibcm_svc_info_lock);
3198 	/* search for existing GID/pkey (there can be at most 1) */
3199 	status = ibcm_search_ar(arp, &found);
3200 	if (status == IBT_INCONSISTENT_AR) {
3201 		mutex_exit(&ibcm_svc_info_lock);
3202 		kmem_free(new, sizeof (*new));
3203 		kmem_free(hdlp, sizeof (*hdlp));
3204 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: "
3205 		    "address record is inconsistent with a known one");
3206 		return (IBT_INCONSISTENT_AR);
3207 	} else if (status == IBT_SUCCESS) {
3208 		if (found->ar_flags == IBCM_AR_INITING) {
3209 			found->ar_waiters++;
3210 			cv_wait(&found->ar_cv, &ibcm_svc_info_lock);
3211 			found->ar_waiters--;
3212 		}
3213 		if (found->ar_flags == IBCM_AR_FAILED) {
3214 			if (found->ar_waiters == 0) {
3215 				cv_destroy(&found->ar_cv);
3216 				kmem_free(found, sizeof (*found));
3217 			}
3218 			mutex_exit(&ibcm_svc_info_lock);
3219 			kmem_free(new, sizeof (*new));
3220 			kmem_free(hdlp, sizeof (*hdlp));
3221 			return (ibt_get_module_failure(IBT_FAILURE_IBCM, 0));
3222 		}
3223 		hdlp->ar_ibt_hdl = ibt_hdl;
3224 		hdlp->ar_ref_link = found->ar_ibt_hdl_list;
3225 		found->ar_ibt_hdl_list = hdlp;
3226 		mutex_exit(&ibcm_svc_info_lock);
3227 		kmem_free(new, sizeof (*new));
3228 		ibtl_cm_change_service_cnt(ibt_hdl, 1);
3229 		return (IBT_SUCCESS);
3230 	} else {
3231 		ASSERT(status == IBT_AR_NOT_REGISTERED);
3232 	}
3233 	hdlp->ar_ref_link = NULL;
3234 	hdlp->ar_ibt_hdl = ibt_hdl;
3235 	new->ar_ibt_hdl_list = hdlp;
3236 	new->ar = *arp;
3237 	new->ar_flags = IBCM_AR_INITING;
3238 	new->ar_waiters = 0;
3239 	cv_init(&new->ar_cv, NULL, CV_DEFAULT, NULL);
3240 	new->ar_link = ibcm_ar_list;
3241 	ibcm_ar_list = new;
3242 
3243 	/* verify GID/pkey is valid for a local port, etc. */
3244 	hcap = NULL;
3245 	if ((s1 = ibtl_cm_get_hca_port(arp->ar_gid, 0, &cm_port))
3246 	    != IBT_SUCCESS ||
3247 	    (s2 = ibt_pkey2index_byguid(cm_port.hp_hca_guid, cm_port.hp_port,
3248 	    arp->ar_pkey, &pkey_ix)) != IBT_SUCCESS ||
3249 	    (hcap = ibcm_find_hca_entry(cm_port.hp_hca_guid)) == NULL) {
3250 		cv_destroy(&new->ar_cv);
3251 		ibcm_ar_list = new->ar_link;
3252 		mutex_exit(&ibcm_svc_info_lock);
3253 		kmem_free(new, sizeof (*new));
3254 		kmem_free(hdlp, sizeof (*hdlp));
3255 		status = IBT_INVALID_PARAM;
3256 		if (s1 == IBT_HCA_PORT_NOT_ACTIVE) {
3257 			s = "PORT DOWN";
3258 			status = IBT_HCA_PORT_NOT_ACTIVE;
3259 		} else if (s1 != IBT_SUCCESS)
3260 			s = "GID not found";
3261 		else if (s2 != IBT_SUCCESS)
3262 			s = "PKEY not found";
3263 		else
3264 			s = "CM could not find its HCA entry";
3265 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: %s, status = %d",
3266 		    s, status);
3267 		return (status);
3268 	}
3269 	mutex_exit(&ibcm_svc_info_lock);
3270 	saa_handle = ibcm_get_saa_handle(hcap, cm_port.hp_port);
3271 
3272 	/* create service record */
3273 	srv_recp = kmem_zalloc(sizeof (*srv_recp), KM_SLEEP);
3274 	srv_recp->ServiceLease = 0xFFFFFFFF;	/* infinite */
3275 	srv_recp->ServiceP_Key = arp->ar_pkey;
3276 	srv_recp->ServiceKey_hi = 0xDA410000ULL;	/* DAPL */
3277 	srv_recp->ServiceKey_lo = 0xA7500000ULL;	/* ATS */
3278 	(void) strcpy((char *)srv_recp->ServiceName, IBCM_DAPL_ATS_NAME);
3279 	srv_recp->ServiceGID = arp->ar_gid;
3280 	bcopy(arp->ar_data, srv_recp->ServiceData, IBCM_DAPL_ATS_NBYTES);
3281 	srv_recp->ServiceID = IBCM_DAPL_ATS_SID;
3282 
3283 	/* insert service record into the SA */
3284 
3285 	IBCM_DUMP_SERVICE_REC(srv_recp);
3286 
3287 	if (saa_handle != NULL)
3288 		status = ibcm_write_service_record(saa_handle, srv_recp,
3289 		    IBMF_SAA_UPDATE);
3290 	else
3291 		status = IBT_HCA_PORT_NOT_ACTIVE;
3292 
3293 	if (status != IBT_SUCCESS) {
3294 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: sa access fails %d, "
3295 		    "sid %llX", status, (longlong_t)srv_recp->ServiceID);
3296 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: FAILED for gid "
3297 		    "%llX:%llX pkey 0x%X", (longlong_t)arp->ar_gid.gid_prefix,
3298 		    (longlong_t)arp->ar_gid.gid_guid, arp->ar_pkey);
3299 
3300 		kmem_free(srv_recp, sizeof (*srv_recp));
3301 		kmem_free(hdlp, sizeof (*hdlp));
3302 
3303 		mutex_enter(&ibcm_svc_info_lock);
3304 		linkp = &ibcm_ar_list;
3305 		tmp = *linkp;
3306 		while (tmp != NULL) {
3307 			if (tmp == new) {
3308 				*linkp = new->ar_link;
3309 				break;
3310 			}
3311 			linkp = &tmp->ar_link;
3312 			tmp = *linkp;
3313 		}
3314 		if (new->ar_waiters > 0) {
3315 			new->ar_flags = IBCM_AR_FAILED;
3316 			cv_broadcast(&new->ar_cv);
3317 			mutex_exit(&ibcm_svc_info_lock);
3318 		} else {
3319 			cv_destroy(&new->ar_cv);
3320 			mutex_exit(&ibcm_svc_info_lock);
3321 			kmem_free(new, sizeof (*new));
3322 		}
3323 		ibcm_dec_hca_acc_cnt(hcap);
3324 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: "
3325 		    "IBMF_SAA failed to write address record");
3326 	} else {					/* SUCCESS */
3327 		uint8_t		*b;
3328 
3329 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: SUCCESS for gid "
3330 		    "%llx:%llx pkey %x", (longlong_t)arp->ar_gid.gid_prefix,
3331 		    (longlong_t)arp->ar_gid.gid_guid, arp->ar_pkey);
3332 		b = arp->ar_data;
3333 
3334 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar:"
3335 		    " data %d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d",
3336 		    b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8], b[9],
3337 		    b[10], b[11], b[12], b[13], b[14], b[15]);
3338 		mutex_enter(&ibcm_svc_info_lock);
3339 		new->ar_srv_recp = srv_recp;
3340 		new->ar_saa_handle = saa_handle;
3341 		new->ar_port = cm_port.hp_port;
3342 		new->ar_hcap = hcap;
3343 		new->ar_flags = IBCM_AR_SUCCESS;
3344 		if (new->ar_waiters > 0)
3345 			cv_broadcast(&new->ar_cv);
3346 		mutex_exit(&ibcm_svc_info_lock);
3347 		ibtl_cm_change_service_cnt(ibt_hdl, 1);
3348 		/* do not call ibcm_dec_hca_acc_cnt(hcap) until deregister */
3349 	}
3350 	return (status);
3351 }
3352 
3353 ibt_status_t
3354 ibt_deregister_ar(ibt_clnt_hdl_t ibt_hdl, ibt_ar_t *arp)
3355 {
3356 	ibcm_ar_t		*found;
3357 	ibcm_ar_t		*tmp;
3358 	ibcm_ar_t		**linkp;
3359 	ibcm_ar_ref_t		*hdlp;
3360 	ibcm_ar_ref_t		**hdlpp;
3361 	ibt_status_t		status;
3362 	ibmf_saa_handle_t	saa_handle;
3363 	sa_service_record_t	*srv_recp;
3364 	uint64_t		gid_ored;
3365 
3366 	IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: pkey %x", arp->ar_pkey);
3367 	IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: gid %llx:%llx",
3368 	    (longlong_t)arp->ar_gid.gid_prefix,
3369 	    (longlong_t)arp->ar_gid.gid_guid);
3370 
3371 	/*
3372 	 * If P_Key is 0, but GID is not, this query is invalid.
3373 	 * If GID is 0, but P_Key is not, this query is invalid.
3374 	 */
3375 	gid_ored = arp->ar_gid.gid_guid | arp->ar_gid.gid_prefix;
3376 	if ((arp->ar_pkey == 0 && gid_ored != 0ULL) ||
3377 	    (arp->ar_pkey != 0 && gid_ored == 0ULL)) {
3378 		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
3379 		    "GID/P_Key is not valid");
3380 		return (IBT_INVALID_PARAM);
3381 	}
3382 
3383 	mutex_enter(&ibcm_svc_info_lock);
3384 	/* search for existing GID/pkey (there can be at most 1) */
3385 	status = ibcm_search_ar(arp, &found);
3386 	if (status == IBT_INCONSISTENT_AR || status == IBT_AR_NOT_REGISTERED) {
3387 		mutex_exit(&ibcm_svc_info_lock);
3388 		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
3389 		    "address record not found");
3390 		return (IBT_AR_NOT_REGISTERED);
3391 	}
3392 	ASSERT(status == IBT_SUCCESS);
3393 
3394 	hdlpp = &found->ar_ibt_hdl_list;
3395 	hdlp = *hdlpp;
3396 	while (hdlp != NULL) {
3397 		if (hdlp->ar_ibt_hdl == ibt_hdl)
3398 			break;
3399 		hdlpp = &hdlp->ar_ref_link;
3400 		hdlp = *hdlpp;
3401 	}
3402 	if (hdlp == NULL) {	/* could not find ibt_hdl on list */
3403 		mutex_exit(&ibcm_svc_info_lock);
3404 		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
3405 		    "address record found, but not for this client");
3406 		return (IBT_AR_NOT_REGISTERED);
3407 	}
3408 	*hdlpp = hdlp->ar_ref_link;	/* remove ref for this client */
3409 	if (found->ar_ibt_hdl_list == NULL && found->ar_waiters == 0) {
3410 		/* last entry was removed */
3411 		found->ar_flags = IBCM_AR_INITING; /* hold off register_ar */
3412 		saa_handle = found->ar_saa_handle;
3413 		srv_recp = found->ar_srv_recp;
3414 
3415 		/* wait if this service record is being rewritten */
3416 		while (found->ar_rewrite_state == IBCM_REWRITE_BUSY)
3417 			cv_wait(&ibcm_svc_info_cv, &ibcm_svc_info_lock);
3418 		mutex_exit(&ibcm_svc_info_lock);
3419 
3420 		/* remove service record */
3421 		status = ibcm_write_service_record(saa_handle, srv_recp,
3422 		    IBMF_SAA_DELETE);
3423 		if (status != IBT_SUCCESS)
3424 			IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
3425 			    "IBMF_SAA failed to delete address record");
3426 		mutex_enter(&ibcm_svc_info_lock);
3427 		if (found->ar_waiters == 0) {	/* still no waiters */
3428 			linkp = &ibcm_ar_list;
3429 			tmp = *linkp;
3430 			while (tmp != found) {
3431 				linkp = &tmp->ar_link;
3432 				tmp = *linkp;
3433 			}
3434 			*linkp = tmp->ar_link;
3435 			ibcm_dec_hca_acc_cnt(found->ar_hcap);
3436 			kmem_free(srv_recp, sizeof (*srv_recp));
3437 			cv_destroy(&found->ar_cv);
3438 			kmem_free(found, sizeof (*found));
3439 		} else {
3440 			/* add service record back in for the waiters */
3441 			mutex_exit(&ibcm_svc_info_lock);
3442 			status = ibcm_write_service_record(saa_handle, srv_recp,
3443 			    IBMF_SAA_UPDATE);
3444 			mutex_enter(&ibcm_svc_info_lock);
3445 			if (status == IBT_SUCCESS)
3446 				found->ar_flags = IBCM_AR_SUCCESS;
3447 			else {
3448 				found->ar_flags = IBCM_AR_FAILED;
3449 				IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
3450 				    "IBMF_SAA failed to write address record");
3451 			}
3452 			cv_broadcast(&found->ar_cv);
3453 		}
3454 	}
3455 	mutex_exit(&ibcm_svc_info_lock);
3456 	kmem_free(hdlp, sizeof (*hdlp));
3457 	ibtl_cm_change_service_cnt(ibt_hdl, -1);
3458 	return (status);
3459 }
3460 
3461 ibt_status_t
3462 ibt_query_ar(ib_gid_t *sgid, ibt_ar_t *queryp, ibt_ar_t *resultp)
3463 {
3464 	sa_service_record_t	svcrec_req;
3465 	sa_service_record_t	*svcrec_resp;
3466 	void			*results_p;
3467 	uint64_t		component_mask = 0;
3468 	uint64_t		gid_ored;
3469 	size_t			length;
3470 	int			num_rec;
3471 	int			i;
3472 	ibmf_saa_access_args_t	access_args;
3473 	ibt_status_t		retval;
3474 	ibtl_cm_hca_port_t	cm_port;
3475 	ibcm_hca_info_t		*hcap;
3476 	ibmf_saa_handle_t	saa_handle;
3477 
3478 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar(%p, %p)", queryp, resultp);
3479 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: sgid %llx:%llx",
3480 	    (longlong_t)sgid->gid_prefix, (longlong_t)sgid->gid_guid);
3481 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: query_pkey %x", queryp->ar_pkey);
3482 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: query_gid %llx:%llx",
3483 	    (longlong_t)queryp->ar_gid.gid_prefix,
3484 	    (longlong_t)queryp->ar_gid.gid_guid);
3485 
3486 	/*
3487 	 * If P_Key is 0, but GID is not, this query is invalid.
3488 	 * If GID is 0, but P_Key is not, this query is invalid.
3489 	 */
3490 	gid_ored = queryp->ar_gid.gid_guid | queryp->ar_gid.gid_prefix;
3491 	if ((queryp->ar_pkey == 0 && gid_ored != 0ULL) ||
3492 	    (queryp->ar_pkey != 0 && gid_ored == 0ULL)) {
3493 		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar: GID/P_Key is not valid");
3494 		return (IBT_INVALID_PARAM);
3495 	}
3496 
3497 	hcap = NULL;
3498 	if (ibtl_cm_get_hca_port(*sgid, 0, &cm_port) != IBT_SUCCESS ||
3499 	    (hcap = ibcm_find_hca_entry(cm_port.hp_hca_guid)) == NULL ||
3500 	    (saa_handle = ibcm_get_saa_handle(hcap, cm_port.hp_port)) == NULL) {
3501 		if (hcap != NULL)
3502 			ibcm_dec_hca_acc_cnt(hcap);
3503 		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar: sgid is not valid");
3504 		return (IBT_INVALID_PARAM);
3505 	}
3506 
3507 	bzero(&svcrec_req, sizeof (svcrec_req));
3508 
3509 	/* Is GID/P_Key Specified. */
3510 	if (queryp->ar_pkey != 0) {	/* GID is non-zero from check above */
3511 		svcrec_req.ServiceP_Key = queryp->ar_pkey;
3512 		component_mask |= SA_SR_COMPMASK_PKEY;
3513 		IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: P_Key %X",
3514 		    queryp->ar_pkey);
3515 		svcrec_req.ServiceGID = queryp->ar_gid;
3516 		component_mask |= SA_SR_COMPMASK_GID;
3517 		IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: GID %llX:%llX",
3518 		    (longlong_t)queryp->ar_gid.gid_prefix,
3519 		    (longlong_t)queryp->ar_gid.gid_guid);
3520 	}
3521 
3522 	/* Is ServiceData Specified. */
3523 	for (i = 0; i < IBCM_DAPL_ATS_NBYTES; i++) {
3524 		if (queryp->ar_data[i] != 0) {
3525 			bcopy(queryp->ar_data, svcrec_req.ServiceData,
3526 			    IBCM_DAPL_ATS_NBYTES);
3527 			component_mask |= 0xFFFF << 7;	/* all 16 Data8 */
3528 							/* components */
3529 			break;
3530 		}
3531 	}
3532 
3533 	/* Service Name */
3534 	(void) strcpy((char *)svcrec_req.ServiceName, IBCM_DAPL_ATS_NAME);
3535 	component_mask |= SA_SR_COMPMASK_NAME;
3536 
3537 	svcrec_req.ServiceID = IBCM_DAPL_ATS_SID;
3538 	component_mask |= SA_SR_COMPMASK_ID;
3539 
3540 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: "
3541 	    "Perform SA Access: Mask: 0x%X", component_mask);
3542 
3543 	/*
3544 	 * Call in SA Access retrieve routine to get Service Records.
3545 	 *
3546 	 * SA Access framework allocated memory for the "results_p".
3547 	 * Make sure to deallocate once we are done with the results_p.
3548 	 * The size of the buffer allocated will be as returned in
3549 	 * "length" field.
3550 	 */
3551 	access_args.sq_attr_id = SA_SERVICERECORD_ATTRID;
3552 	access_args.sq_access_type = IBMF_SAA_RETRIEVE;
3553 	access_args.sq_component_mask = component_mask;
3554 	access_args.sq_template = &svcrec_req;
3555 	access_args.sq_template_length = sizeof (sa_service_record_t);
3556 	access_args.sq_callback = NULL;
3557 	access_args.sq_callback_arg = NULL;
3558 
3559 	retval = ibcm_contact_sa_access(saa_handle, &access_args, &length,
3560 	    &results_p);
3561 
3562 	ibcm_dec_hca_acc_cnt(hcap);
3563 	if (retval != IBT_SUCCESS) {
3564 		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar: SA Access Failed");
3565 		return (retval);
3566 	}
3567 
3568 	num_rec = length / sizeof (sa_service_record_t);
3569 
3570 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: "
3571 	    "Found %d Service Records.", num_rec);
3572 
3573 	/* Validate the returned number of records. */
3574 	if ((results_p != NULL) && (num_rec > 0)) {
3575 		uint8_t		*b;
3576 
3577 		/* Just return info from the first service record. */
3578 		svcrec_resp = (sa_service_record_t *)results_p;
3579 
3580 		/* The Service GID and Service ID */
3581 		resultp->ar_gid = svcrec_resp->ServiceGID;
3582 		resultp->ar_pkey = svcrec_resp->ServiceP_Key;
3583 		bcopy(svcrec_resp->ServiceData,
3584 		    resultp->ar_data, IBCM_DAPL_ATS_NBYTES);
3585 
3586 		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar: "
3587 		    "Found: pkey %x dgid %llX:%llX", resultp->ar_pkey,
3588 		    (longlong_t)resultp->ar_gid.gid_prefix,
3589 		    (longlong_t)resultp->ar_gid.gid_guid);
3590 		b = resultp->ar_data;
3591 		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar:"
3592 		    " data %d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d",
3593 		    b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8], b[9],
3594 		    b[10], b[11], b[12], b[13], b[14], b[15]);
3595 
3596 		/* Deallocate the memory for results_p. */
3597 		kmem_free(results_p, length);
3598 		if (num_rec > 1)
3599 			retval = IBT_MULTIPLE_AR;
3600 		else
3601 			retval = IBT_SUCCESS;
3602 	} else {
3603 		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar: "
3604 		    "ibmf_sa_access found 0 matching records");
3605 		retval = IBT_AR_NOT_REGISTERED;
3606 	}
3607 	return (retval);
3608 }
3609 
3610 /* mark all ATS service records associated with the port */
3611 static void
3612 ibcm_mark_ar(ib_guid_t hca_guid, uint8_t port)
3613 {
3614 	ibcm_ar_t	*tmp;
3615 
3616 	ASSERT(MUTEX_HELD(&ibcm_svc_info_lock));
3617 	for (tmp = ibcm_ar_list; tmp != NULL; tmp = tmp->ar_link) {
3618 		if (tmp->ar_hcap->hca_guid == hca_guid &&
3619 		    tmp->ar_port == port) {
3620 			/* even if it's busy, we mark it for rewrite */
3621 			tmp->ar_rewrite_state = IBCM_REWRITE_NEEDED;
3622 		}
3623 	}
3624 }
3625 
3626 /* rewrite all ATS service records */
3627 static int
3628 ibcm_rewrite_ar(void)
3629 {
3630 	ibcm_ar_t		*tmp;
3631 	ibmf_saa_handle_t	saa_handle;
3632 	sa_service_record_t	*srv_recp;
3633 	ibt_status_t		rval;
3634 	int			did_something = 0;
3635 
3636 	ASSERT(MUTEX_HELD(&ibcm_svc_info_lock));
3637 check_for_work:
3638 	for (tmp = ibcm_ar_list; tmp != NULL; tmp = tmp->ar_link) {
3639 		if (tmp->ar_rewrite_state == IBCM_REWRITE_NEEDED) {
3640 			tmp->ar_rewrite_state = IBCM_REWRITE_BUSY;
3641 			saa_handle = tmp->ar_saa_handle;
3642 			srv_recp = tmp->ar_srv_recp;
3643 			mutex_exit(&ibcm_svc_info_lock);
3644 			IBTF_DPRINTF_L3(cmlog, "ibcm_rewrite_ar: "
3645 			    "rewriting ar @ %p", tmp);
3646 			did_something = 1;
3647 			rval = ibcm_write_service_record(saa_handle, srv_recp,
3648 			    IBMF_SAA_UPDATE);
3649 			if (rval != IBT_SUCCESS)
3650 				IBTF_DPRINTF_L2(cmlog, "ibcm_rewrite_ar: "
3651 				    "ibcm_write_service_record failed: "
3652 				    "status = %d", rval);
3653 			mutex_enter(&ibcm_svc_info_lock);
3654 			/* if it got marked again, then we want to rewrite */
3655 			if (tmp->ar_rewrite_state == IBCM_REWRITE_BUSY)
3656 				tmp->ar_rewrite_state = IBCM_REWRITE_IDLE;
3657 			/* in case there was a waiter... */
3658 			cv_broadcast(&ibcm_svc_info_cv);
3659 			goto check_for_work;
3660 		}
3661 	}
3662 	return (did_something);
3663 }
3664 
3665 static void
3666 ibcm_rewrite_svc_record(ibcm_svc_info_t *srv_hdl, ibcm_svc_bind_t *sbindp)
3667 {
3668 	ibcm_hca_info_t		*hcap;
3669 	ib_svc_id_t		sid, start_sid, end_sid;
3670 	ibmf_saa_handle_t	saa_handle;
3671 	sa_service_record_t	srv_rec;
3672 	ibt_status_t		rval;
3673 
3674 	hcap = ibcm_find_hca_entry(sbindp->sbind_hcaguid);
3675 	if (hcap == NULL) {
3676 		IBTF_DPRINTF_L2(cmlog, "ibcm_rewrite_svc_record: "
3677 		    "NO HCA found for HCA GUID %llX", sbindp->sbind_hcaguid);
3678 		return;
3679 	}
3680 
3681 	saa_handle = ibcm_get_saa_handle(hcap, sbindp->sbind_port);
3682 	if (saa_handle == NULL) {
3683 		IBTF_DPRINTF_L2(cmlog, "ibcm_rewrite_svc_record: "
3684 		    "saa_handle is NULL");
3685 		ibcm_dec_hca_acc_cnt(hcap);
3686 		return;
3687 	}
3688 
3689 	IBTF_DPRINTF_L3(cmlog, "ibcm_rewrite_svc_record: "
3690 	    "rewriting svc '%s', port_guid = %llX", sbindp->sbind_name,
3691 	    sbindp->sbind_gid.gid_guid);
3692 
3693 	bzero(&srv_rec, sizeof (srv_rec));
3694 
3695 	srv_rec.ServiceLease = sbindp->sbind_lease;
3696 	srv_rec.ServiceP_Key = sbindp->sbind_pkey;
3697 	srv_rec.ServiceKey_hi = sbindp->sbind_key[0];
3698 	srv_rec.ServiceKey_lo = sbindp->sbind_key[1];
3699 	(void) strcpy((char *)srv_rec.ServiceName, sbindp->sbind_name);
3700 	srv_rec.ServiceGID = sbindp->sbind_gid;
3701 
3702 	bcopy(sbindp->sbind_data, srv_rec.ServiceData, IB_SVC_DATA_LEN);
3703 
3704 	/* insert srv record into the SA */
3705 	start_sid = srv_hdl->svc_id;
3706 	end_sid = start_sid + srv_hdl->svc_num_sids - 1;
3707 	for (sid = start_sid; sid <= end_sid; sid++) {
3708 		srv_rec.ServiceID = sid;
3709 
3710 		rval = ibcm_write_service_record(saa_handle, &srv_rec,
3711 		    IBMF_SAA_UPDATE);
3712 
3713 		IBTF_DPRINTF_L4(cmlog, "ibcm_rewrite_svc_record: "
3714 		    "ibcm_write_service_record, SvcId = %llX, "
3715 		    "rval = %d", (longlong_t)sid, rval);
3716 		if (rval != IBT_SUCCESS) {
3717 			IBTF_DPRINTF_L2(cmlog, "ibcm_rewrite_svc_record:"
3718 			    " ibcm_write_service_record fails %d sid %llX",
3719 			    rval, (longlong_t)sid);
3720 		}
3721 	}
3722 	ibcm_dec_hca_acc_cnt(hcap);
3723 }
3724 
3725 /*
3726  * Task to mark all service records as needing to be rewritten to the SM/SA.
3727  * This task does not return until all of them have been rewritten.
3728  */
3729 void
3730 ibcm_service_record_rewrite_task(void *arg)
3731 {
3732 	ibcm_port_up_t	*pup = (ibcm_port_up_t *)arg;
3733 	ib_guid_t	hca_guid = pup->pup_hca_guid;
3734 	uint8_t		port = pup->pup_port;
3735 	ibcm_svc_info_t	*svcp;
3736 	ibcm_svc_bind_t	*sbp;
3737 	avl_tree_t	*avl_tree = &ibcm_svc_avl_tree;
3738 	static int	task_is_running = 0;
3739 
3740 	IBTF_DPRINTF_L2(cmlog, "ibcm_service_record_rewrite_task STARTED "
3741 	    "for hca_guid %llX, port %d", hca_guid, port);
3742 
3743 	mutex_enter(&ibcm_svc_info_lock);
3744 	ibcm_mark_ar(hca_guid, port);
3745 	for (svcp = avl_first(avl_tree); svcp != NULL;
3746 	    svcp = avl_walk(avl_tree, svcp, AVL_AFTER)) {
3747 		sbp = svcp->svc_bind_list;
3748 		while (sbp != NULL) {
3749 			if (sbp->sbind_pkey != 0 &&
3750 			    sbp->sbind_port == port &&
3751 			    sbp->sbind_hcaguid == hca_guid) {
3752 				/* even if it's busy, we mark it for rewrite */
3753 				sbp->sbind_rewrite_state = IBCM_REWRITE_NEEDED;
3754 			}
3755 			sbp = sbp->sbind_link;
3756 		}
3757 	}
3758 	if (task_is_running) {
3759 		/* let the other task thread finish the work */
3760 		mutex_exit(&ibcm_svc_info_lock);
3761 		return;
3762 	}
3763 	task_is_running = 1;
3764 
3765 	(void) ibcm_rewrite_ar();
3766 
3767 check_for_work:
3768 	for (svcp = avl_first(avl_tree); svcp != NULL;
3769 	    svcp = avl_walk(avl_tree, svcp, AVL_AFTER)) {
3770 		sbp = svcp->svc_bind_list;
3771 		while (sbp != NULL) {
3772 			if (sbp->sbind_rewrite_state == IBCM_REWRITE_NEEDED) {
3773 				sbp->sbind_rewrite_state = IBCM_REWRITE_BUSY;
3774 				mutex_exit(&ibcm_svc_info_lock);
3775 				ibcm_rewrite_svc_record(svcp, sbp);
3776 				mutex_enter(&ibcm_svc_info_lock);
3777 				/* if it got marked again, we want to rewrite */
3778 				if (sbp->sbind_rewrite_state ==
3779 				    IBCM_REWRITE_BUSY)
3780 					sbp->sbind_rewrite_state =
3781 					    IBCM_REWRITE_IDLE;
3782 				/* in case there was a waiter... */
3783 				cv_broadcast(&ibcm_svc_info_cv);
3784 				goto check_for_work;
3785 			}
3786 			sbp = sbp->sbind_link;
3787 		}
3788 	}
3789 	/*
3790 	 * If there were no service records to write, and we failed to
3791 	 * have to rewrite any more ATS service records, then we're done.
3792 	 */
3793 	if (ibcm_rewrite_ar() != 0)
3794 		goto check_for_work;
3795 	task_is_running = 0;
3796 	mutex_exit(&ibcm_svc_info_lock);
3797 
3798 	IBTF_DPRINTF_L2(cmlog, "ibcm_service_record_rewrite_task DONE");
3799 	kmem_free(pup, sizeof (ibcm_port_up_t));
3800 }
3801 
3802 
3803 /*
3804  * Function:
3805  * 	ibt_cm_proceed
3806  *
3807  * Verifies the arguments and dispatches the cm state machine processing
3808  * via taskq
3809  */
3810 
3811 ibt_status_t
3812 ibt_cm_proceed(ibt_cm_event_type_t event, void *session_id,
3813     ibt_cm_status_t status, ibt_cm_proceed_reply_t *cm_event_data,
3814     void *priv_data, ibt_priv_data_len_t priv_data_len)
3815 {
3816 	ibcm_state_data_t *statep = (ibcm_state_data_t *)session_id;
3817 	ibcm_proceed_targs_t	*proceed_targs;
3818 	ibcm_proceed_error_t	proceed_error;
3819 
3820 	IBTF_DPRINTF_L3(cmlog, "ibt_cm_proceed chan 0x%p event %x status %x "
3821 	    "session_id %p", statep->channel, event, status, session_id);
3822 
3823 	IBTF_DPRINTF_L5(cmlog, "ibt_cm_proceed chan 0x%p cm_event_data %p, "
3824 	    "priv_data %p priv_data_len %x", statep->channel, cm_event_data,
3825 	    priv_data, priv_data_len);
3826 
3827 	/* validate session_id and status */
3828 	if ((statep == NULL) || (status == IBT_CM_DEFER)) {
3829 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : Invalid Args");
3830 		return (IBT_INVALID_PARAM);
3831 	}
3832 
3833 	/* If priv data len specified, then priv_data cannot be NULL */
3834 	if ((priv_data_len > 0) && (priv_data == NULL))
3835 		return (IBT_INVALID_PARAM);
3836 
3837 	proceed_error = IBCM_PROCEED_INVALID_NONE;
3838 
3839 	mutex_enter(&statep->state_mutex);
3840 	if (event == IBT_CM_EVENT_REQ_RCV) {
3841 
3842 		if ((statep->state != IBCM_STATE_REQ_RCVD) &&
3843 		    (statep->state != IBCM_STATE_MRA_SENT))
3844 			proceed_error = IBCM_PROCEED_INVALID_EVENT_STATE;
3845 		else if (priv_data_len > IBT_REP_PRIV_DATA_SZ)
3846 			proceed_error = IBCM_PROCEED_INVALID_PRIV_SZ;
3847 
3848 	} else if (event == IBT_CM_EVENT_REP_RCV) {
3849 		if ((statep->state != IBCM_STATE_REP_RCVD) &&
3850 		    (statep->state != IBCM_STATE_MRA_REP_SENT))
3851 			proceed_error = IBCM_PROCEED_INVALID_EVENT_STATE;
3852 		else if (priv_data_len > IBT_RTU_PRIV_DATA_SZ)
3853 			proceed_error = IBCM_PROCEED_INVALID_PRIV_SZ;
3854 	} else if (event == IBT_CM_EVENT_LAP_RCV) {
3855 		if ((statep->ap_state != IBCM_AP_STATE_LAP_RCVD) &&
3856 		    (statep->ap_state != IBCM_AP_STATE_MRA_LAP_SENT))
3857 			proceed_error = IBCM_PROCEED_INVALID_EVENT_STATE;
3858 		else if (priv_data_len > IBT_APR_PRIV_DATA_SZ)
3859 			proceed_error = IBCM_PROCEED_INVALID_PRIV_SZ;
3860 	} else if (event == IBT_CM_EVENT_CONN_CLOSED) {
3861 		if (statep->state != IBCM_STATE_DREQ_RCVD)
3862 			proceed_error = IBCM_PROCEED_INVALID_EVENT_STATE;
3863 		else if (priv_data_len > IBT_DREP_PRIV_DATA_SZ)
3864 			proceed_error = IBCM_PROCEED_INVALID_PRIV_SZ;
3865 	} else {
3866 			proceed_error = IBCM_PROCEED_INVALID_EVENT;
3867 	}
3868 
3869 	/* if there is an error, print an error message and return */
3870 	if (proceed_error != IBCM_PROCEED_INVALID_NONE) {
3871 		mutex_exit(&statep->state_mutex);
3872 		if (proceed_error == IBCM_PROCEED_INVALID_EVENT_STATE) {
3873 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p"
3874 			    "Invalid Event/State combination specified",
3875 			    statep->channel);
3876 			return (IBT_INVALID_PARAM);
3877 		} else if (proceed_error == IBCM_PROCEED_INVALID_PRIV_SZ) {
3878 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p"
3879 			    "Invalid Event/priv len combination specified",
3880 			    statep->channel);
3881 			return (IBT_INVALID_PARAM);
3882 		} else if (proceed_error == IBCM_PROCEED_INVALID_EVENT) {
3883 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p"
3884 			    "Invalid Event specified", statep->channel);
3885 			return (IBT_INVALID_PARAM);
3886 		} else {
3887 			ASSERT(proceed_error == IBCM_PROCEED_INVALID_LAP);
3888 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p"
3889 			    "IBT_CM_EVENT_LAP_RCV not supported",
3890 			    statep->channel);
3891 			/* UNTIL HCA DRIVER ENABLES AP SUPPORT, FAIL THE CALL */
3892 			return (IBT_APM_NOT_SUPPORTED);
3893 		}
3894 	}
3895 
3896 
3897 	/* wait until client's CM handler returns DEFER status back to CM */
3898 
3899 	while (statep->clnt_proceed == IBCM_BLOCK) {
3900 		IBTF_DPRINTF_L5(cmlog, "ibt_cm_proceed : chan 0x%p blocked for "
3901 		    "return of client's cm handler", statep->channel);
3902 		cv_wait(&statep->block_client_cv, &statep->state_mutex);
3903 	}
3904 
3905 	if (statep->clnt_proceed == IBCM_FAIL) {
3906 		mutex_exit(&statep->state_mutex);
3907 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p Failed as "
3908 		    "client returned non-DEFER status from cm handler",
3909 		    statep->channel);
3910 		return (IBT_CHAN_STATE_INVALID);
3911 	}
3912 
3913 	ASSERT(statep->clnt_proceed == IBCM_UNBLOCK);
3914 	statep->clnt_proceed = IBCM_FAIL;
3915 	mutex_exit(&statep->state_mutex);
3916 
3917 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*proceed_targs))
3918 
3919 	/* the state machine processing is done in a separate thread */
3920 
3921 	/* proceed_targs is freed in ibcm_proceed_via_taskq */
3922 	proceed_targs = kmem_alloc(sizeof (ibcm_proceed_targs_t),
3923 	    KM_SLEEP);
3924 
3925 	proceed_targs->event  = event;
3926 	proceed_targs->status = status;
3927 	proceed_targs->priv_data_len = priv_data_len;
3928 
3929 	bcopy(priv_data, proceed_targs->priv_data, priv_data_len);
3930 
3931 	proceed_targs->tst.rc.statep = statep;
3932 	bcopy(cm_event_data, &proceed_targs->tst.rc.rc_cm_event_data,
3933 	    sizeof (ibt_cm_proceed_reply_t));
3934 
3935 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*proceed_targs))
3936 
3937 	(void) taskq_dispatch(ibcm_taskq, ibcm_proceed_via_taskq,
3938 	    proceed_targs, TQ_SLEEP);
3939 
3940 	return (IBT_SUCCESS);
3941 }
3942 
3943 /*
3944  * Function:
3945  * 	ibcm_proceed_via_taskq
3946  *
3947  * Called from taskq, dispatched by ibt_cm_proceed
3948  * Completes the cm state processing for ibt_cm_proceed
3949  */
3950 void
3951 ibcm_proceed_via_taskq(void *targs)
3952 {
3953 	ibcm_proceed_targs_t	*proceed_targs = (ibcm_proceed_targs_t *)targs;
3954 	ibcm_state_data_t *statep = proceed_targs->tst.rc.statep;
3955 	ibt_cm_reason_t reject_reason;
3956 	uint8_t arej_len;
3957 	ibcm_status_t response;
3958 	ibcm_clnt_reply_info_t clnt_info;
3959 
3960 	clnt_info.reply_event = &proceed_targs->tst.rc.rc_cm_event_data;
3961 	clnt_info.priv_data = proceed_targs->priv_data;
3962 	clnt_info.priv_data_len = proceed_targs->priv_data_len;
3963 
3964 	IBTF_DPRINTF_L4(cmlog, "ibcm_proceed_via_taskq chan 0x%p targs %x",
3965 	    statep->channel, targs);
3966 
3967 	if (proceed_targs->event == IBT_CM_EVENT_REQ_RCV) {
3968 		response =
3969 		    ibcm_process_cep_req_cm_hdlr(statep, proceed_targs->status,
3970 		    &clnt_info, &reject_reason, &arej_len,
3971 		    (ibcm_req_msg_t *)statep->defer_cm_msg);
3972 
3973 		ibcm_handle_cep_req_response(statep, response, reject_reason,
3974 		    arej_len);
3975 
3976 	} else if (proceed_targs->event == IBT_CM_EVENT_REP_RCV) {
3977 		response =
3978 		    ibcm_process_cep_rep_cm_hdlr(statep, proceed_targs->status,
3979 		    &clnt_info, &reject_reason, &arej_len,
3980 		    (ibcm_rep_msg_t *)statep->defer_cm_msg);
3981 
3982 		ibcm_handle_cep_rep_response(statep, response, reject_reason,
3983 		    arej_len, (ibcm_rep_msg_t *)statep->defer_cm_msg);
3984 
3985 	} else if (proceed_targs->event == IBT_CM_EVENT_LAP_RCV) {
3986 		ibcm_process_cep_lap_cm_hdlr(statep, proceed_targs->status,
3987 		    &clnt_info, (ibcm_lap_msg_t *)statep->defer_cm_msg,
3988 		    (ibcm_apr_msg_t *)IBCM_OUT_MSGP(statep->lapr_msg));
3989 
3990 		ibcm_post_apr_mad(statep);
3991 
3992 	} else {
3993 		ASSERT(proceed_targs->event == IBT_CM_EVENT_CONN_CLOSED);
3994 		ibcm_handle_cep_dreq_response(statep, proceed_targs->priv_data,
3995 		    proceed_targs->priv_data_len);
3996 	}
3997 
3998 	kmem_free(targs, sizeof (ibcm_proceed_targs_t));
3999 }
4000 
4001 /*
4002  * Function:
4003  * 	ibt_cm_ud_proceed
4004  *
4005  * Verifies the arguments and dispatches the cm state machine processing
4006  * via taskq
4007  */
4008 ibt_status_t
4009 ibt_cm_ud_proceed(void *session_id, ibt_channel_hdl_t ud_channel,
4010     ibt_cm_status_t status, ibt_redirect_info_t *redirect_infop,
4011     void *priv_data, ibt_priv_data_len_t priv_data_len)
4012 {
4013 	ibcm_ud_state_data_t *ud_statep = (ibcm_ud_state_data_t *)session_id;
4014 	ibcm_proceed_targs_t	*proceed_targs;
4015 	ibt_qp_query_attr_t	qp_attr;
4016 	ibt_status_t		retval;
4017 
4018 	IBTF_DPRINTF_L3(cmlog, "ibt_cm_ud_proceed session_id %p "
4019 	    "ud_channel %p ", session_id, ud_channel);
4020 
4021 	IBTF_DPRINTF_L4(cmlog, "ibt_cm_ud_proceed status %x priv_data %p "
4022 	    "priv_data_len %x",  status, priv_data, priv_data_len);
4023 
4024 	/* validate session_id and status */
4025 	if ((ud_statep == NULL) || (status == IBT_CM_DEFER)) {
4026 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_ud_proceed : Invalid Args");
4027 		return (IBT_INVALID_PARAM);
4028 	}
4029 
4030 	/* If priv data len specified, then priv_data cannot be NULL */
4031 	if ((priv_data_len > 0) && (priv_data == NULL))
4032 		return (IBT_INVALID_PARAM);
4033 
4034 	if (priv_data_len > IBT_SIDR_REP_PRIV_DATA_SZ)
4035 		return (IBT_INVALID_PARAM);
4036 
4037 	/* retrieve qpn and qkey from ud channel */
4038 
4039 	/* validate event and statep's state */
4040 
4041 	if (status == IBT_CM_ACCEPT) {
4042 		retval = ibt_query_qp(ud_channel, &qp_attr);
4043 		if ((retval != IBT_SUCCESS) ||
4044 		    (qp_attr.qp_info.qp_trans != IBT_UD_SRV)) {
4045 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_ud_proceed: "
4046 			    "Failed to retrieve QPN from the channel: %d",
4047 			    retval);
4048 			return (IBT_INVALID_PARAM);
4049 		}
4050 	}
4051 
4052 
4053 	mutex_enter(&ud_statep->ud_state_mutex);
4054 
4055 	if (ud_statep->ud_state != IBCM_STATE_SIDR_REQ_RCVD) {
4056 		mutex_exit(&ud_statep->ud_state_mutex);
4057 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_ud_proceed : Invalid State "
4058 		    "specified");
4059 		return (IBT_INVALID_PARAM);
4060 	}
4061 
4062 	/* wait until client's CM handler returns DEFER status back to CM */
4063 
4064 	while (ud_statep->ud_clnt_proceed == IBCM_BLOCK) {
4065 		IBTF_DPRINTF_L5(cmlog, "ibt_cm_ud_proceed : Blocked for return"
4066 		    " of client's ud cm handler");
4067 		cv_wait(&ud_statep->ud_block_client_cv,
4068 		    &ud_statep->ud_state_mutex);
4069 	}
4070 
4071 	if (ud_statep->ud_clnt_proceed == IBCM_FAIL) {
4072 		mutex_exit(&ud_statep->ud_state_mutex);
4073 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_ud_proceed : Failed as client "
4074 		    "returned non-DEFER status from cm handler");
4075 		return (IBT_INVALID_PARAM);
4076 	}
4077 
4078 	ASSERT(ud_statep->ud_clnt_proceed == IBCM_UNBLOCK);
4079 	ud_statep->ud_clnt_proceed = IBCM_FAIL;
4080 	mutex_exit(&ud_statep->ud_state_mutex);
4081 
4082 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*proceed_targs))
4083 
4084 	/* the state machine processing is done in a separate thread */
4085 
4086 	/* proceed_targs is freed in ibcm_proceed_via_taskq */
4087 	proceed_targs = kmem_zalloc(sizeof (ibcm_proceed_targs_t),
4088 	    KM_SLEEP);
4089 
4090 	proceed_targs->status = status;
4091 	proceed_targs->priv_data_len = priv_data_len;
4092 
4093 	bcopy(priv_data, proceed_targs->priv_data, priv_data_len);
4094 
4095 	if (status == IBT_CM_ACCEPT) {
4096 		proceed_targs->tst.ud.ud_qkey =
4097 		    qp_attr.qp_info.qp_transport.ud.ud_qkey;
4098 		proceed_targs->tst.ud.ud_qpn = qp_attr.qp_qpn;
4099 	}
4100 
4101 	proceed_targs->tst.ud.ud_statep = ud_statep;
4102 
4103 	/* copy redirect info based on status */
4104 	if (status == IBT_CM_REDIRECT)
4105 		bcopy(redirect_infop, &proceed_targs->tst.ud.ud_redirect_info,
4106 		    sizeof (ibt_redirect_info_t));
4107 
4108 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*proceed_targs))
4109 
4110 	(void) taskq_dispatch(ibcm_taskq, ibcm_ud_proceed_via_taskq,
4111 	    proceed_targs, TQ_SLEEP);
4112 
4113 	return (IBT_SUCCESS);
4114 }
4115 
4116 /*
4117  * Function:
4118  * 	ibcm_ud_proceed_via_taskq
4119  *
4120  * Called from taskq, dispatched by ibt_cm_ud_proceed
4121  * Completes the cm state processing for ibt_cm_ud_proceed
4122  */
4123 void
4124 ibcm_ud_proceed_via_taskq(void *targs)
4125 {
4126 	ibcm_proceed_targs_t	*proceed_targs = (ibcm_proceed_targs_t *)targs;
4127 	ibcm_ud_state_data_t	*ud_statep = proceed_targs->tst.ud.ud_statep;
4128 	ibcm_ud_clnt_reply_info_t ud_clnt_info;
4129 	ibt_sidr_status_t	sidr_status;
4130 
4131 	IBTF_DPRINTF_L4(cmlog, "ibcm_ud_proceed_via_taskq(%p)", targs);
4132 
4133 	ud_clnt_info.ud_qpn  = proceed_targs->tst.ud.ud_qpn;
4134 	ud_clnt_info.ud_qkey  = proceed_targs->tst.ud.ud_qkey;
4135 	ud_clnt_info.priv_data = proceed_targs->priv_data;
4136 	ud_clnt_info.priv_data_len = proceed_targs->priv_data_len;
4137 	ud_clnt_info.redirect_infop = &proceed_targs->tst.ud.ud_redirect_info;
4138 
4139 	/* validate event and statep's state */
4140 	ibcm_process_sidr_req_cm_hdlr(ud_statep, proceed_targs->status,
4141 	    &ud_clnt_info, &sidr_status,
4142 	    (ibcm_sidr_rep_msg_t *)IBCM_OUT_MSGP(ud_statep->ud_stored_msg));
4143 
4144 	ibcm_post_sidr_rep_mad(ud_statep, sidr_status);
4145 
4146 	/* decr the statep ref cnt incremented in ibcm_process_sidr_req_msg */
4147 	mutex_enter(&ud_statep->ud_state_mutex);
4148 	IBCM_UD_REF_CNT_DECR(ud_statep);
4149 	mutex_exit(&ud_statep->ud_state_mutex);
4150 
4151 	kmem_free(targs, sizeof (ibcm_proceed_targs_t));
4152 }
4153 
4154 /*
4155  * Function:
4156  *	ibt_set_alt_path
4157  * Input:
4158  *	channel		Channel handle returned from ibt_alloc_rc_channel(9F).
4159  *
4160  *	mode		Execute in blocking or non blocking mode.
4161  *
4162  *	alt_path	A pointer to an ibt_alt_path_info_t as returned from an
4163  *			ibt_get_alt_path(9F) call that specifies the new
4164  *			alternate path.
4165  *
4166  *	priv_data       A pointer to a buffer specified by caller for the
4167  *			private data in the outgoing CM Load Alternate Path
4168  *			(LAP) message sent to the remote host. This can be NULL
4169  *			if no private data is available to communicate to the
4170  *			remote node.
4171  *
4172  *	priv_data_len   Length of valid data in priv_data, this should be less
4173  *			than or equal to IBT_LAP_PRIV_DATA_SZ.
4174  *
4175  * Output:
4176  *	ret_args	If called in blocking mode, points to a return argument
4177  *			structure of type ibt_ap_returns_t.
4178  *
4179  * Returns:
4180  *	IBT_SUCCESS on Success else appropriate error.
4181  * Description:
4182  *	Load the specified alternate path. Causes the CM to send an LAP message
4183  *	to the remote node.
4184  *	Can only be called on a previously opened RC channel.
4185  */
4186 ibt_status_t
4187 ibt_set_alt_path(ibt_channel_hdl_t channel, ibt_execution_mode_t mode,
4188     ibt_alt_path_info_t *alt_path, void *priv_data,
4189     ibt_priv_data_len_t priv_data_len, ibt_ap_returns_t *ret_args)
4190 {
4191 	ibmf_handle_t		ibmf_hdl;
4192 	ibt_status_t		status = IBT_SUCCESS;
4193 	ibcm_lap_msg_t		*lap_msgp;
4194 	ibcm_hca_info_t		*hcap;
4195 	ibcm_state_data_t	*statep;
4196 	uint8_t			port_no;
4197 	ib_lid_t		alternate_slid;
4198 	ibt_priv_data_len_t	len;
4199 	ib_lid_t		base_lid;
4200 	boolean_t		alt_grh;
4201 
4202 	IBTF_DPRINTF_L3(cmlog, "ibt_set_alt_path(%p, %x, %p, %p, %x, %p)",
4203 	    channel, mode, alt_path, priv_data, priv_data_len, ret_args);
4204 
4205 	/* validate channel */
4206 	if (IBCM_INVALID_CHANNEL(channel)) {
4207 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: invalid channel");
4208 		return (IBT_CHAN_HDL_INVALID);
4209 	}
4210 
4211 	if (ibtl_cm_get_chan_type(channel) != IBT_RC_SRV) {
4212 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4213 		    "Invalid Channel type: Applicable only to RC Channel");
4214 		return (IBT_CHAN_SRV_TYPE_INVALID);
4215 	}
4216 
4217 	if (mode == IBT_NONBLOCKING) {
4218 		if (ret_args != NULL) {
4219 			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4220 			    "ret_args should be NULL when called in "
4221 			    "non-blocking mode");
4222 			return (IBT_INVALID_PARAM);
4223 		}
4224 	} else if (mode == IBT_BLOCKING) {
4225 		if (ret_args == NULL) {
4226 			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4227 			    "ret_args should be Non-NULL when called in "
4228 			    "blocking mode");
4229 			return (IBT_INVALID_PARAM);
4230 		}
4231 		if (ret_args->ap_priv_data_len > IBT_APR_PRIV_DATA_SZ) {
4232 			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4233 			    "expected private data length is too large");
4234 			return (IBT_INVALID_PARAM);
4235 		}
4236 		if ((ret_args->ap_priv_data_len > 0) &&
4237 		    (ret_args->ap_priv_data == NULL)) {
4238 			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4239 			    "apr_priv_data_len > 0, but apr_priv_data NULL");
4240 			return (IBT_INVALID_PARAM);
4241 		}
4242 	} else { /* any other mode is not valid for ibt_set_alt_path */
4243 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4244 		    "invalid mode %x specified", mode);
4245 		return (IBT_INVALID_PARAM);
4246 	}
4247 
4248 	if ((port_no = alt_path->ap_alt_cep_path.cep_hca_port_num) == 0)
4249 		return (IBT_INVALID_PARAM);
4250 
4251 	/* get the statep */
4252 	IBCM_GET_CHAN_PRIVATE(channel, statep);
4253 	if (statep == NULL) {
4254 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: statep NULL");
4255 		return (IBT_CM_FAILURE);
4256 	}
4257 
4258 	mutex_enter(&statep->state_mutex);
4259 	IBCM_RELEASE_CHAN_PRIVATE(channel);
4260 	IBCM_REF_CNT_INCR(statep);
4261 	mutex_exit(&statep->state_mutex);
4262 
4263 	IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: statep %p", statep);
4264 
4265 	hcap = statep->hcap;
4266 
4267 	/* HCA must have been in active state. If not, it's a client bug */
4268 	if (!IBCM_ACCESS_HCA_OK(hcap))
4269 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: hca in error state");
4270 
4271 	ASSERT(statep->cm_handler != NULL);
4272 
4273 	/* Check Alternate port */
4274 	status = ibt_get_port_state_byguid(hcap->hca_guid, port_no, NULL,
4275 	    &base_lid);
4276 	if (status != IBT_SUCCESS) {
4277 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4278 		    "ibt_get_port_state_byguid status %d ", status);
4279 		mutex_enter(&statep->state_mutex);
4280 		IBCM_REF_CNT_DECR(statep);
4281 		mutex_exit(&statep->state_mutex);
4282 		return (status);
4283 	}
4284 
4285 	if ((hcap->hca_port_info[port_no - 1].port_ibmf_hdl == NULL) &&
4286 	    ((status = ibcm_hca_reinit_port(hcap, port_no - 1))
4287 	    != IBT_SUCCESS)) {
4288 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4289 		    "ibmf reg or callback setup failed during re-initialize");
4290 		mutex_enter(&statep->state_mutex);
4291 		IBCM_REF_CNT_DECR(statep);
4292 		mutex_exit(&statep->state_mutex);
4293 		return (status);
4294 	}
4295 
4296 	ibmf_hdl = statep->stored_reply_addr.ibmf_hdl;
4297 
4298 	alternate_slid = base_lid +
4299 	    alt_path->ap_alt_cep_path.cep_adds_vect.av_src_path;
4300 
4301 	IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: alternate SLID = %x",
4302 	    h2b16(alternate_slid));
4303 
4304 	ibcm_lapr_enter();	/* limit how many run simultaneously */
4305 
4306 	/* Allocate MAD for LAP */
4307 	if (statep->lapr_msg == NULL)
4308 		if ((status = ibcm_alloc_out_msg(ibmf_hdl, &statep->lapr_msg,
4309 		    MAD_METHOD_SEND)) != IBT_SUCCESS) {
4310 			ibcm_lapr_exit();
4311 			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4312 			    "chan 0x%p ibcm_alloc_out_msg failed", channel);
4313 			mutex_enter(&statep->state_mutex);
4314 			IBCM_REF_CNT_DECR(statep);
4315 			mutex_exit(&statep->state_mutex);
4316 			return (status);
4317 		}
4318 
4319 	mutex_enter(&statep->state_mutex);
4320 
4321 	IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: connection state is"
4322 	    " %x", statep->state);
4323 
4324 	/* Check state */
4325 	if ((statep->state != IBCM_STATE_ESTABLISHED) ||
4326 	    (statep->ap_state != IBCM_AP_STATE_IDLE)) {
4327 		IBCM_REF_CNT_DECR(statep);
4328 		mutex_exit(&statep->state_mutex);
4329 		(void) ibcm_free_out_msg(ibmf_hdl, &statep->lapr_msg);
4330 		ibcm_lapr_exit();
4331 		return (IBT_CHAN_STATE_INVALID);
4332 	} else {
4333 		/* Set to LAP Sent state */
4334 		statep->ap_state = IBCM_AP_STATE_LAP_SENT;
4335 		statep->ap_done = B_FALSE;
4336 		statep->remaining_retry_cnt = statep->max_cm_retries;
4337 		statep->timer_stored_state = statep->state;
4338 		statep->timer_stored_ap_state = statep->ap_state;
4339 		IBCM_REF_CNT_INCR(statep); /* for ibcm_post_lap_complete */
4340 	}
4341 
4342 	mutex_exit(&statep->state_mutex);
4343 
4344 	/* No more failure returns below */
4345 
4346 	/* Allocate MAD for LAP */
4347 	IBTF_DPRINTF_L5(cmlog, "ibt_set_alt_path:"
4348 	    " statep's mad addr = 0x%p", IBCM_OUT_HDRP(statep->lapr_msg));
4349 
4350 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*lap_msgp))
4351 
4352 	lap_msgp = (ibcm_lap_msg_t *)IBCM_OUT_MSGP(statep->lapr_msg);
4353 
4354 	lap_msgp->lap_alt_l_port_lid = h2b16(alternate_slid);
4355 	lap_msgp->lap_alt_r_port_lid =
4356 	    h2b16(alt_path->ap_alt_cep_path.cep_adds_vect.av_dlid);
4357 
4358 	/* Fill in remote port gid */
4359 	lap_msgp->lap_alt_r_port_gid.gid_prefix =
4360 	    h2b64(alt_path->ap_alt_cep_path.cep_adds_vect.av_dgid.gid_prefix);
4361 	lap_msgp->lap_alt_r_port_gid.gid_guid =
4362 	    h2b64(alt_path->ap_alt_cep_path.cep_adds_vect.av_dgid.gid_guid);
4363 
4364 	/* Fill in local port gid */
4365 	lap_msgp->lap_alt_l_port_gid.gid_prefix =
4366 	    h2b64(alt_path->ap_alt_cep_path.cep_adds_vect.av_sgid.gid_prefix);
4367 	lap_msgp->lap_alt_l_port_gid.gid_guid =
4368 	    h2b64(alt_path->ap_alt_cep_path.cep_adds_vect.av_sgid.gid_guid);
4369 
4370 	alt_grh = alt_path->ap_alt_cep_path.cep_adds_vect.av_send_grh;
4371 
4372 	/* alternate_flow_label, and alternate srate, alternate traffic class */
4373 	lap_msgp->lap_alt_srate_plus =
4374 	    alt_path->ap_alt_cep_path.cep_adds_vect.av_srate & 0x3f;
4375 	lap_msgp->lap_alt_flow_label_plus = h2b32(((alt_grh == B_TRUE) ?
4376 	    (alt_path->ap_alt_cep_path.cep_adds_vect.av_flow << 12) : 0) |
4377 	    alt_path->ap_alt_cep_path.cep_adds_vect.av_tclass);
4378 
4379 	/* Alternate hop limit, service level */
4380 	lap_msgp->lap_alt_hop_limit = (alt_grh == B_TRUE) ?
4381 	    alt_path->ap_alt_cep_path.cep_adds_vect.av_hop : 0xff;
4382 	lap_msgp->lap_alt_sl_plus =
4383 	    alt_path->ap_alt_cep_path.cep_adds_vect.av_srvl << 4 |
4384 	    ((alt_grh == B_FALSE) ? 0x8 : 0);
4385 
4386 	lap_msgp->lap_alt_local_acktime_plus = ibt_usec2ib(
4387 	    (2 * statep->rc_alt_pkt_lt) +
4388 	    ibt_ib2usec(hcap->hca_ack_delay)) << 3;
4389 
4390 	lap_msgp->lap_local_comm_id = h2b32(statep->local_comid);
4391 	lap_msgp->lap_remote_comm_id = h2b32(statep->remote_comid);
4392 
4393 	lap_msgp->lap_remote_qpn_eecn_plus =
4394 	    h2b32((statep->remote_qpn << 8) |
4395 	    ibt_usec2ib(ibcm_remote_response_time) << 3);
4396 
4397 	len = min(priv_data_len, IBT_LAP_PRIV_DATA_SZ);
4398 	if ((len > 0) && priv_data) {
4399 		bcopy(priv_data, lap_msgp->lap_private_data, len);
4400 	}
4401 
4402 	/* only rc_alt_pkt_lt and ap_return_data fields are initialized */
4403 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*statep))
4404 
4405 	statep->rc_alt_pkt_lt = ibt_ib2usec(alt_path->ap_alt_pkt_lt);
4406 
4407 	/* return_data is filled up in the state machine code */
4408 	statep->ap_return_data = ret_args;
4409 
4410 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*statep))
4411 
4412 	IBCM_OUT_HDRP(statep->lapr_msg)->AttributeID =
4413 	    h2b16(IBCM_INCOMING_LAP + IBCM_ATTR_BASE_ID);
4414 
4415 	IBCM_OUT_HDRP(statep->lapr_msg)->TransactionID =
4416 	    h2b64(ibcm_generate_tranid(IBCM_INCOMING_LAP, statep->local_comid,
4417 	    0));
4418 	IBTF_DPRINTF_L3(cmlog, "ibt_set_alt_path: statep %p, tid %llx",
4419 	    statep, IBCM_OUT_HDRP(statep->lapr_msg)->TransactionID);
4420 
4421 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*lap_msgp))
4422 
4423 	/* Send LAP */
4424 	ibcm_post_rc_mad(statep, statep->lapr_msg, ibcm_post_lap_complete,
4425 	    statep);
4426 
4427 	mutex_enter(&statep->state_mutex);
4428 
4429 	if (mode == IBT_BLOCKING) {
4430 		IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: blocking");
4431 
4432 		/* wait for APR */
4433 		while (statep->ap_done != B_TRUE) {
4434 			cv_wait(&statep->block_client_cv,
4435 			    &statep->state_mutex);
4436 		}
4437 
4438 		IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: done blocking");
4439 
4440 		/*
4441 		 * In the case that ibt_set_alt_path fails,
4442 		 * change retval to IBT_CM_FAILURE
4443 		 */
4444 		if (statep->ap_return_data->ap_status != IBT_CM_AP_LOADED)
4445 			status = IBT_CM_FAILURE;
4446 
4447 	}
4448 
4449 	/* decrement the ref-count before leaving here */
4450 	IBCM_REF_CNT_DECR(statep);
4451 
4452 	mutex_exit(&statep->state_mutex);
4453 
4454 	ibcm_lapr_exit();
4455 
4456 	/* If this message isn't seen then ibt_set_alt_path failed */
4457 	IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: done");
4458 
4459 	return (status);
4460 }
4461 
4462 
4463 #ifdef DEBUG
4464 
4465 /*
4466  * ibcm_query_classport_info:
4467  *	Query classportinfo
4468  *
4469  * INPUTS:
4470  *	channel		- Channel that is associated with a statep
4471  *
4472  * RETURN VALUE: NONE
4473  * This function is currently used to generate a valid get method classport
4474  * info, and test CM functionality. There is no ibtl client interface to
4475  * generate a classportinfo. It is possible that CM may use classportinfo
4476  * from other nodes in the future, and most of the code below could be re-used.
4477  */
4478 void
4479 ibcm_query_classport_info(ibt_channel_hdl_t channel)
4480 {
4481 	ibcm_state_data_t	*statep;
4482 	ibmf_msg_t		*msgp;
4483 
4484 	IBTF_DPRINTF_L2(cmlog, "ibcm_query_classport_info(%p)", channel);
4485 
4486 	/* validate channel, first */
4487 	if (IBCM_INVALID_CHANNEL(channel)) {
4488 		IBTF_DPRINTF_L2(cmlog, "ibcm_query_classport_info: "
4489 		    "invalid channel (%p)", channel);
4490 		return;
4491 	}
4492 
4493 	/* get the statep */
4494 	IBCM_GET_CHAN_PRIVATE(channel, statep);
4495 
4496 	/*
4497 	 * This can happen, if the statep is already gone by a DREQ from
4498 	 * the remote side
4499 	 */
4500 	if (statep == NULL) {
4501 		IBTF_DPRINTF_L2(cmlog, "ibcm_query_classport_info: "
4502 		    "statep NULL");
4503 		return;
4504 	}
4505 
4506 	mutex_enter(&statep->state_mutex);
4507 	IBCM_RELEASE_CHAN_PRIVATE(channel);
4508 	IBCM_REF_CNT_INCR(statep);
4509 	mutex_exit(&statep->state_mutex);
4510 
4511 	/* Debug/test code, so don't care about return status */
4512 	(void) ibcm_alloc_out_msg(statep->stored_reply_addr.ibmf_hdl, &msgp,
4513 	    MAD_METHOD_GET);
4514 
4515 	IBCM_OUT_HDRP(msgp)->TransactionID = h2b64(ibcm_generate_tranid(
4516 	    MAD_ATTR_ID_CLASSPORTINFO, statep->local_comid, 0));
4517 	IBCM_OUT_HDRP(msgp)->AttributeID = h2b16(MAD_ATTR_ID_CLASSPORTINFO);
4518 
4519 	(void) ibcm_post_mad(msgp, &statep->stored_reply_addr, NULL, NULL);
4520 
4521 	IBTF_DPRINTF_L2(cmlog, "ibcm_query_classport_info(%p) "
4522 	    "Get method MAD posted ", channel);
4523 
4524 	(void) ibcm_free_out_msg(statep->stored_reply_addr.ibmf_hdl, &msgp);
4525 
4526 	mutex_enter(&statep->state_mutex);
4527 	IBCM_REF_CNT_DECR(statep);
4528 	mutex_exit(&statep->state_mutex);
4529 }
4530 
4531 static void
4532 ibcm_print_reply_addr(ibt_channel_hdl_t channel, ibcm_mad_addr_t *cm_reply_addr)
4533 {
4534 	IBTF_DPRINTF_L4(cmlog, "ibcm_print_reply_addr: chan 0x%p, SLID %x, "
4535 	    "DLID %x", channel, cm_reply_addr->rcvd_addr.ia_local_lid,
4536 	    cm_reply_addr->rcvd_addr.ia_remote_lid);
4537 
4538 	IBTF_DPRINTF_L4(cmlog, "ibcm_print_reply_addr: QKEY %x, PKEY %x, "
4539 	    "RQPN %x SL %x", cm_reply_addr->rcvd_addr.ia_q_key,
4540 	    cm_reply_addr->rcvd_addr.ia_p_key,
4541 	    cm_reply_addr->rcvd_addr.ia_remote_qno,
4542 	    cm_reply_addr->rcvd_addr.ia_service_level);
4543 
4544 	IBTF_DPRINTF_L4(cmlog, "ibcm_print_reply_addr: CM SGID %llX:%llX ",
4545 	    cm_reply_addr->grh_hdr.ig_sender_gid.gid_prefix,
4546 	    cm_reply_addr->grh_hdr.ig_sender_gid.gid_guid);
4547 
4548 	IBTF_DPRINTF_L4(cmlog, "ibcm_print_reply_addr: CM DGID %llX:%llX",
4549 	    cm_reply_addr->grh_hdr.ig_recver_gid.gid_prefix,
4550 	    cm_reply_addr->grh_hdr.ig_recver_gid.gid_guid);
4551 
4552 	IBTF_DPRINTF_L4(cmlog, "ibcm_print_reply_addr: CM FL %x TC %x HL %x",
4553 	    cm_reply_addr->grh_hdr.ig_flow_label,
4554 	    cm_reply_addr->grh_hdr.ig_tclass,
4555 	    cm_reply_addr->grh_hdr.ig_hop_limit);
4556 }
4557 
4558 #endif
4559 
/*
 * Argument bundle built by ibt_join_mcg() and handed to
 * ibcm_process_join_mcg(), either directly (blocking call) or through the
 * CM taskq (non-blocking call).  ibcm_process_join_mcg() frees it.
 */
typedef struct ibcm_join_mcg_tqarg_s {
	ib_gid_t		rgid;		/* caller's request GID */
	ibt_mcg_attr_t		mcg_attr;	/* copy of caller's attributes */
	ibt_mcg_info_t		*mcg_infop;	/* caller's result buffer */
	ibt_mcg_handler_t	func;		/* completion handler or NULL */
	void			*arg;		/* argument passed to 'func' */
} ibcm_join_mcg_tqarg_t;
4567 
4568 _NOTE(READ_ONLY_DATA(ibcm_join_mcg_tqarg_s))
4569 
4570 /*
4571  * Function:
4572  *	ibt_join_mcg
4573  * Input:
4574  *	rgid		The request GID that defines the HCA port from which a
4575  *			contact to SA Access is performed to add the specified
4576  *			endport GID ((mcg_attr->mc_pgid) to a multicast group.
4577  *			If mcg_attr->mc_pgid is null, then this (rgid) will be
4578  *			treated as endport GID that is to be added to the
4579  *			multicast group.
4580  *
4581  *	mcg_attr	A pointer to an ibt_mcg_attr_t structure that defines
4582  *			the attributes of the desired multicast group to be
4583  *			created or joined.
4584  *
4585  *	func		NULL or a pointer to a function to call when
4586  *			ibt_join_mcg() completes. If 'func' is not NULL then
4587  *			ibt_join_mcg() will return as soon as possible after
4588  *			initiating the multicast group join/create process.
4589  *			'func' is then called when the process completes.
4590  *
4591  *	arg		Argument to the 'func'.
4592  *
4593  * Output:
4594  *	mcg_info_p	A pointer to the ibt_mcg_info_t structure, allocated
4595  *			by the caller, where the attributes of the created or
4596  *			joined multicast group are copied.
4597  * Returns:
4598  *	IBT_SUCCESS
4599  *	IBT_INVALID_PARAM
4600  *	IBT_MCG_RECORDS_NOT_FOUND
4601  *	IBT_INSUFF_RESOURCE
4602  * Description:
4603  *	Join a multicast group.  The first full member "join" causes the MCG
4604  *	to be created.
4605  */
4606 ibt_status_t
4607 ibt_join_mcg(ib_gid_t rgid, ibt_mcg_attr_t *mcg_attr,
4608     ibt_mcg_info_t *mcg_info_p, ibt_mcg_handler_t func, void  *arg)
4609 {
4610 	ibcm_join_mcg_tqarg_t	*mcg_tq;
4611 	int			flag = ((func == NULL) ? KM_SLEEP : KM_NOSLEEP);
4612 
4613 	IBTF_DPRINTF_L3(cmlog, "ibt_join_mcg(%llX:%llX, %p)", rgid.gid_prefix,
4614 	    rgid.gid_guid, mcg_attr);
4615 
4616 	if ((rgid.gid_prefix == 0) || (rgid.gid_guid == 0)) {
4617 		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: Request GID is required");
4618 		return (IBT_INVALID_PARAM);
4619 	}
4620 
4621 	if ((mcg_attr->mc_pkey == IB_PKEY_INVALID_LIMITED) ||
4622 	    (mcg_attr->mc_pkey == IB_PKEY_INVALID_FULL)) {
4623 		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: Invalid P_Key specified");
4624 		return (IBT_INVALID_PARAM);
4625 	}
4626 
4627 	if (mcg_attr->mc_join_state == 0) {
4628 		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: JoinState not specified");
4629 		return (IBT_INVALID_PARAM);
4630 	}
4631 
4632 	if (mcg_info_p == NULL) {
4633 		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: mcg_info_p is NULL");
4634 		return (IBT_INVALID_PARAM);
4635 	}
4636 
4637 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mcg_tq))
4638 
4639 	mcg_tq = kmem_alloc(sizeof (ibcm_join_mcg_tqarg_t), flag);
4640 	if (mcg_tq == NULL) {
4641 		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: "
4642 		    "Unable to allocate memory for local usage.");
4643 		return (IBT_INSUFF_KERNEL_RESOURCE);
4644 	}
4645 
4646 	mcg_tq->rgid = rgid;
4647 	bcopy(mcg_attr, &mcg_tq->mcg_attr, sizeof (ibt_mcg_attr_t));
4648 	mcg_tq->mcg_infop = mcg_info_p;
4649 	mcg_tq->func = func;
4650 	mcg_tq->arg = arg;
4651 
4652 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mcg_tq))
4653 
4654 	if (func != NULL) {	/* Non-Blocking */
4655 		IBTF_DPRINTF_L3(cmlog, "ibt_join_mcg: Non-Blocking Call");
4656 		if (taskq_dispatch(ibcm_taskq, ibcm_process_async_join_mcg,
4657 		    mcg_tq, TQ_NOSLEEP) == 0) {
4658 			IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: Failed to "
4659 			    "Dispatch the TaskQ");
4660 			kmem_free(mcg_tq, sizeof (ibcm_join_mcg_tqarg_t));
4661 			return (IBT_INSUFF_KERNEL_RESOURCE);
4662 		} else
4663 			return (IBT_SUCCESS);
4664 	} else {		/* Blocking */
4665 		return (ibcm_process_join_mcg(mcg_tq));
4666 	}
4667 }
4668 
/*
 * ibcm_process_async_join_mcg:
 *	Taskq entry point dispatched by ibt_join_mcg() for non-blocking
 *	joins.  'tq_arg' is the ibcm_join_mcg_tqarg_t built by ibt_join_mcg().
 *	The status is discarded here because ibcm_process_join_mcg() reports
 *	it to the client's completion handler itself.
 */
static void
ibcm_process_async_join_mcg(void *tq_arg)
{
	(void) ibcm_process_join_mcg(tq_arg);
}
4674 
4675 static ibt_status_t
4676 ibcm_process_join_mcg(void *taskq_arg)
4677 {
4678 	sa_mcmember_record_t	mcg_req;
4679 	sa_mcmember_record_t	*mcg_resp;
4680 	ibmf_saa_access_args_t	access_args;
4681 	ibmf_saa_handle_t	saa_handle;
4682 	uint64_t		component_mask = 0;
4683 	ibt_status_t		retval;
4684 	ibtl_cm_hca_port_t	hca_port;
4685 	uint_t			num_records;
4686 	size_t			length;
4687 	ibcm_hca_info_t		*hcap;
4688 	ibcm_join_mcg_tqarg_t	*mcg_arg = (ibcm_join_mcg_tqarg_t *)taskq_arg;
4689 	ibt_mcg_info_t		*mcg_info_p = mcg_arg->mcg_infop;
4690 
4691 	IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg(%p)", mcg_arg);
4692 
4693 	retval = ibtl_cm_get_hca_port(mcg_arg->rgid, 0, &hca_port);
4694 	if (retval != IBT_SUCCESS) {
4695 		IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: Failed to get "
4696 		    "port info from specified RGID: status = %d", retval);
4697 		goto ibcm_join_mcg_exit1;
4698 	}
4699 
4700 	bzero(&mcg_req, sizeof (sa_mcmember_record_t));
4701 
4702 	if ((mcg_arg->mcg_attr.mc_pgid.gid_prefix == 0) ||
4703 	    (mcg_arg->mcg_attr.mc_pgid.gid_guid == 0)) {
4704 		IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg: "
4705 		    "Request GID is Port GID");
4706 		mcg_req.PortGID = mcg_arg->rgid;
4707 	} else {
4708 		mcg_req.PortGID = mcg_arg->mcg_attr.mc_pgid;
4709 	}
4710 	component_mask |= SA_MC_COMPMASK_PORTGID;
4711 
4712 	mcg_req.Q_Key = mcg_arg->mcg_attr.mc_qkey;
4713 	mcg_req.P_Key = mcg_arg->mcg_attr.mc_pkey;
4714 	mcg_req.JoinState = mcg_arg->mcg_attr.mc_join_state;
4715 	mcg_req.TClass = mcg_arg->mcg_attr.mc_tclass;
4716 	mcg_req.FlowLabel = mcg_arg->mcg_attr.mc_flow;
4717 	mcg_req.SL = mcg_arg->mcg_attr.mc_sl;
4718 
4719 	component_mask |= SA_MC_COMPMASK_QKEY | SA_MC_COMPMASK_PKEY |
4720 	    SA_MC_COMPMASK_JOINSTATE | SA_MC_COMPMASK_TCLASS |
4721 	    SA_MC_COMPMASK_FLOWLABEL | SA_MC_COMPMASK_SL;
4722 
4723 	/* If client has specified MGID, use it else SA will assign one. */
4724 	if ((mcg_arg->mcg_attr.mc_mgid.gid_prefix >> 56ULL & 0xFF) == 0xFF) {
4725 		mcg_req.MGID = mcg_arg->mcg_attr.mc_mgid;
4726 		component_mask |= SA_MC_COMPMASK_MGID;
4727 	}
4728 
4729 	IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg: PGID=%llX:%llX, "
4730 	    "MGID=%llX:%llX", mcg_req.PortGID.gid_prefix,
4731 	    mcg_req.PortGID.gid_guid, mcg_req.MGID.gid_prefix,
4732 	    mcg_req.MGID.gid_guid);
4733 
4734 	/* Is MTU specified. */
4735 	if (mcg_arg->mcg_attr.mc_mtu_req.r_mtu) {
4736 		mcg_req.MTU = mcg_arg->mcg_attr.mc_mtu_req.r_mtu;
4737 		mcg_req.MTUSelector = mcg_arg->mcg_attr.mc_mtu_req.r_selector;
4738 
4739 		component_mask |= SA_MC_COMPMASK_MTUSELECTOR |
4740 		    SA_MC_COMPMASK_MTU;
4741 	}
4742 
4743 	/* Is RATE specified. */
4744 	if (mcg_arg->mcg_attr.mc_rate_req.r_srate) {
4745 		mcg_req.Rate = mcg_arg->mcg_attr.mc_rate_req.r_srate;
4746 		mcg_req.RateSelector =
4747 		    mcg_arg->mcg_attr.mc_rate_req.r_selector;
4748 
4749 		component_mask |= SA_MC_COMPMASK_RATESELECTOR |
4750 		    SA_MC_COMPMASK_RATE;
4751 	}
4752 
4753 	/* Is Packet Life Time specified. */
4754 	if (mcg_arg->mcg_attr.mc_pkt_lt_req.p_pkt_lt) {
4755 		mcg_req.Rate = mcg_arg->mcg_attr.mc_pkt_lt_req.p_pkt_lt;
4756 		mcg_req.RateSelector =
4757 		    mcg_arg->mcg_attr.mc_pkt_lt_req.p_selector;
4758 
4759 		component_mask |= SA_MC_COMPMASK_PKTLTSELECTOR |
4760 		    SA_MC_COMPMASK_PKTLT;
4761 	}
4762 
4763 	if (mcg_arg->mcg_attr.mc_hop) {
4764 		mcg_req.HopLimit = mcg_arg->mcg_attr.mc_hop;
4765 		component_mask |= SA_MC_COMPMASK_HOPLIMIT;
4766 	}
4767 
4768 	if (mcg_arg->mcg_attr.mc_scope) {
4769 		mcg_req.Scope = mcg_arg->mcg_attr.mc_scope;
4770 		component_mask |= SA_MC_COMPMASK_SCOPE;
4771 	}
4772 
4773 	if (mcg_arg->mcg_attr.mc_mlid) {
4774 		mcg_req.MLID = mcg_arg->mcg_attr.mc_mlid;
4775 		component_mask |= SA_MC_COMPMASK_MLID;
4776 	}
4777 
4778 	/* Get SA Access Handle. */
4779 	hcap = ibcm_find_hca_entry(hca_port.hp_hca_guid);
4780 	if (hcap == NULL) {
4781 		IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: NO HCA found");
4782 
4783 		retval = IBT_HCA_BUSY_DETACHING;
4784 		goto ibcm_join_mcg_exit1;
4785 	}
4786 
4787 	saa_handle = ibcm_get_saa_handle(hcap, hca_port.hp_port);
4788 	if (saa_handle == NULL) {
4789 		IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: SA Handle NULL");
4790 
4791 		retval = IBT_HCA_PORT_NOT_ACTIVE;
4792 		goto ibcm_join_mcg_exit;
4793 	}
4794 
4795 	if ((mcg_arg->mcg_attr.mc_pgid.gid_prefix != 0) &&
4796 	    (mcg_arg->mcg_attr.mc_pgid.gid_guid != 0)) {
4797 		retval = ibtl_cm_get_hca_port(mcg_arg->mcg_attr.mc_pgid, 0,
4798 		    &hca_port);
4799 		if (retval != IBT_SUCCESS) {
4800 			IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: Failed "
4801 			    "to get PortInfo of specified PGID: status = %d",
4802 			    retval);
4803 			goto ibcm_join_mcg_exit1;
4804 		}
4805 	}
4806 
4807 	/* Contact SA Access */
4808 	access_args.sq_attr_id = SA_MCMEMBERRECORD_ATTRID;
4809 	access_args.sq_access_type = IBMF_SAA_UPDATE;
4810 	access_args.sq_component_mask = component_mask;
4811 	access_args.sq_template = &mcg_req;
4812 	access_args.sq_template_length = sizeof (sa_mcmember_record_t);
4813 	access_args.sq_callback = NULL;
4814 	access_args.sq_callback_arg = NULL;
4815 
4816 	retval = ibcm_contact_sa_access(saa_handle, &access_args, &length,
4817 	    (void **)&mcg_resp);
4818 	if (retval != IBT_SUCCESS) {
4819 		IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: "
4820 		    "SA Access Failed");
4821 		goto ibcm_join_mcg_exit;
4822 	}
4823 
4824 	num_records = length/sizeof (sa_mcmember_record_t);
4825 
4826 	IBTF_DPRINTF_L4(cmlog, "ibcm_process_join_mcg: "
4827 	    "Found %d MCMember Records", num_records);
4828 
4829 	/* Validate the returned number of records. */
4830 	if ((mcg_resp != NULL) && (num_records > 0)) {
4831 		/* Update the return values. */
4832 		mcg_info_p->mc_adds_vect.av_dgid = mcg_resp->MGID;
4833 		mcg_info_p->mc_adds_vect.av_sgid = mcg_resp->PortGID;
4834 		mcg_info_p->mc_adds_vect.av_srate = mcg_resp->Rate;
4835 		mcg_info_p->mc_adds_vect.av_srvl = mcg_resp->SL;
4836 		mcg_info_p->mc_adds_vect.av_flow = mcg_resp->FlowLabel;
4837 		mcg_info_p->mc_adds_vect.av_tclass = mcg_resp->TClass;
4838 		mcg_info_p->mc_adds_vect.av_hop = mcg_resp->HopLimit;
4839 		mcg_info_p->mc_adds_vect.av_send_grh = B_TRUE;
4840 		mcg_info_p->mc_adds_vect.av_dlid = mcg_resp->MLID;
4841 		mcg_info_p->mc_mtu = mcg_resp->MTU;
4842 		mcg_info_p->mc_qkey = mcg_resp->Q_Key;
4843 
4844 		retval = ibt_pkey2index_byguid(hca_port.hp_hca_guid,
4845 		    hca_port.hp_port, mcg_resp->P_Key, &mcg_info_p->mc_pkey_ix);
4846 		if (retval != IBT_SUCCESS) {
4847 			IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg: "
4848 			    "Pkey2Index Conversion failed<%d>", retval);
4849 			mcg_info_p->mc_pkey_ix = 0;
4850 		}
4851 
4852 		mcg_info_p->mc_scope = mcg_resp->Scope;
4853 		mcg_info_p->mc_pkt_lt = mcg_resp->PacketLifeTime;
4854 
4855 		mcg_info_p->mc_adds_vect.av_port_num = hca_port.hp_port;
4856 		mcg_info_p->mc_adds_vect.av_sgid_ix = hca_port.hp_sgid_ix;
4857 		mcg_info_p->mc_adds_vect.av_src_path = 0;
4858 
4859 		/* Deallocate the memory allocated by SA for mcg_resp. */
4860 		kmem_free(mcg_resp, length);
4861 		retval = IBT_SUCCESS;
4862 	} else {
4863 		retval = IBT_MCG_RECORDS_NOT_FOUND;
4864 		IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg: "
4865 		    "MCG RECORDS NOT FOUND");
4866 	}
4867 
4868 ibcm_join_mcg_exit:
4869 	ibcm_dec_hca_acc_cnt(hcap);
4870 
4871 ibcm_join_mcg_exit1:
4872 	if (mcg_arg->func)
4873 		(*(mcg_arg->func))(mcg_arg->arg, retval, mcg_info_p);
4874 
4875 	kmem_free(mcg_arg, sizeof (ibcm_join_mcg_tqarg_t));
4876 
4877 	return (retval);
4878 }
4879 
4880 
4881 /*
4882  * Function:
4883  *	ibt_leave_mcg
4884  * Input:
4885  *	rgid		The request GID that defines the HCA port upon which
4886  *			to send the request to the Subnet Administrator, to
4887  *			remove the specified port (port_gid) from the multicast
4888  *			group.  If 'port_gid' is the Reserved GID (i.e.
4889  *			port_gid.gid_prefix = 0 and port_gid.gid_guid = 0),
4890  *			then the end-port associated with 'rgid' is removed
4891  *			from the multicast group.
4892  *
4893  *	mc_gid		A multicast group GID as returned from ibt_join_mcg()
4894  *			call.  This is optional, if not specified (i.e.
4895  *			mc_gid.gid_prefix has 0xFF in its upper 8 bits to
4896  *			identify this as being a multicast GID), then the
4897  *			port is removed from all the multicast groups of
4898  *			which it is a member.
4899  *
4900  *	port_gid	This is optional, if not the Reserved GID (gid_prefix
4901  *			and gid_guid not equal to 0), then this specifies the
4902  *			endport GID of the multicast group member being deleted
4903  *			from the group. If it is the Reserved GID (gid_prefix
4904  *			and gid_guid equal to 0) then the member endport GID is
4905  *			determined from 'rgid'.
4906  *
4907  *	mc_join_state	The Join State attribute used when the group was joined
4908  *			using ibt_join_mcg(). This Join State component must
4909  *			contains at least one bit set to 1 in the same position
4910  *			as that used during ibt_join_mcg(). i.e. the logical
4911  *			AND of the two JoinState components is not all zeros.
4912  *			This Join State component must not have some bits set
4913  *			which are not set using ibt_join_mcg().
4914  * Output:
4915  *	None.
4916  * Returns:
4917  *	IBT_SUCCESS
4918  *	IBT_INVALID_PARAM
4919  *	IBT_MC_GROUP_INVALID
4920  *	IBT_INSUFF_RESOURCE
4921  * Description:
4922  *	The port associated with the port GID shall be removed from the
4923  *	multicast group specified by MGID (mc_gid) or from all the multicast
4924  *	groups of which it is a member if the MGID (mc_gid) is not specified.
4925  *
4926  *	The last full member to leave causes the destruction of the Multicast
4927  *	Group.
4928  */
4929 ibt_status_t
4930 ibt_leave_mcg(ib_gid_t rgid, ib_gid_t mc_gid, ib_gid_t port_gid,
4931     uint8_t mc_join_state)
4932 {
4933 	sa_mcmember_record_t	mcg_req;
4934 	ibmf_saa_access_args_t	access_args;
4935 	ibmf_saa_handle_t	saa_handle;
4936 	uint64_t		component_mask = 0;
4937 	int			sa_retval;
4938 	ibt_status_t		retval;
4939 	ibtl_cm_hca_port_t	hca_port;
4940 	size_t			length;
4941 	void			*results_p;
4942 	ibcm_hca_info_t		*hcap;
4943 
4944 	IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg(%llX:%llX, %llX:%llX)",
4945 	    rgid.gid_prefix, rgid.gid_guid, mc_gid.gid_prefix, mc_gid.gid_guid);
4946 
4947 	IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg(%llX:%llX, 0x%X)",
4948 	    port_gid.gid_prefix, port_gid.gid_guid, mc_join_state);
4949 
4950 	if ((rgid.gid_prefix == 0) || (rgid.gid_guid == 0)) {
4951 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: RequestGID is required");
4952 		return (IBT_INVALID_PARAM);
4953 	}
4954 
4955 	bzero(&mcg_req, sizeof (sa_mcmember_record_t));
4956 
4957 	IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: MGID: %llX%llX",
4958 	    mc_gid.gid_prefix, mc_gid.gid_guid);
4959 
4960 	/* Validate MGID */
4961 	if ((mc_gid.gid_prefix >> 56ULL & 0xFF) == 0xFF) {
4962 		mcg_req.MGID = mc_gid;
4963 		component_mask |= SA_MC_COMPMASK_MGID;
4964 	} else if ((mc_gid.gid_prefix != 0) || (mc_gid.gid_guid != 0)) {
4965 		IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: Invalid MGID specified");
4966 		return (IBT_MC_MGID_INVALID);
4967 	}
4968 
4969 	if ((port_gid.gid_prefix == 0) || (port_gid.gid_guid == 0)) {
4970 		mcg_req.PortGID = rgid;
4971 	} else {
4972 		IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: Performing PROXY Leave");
4973 		mcg_req.PortGID = port_gid;
4974 	}
4975 	component_mask |= SA_MC_COMPMASK_PORTGID;
4976 
4977 	IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: Port GID <%llX:%llX>",
4978 	    mcg_req.PortGID.gid_prefix, mcg_req.PortGID.gid_guid);
4979 
4980 	/* Join State */
4981 	mcg_req.JoinState = mc_join_state;
4982 	component_mask |= SA_MC_COMPMASK_JOINSTATE;
4983 
4984 	retval = ibtl_cm_get_hca_port(rgid, 0, &hca_port);
4985 	if (retval != IBT_SUCCESS) {
4986 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: Failed to get port info "
4987 		    "from specified RGID : status = %d", retval);
4988 		return (retval);
4989 	}
4990 
4991 	/* Get SA Access Handle. */
4992 	hcap = ibcm_find_hca_entry(hca_port.hp_hca_guid);
4993 	if (hcap == NULL) {
4994 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: "
4995 		    "NO HCA found");
4996 		return (IBT_HCA_BUSY_DETACHING);
4997 	}
4998 
4999 	saa_handle = ibcm_get_saa_handle(hcap, hca_port.hp_port);
5000 	if (saa_handle == NULL) {
5001 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: saa_handle is NULL");
5002 		ibcm_dec_hca_acc_cnt(hcap);
5003 		return (IBT_HCA_PORT_NOT_ACTIVE);
5004 	}
5005 
5006 	/* Contact SA Access */
5007 	access_args.sq_attr_id = SA_MCMEMBERRECORD_ATTRID;
5008 	access_args.sq_access_type = IBMF_SAA_DELETE;
5009 	access_args.sq_component_mask = component_mask;
5010 	access_args.sq_template = &mcg_req;
5011 	access_args.sq_template_length = sizeof (sa_mcmember_record_t);
5012 	access_args.sq_callback = NULL;
5013 	access_args.sq_callback_arg = NULL;
5014 
5015 	ibcm_sa_access_enter();
5016 
5017 	sa_retval = ibmf_sa_access(saa_handle, &access_args, 0, &length,
5018 	    &results_p);
5019 	if (sa_retval != IBMF_SUCCESS) {
5020 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: SA access Failed: %d",
5021 		    sa_retval);
5022 		(void) ibcm_ibmf_analyze_error(sa_retval);
5023 		retval = IBT_MC_GROUP_INVALID;
5024 	}
5025 
5026 	ibcm_sa_access_exit();
5027 
5028 	ibcm_dec_hca_acc_cnt(hcap);
5029 
5030 	return (retval);
5031 }
5032 
5033 
5034 /*
5035  * Function:
5036  *	ibt_query_mcg
5037  * Input:
5038  *	rgid		The request GID that defines the HCA port upon which
5039  *			to send the request to the Subnet Administrator, to
5040  *			retrieve Multicast Records matching attributes as
5041  *			specified through 'mcg_attr' argument.
5042  *
5043  *	mcg_attr	NULL or a pointer to an ibt_mcg_attr_t structure that
5044  *			specifies MCG attributes that are to be matched.
5045  *			Attributes that are not required can be wild carded
5046  *			by specifying as '0'.
5047  *
5048  *	mcgs_max_num	The maximum number of matching multicast groups to
5049  *			return.  If zero, then all available matching multicast
5050  *			groups are returned.
5051  * Output:
5052  *	mcgs_info_p	The address of an ibt_mcg_info_t pointer, where
5053  *			multicast group information is returned. The actual
5054  *			number of entries filled in the array is returned in
5055  *			entries_p.
5056  *
5057  *	entries_p	The number of ibt_mcg_attr_t entries returned.
5058  * Returns:
5059  *	IBT_SUCCESS
5060  *	IBT_INVALID_PARAM
5061  *	IBT_MCG_RECORDS_NOT_FOUND
5062  * Description:
5063  *	Request information on multicast groups that match the parameters
5064  *	specified in mcg_attr. Information on each multicast group is returned
5065  *	to the caller in the form of an array of ibt_mcg_info_t.
5066  *	ibt_query_mcg() allocates the memory for this array and returns a
5067  *	pointer to the array (mcgs_p) and the number of entries in the array
5068  *	(entries_p). This memory should be freed by the client using
5069  *	ibt_free_mcg_info().
5070  */
5071 ibt_status_t
5072 ibt_query_mcg(ib_gid_t rgid, ibt_mcg_attr_t *mcg_attr, uint_t mcgs_max_num,
5073     ibt_mcg_info_t **mcgs_info_p, uint_t *entries_p)
5074 {
5075 	sa_mcmember_record_t	mcg_req;
5076 	sa_mcmember_record_t	*mcg_resp;
5077 	ibt_mcg_info_t		*mcg_infop;
5078 	ibmf_saa_access_args_t	access_args;
5079 	ibmf_saa_handle_t	saa_handle;
5080 	uint64_t		component_mask = 0;
5081 	ibt_status_t		retval;
5082 	ibtl_cm_hca_port_t	hport;
5083 	uint_t			num_records;
5084 	size_t			length;
5085 	void			*results_p;
5086 	ib_gid_t		port_gid;
5087 	ibcm_hca_info_t		*hcap;
5088 
5089 	IBTF_DPRINTF_L3(cmlog, "ibt_query_mcg(%p, %d)", mcg_attr, mcgs_max_num);
5090 
5091 	if ((entries_p == NULL) || (mcgs_info_p == NULL)) {
5092 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: "
5093 		    "entries_p or mcgs_info_p is NULL");
5094 		return (IBT_INVALID_PARAM);
5095 	}
5096 
5097 	if ((rgid.gid_prefix == 0) || (rgid.gid_guid == 0)) {
5098 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: RequestGID is required");
5099 		return (IBT_INVALID_PARAM);
5100 	}
5101 	IBTF_DPRINTF_L4(cmlog, "ibt_query_mcg: Request GID <%llX:%llX>",
5102 	    rgid.gid_prefix, rgid.gid_guid);
5103 
5104 	bzero(&mcg_req, sizeof (sa_mcmember_record_t));
5105 	port_gid.gid_prefix = port_gid.gid_guid = 0;
5106 
5107 	if (mcg_attr != NULL) {
5108 		port_gid = mcg_attr->mc_pgid;
5109 
5110 		if ((port_gid.gid_prefix != 0) && (port_gid.gid_guid != 0)) {
5111 			mcg_req.PortGID = mcg_attr->mc_pgid;
5112 			component_mask |= SA_MC_COMPMASK_PORTGID;
5113 
5114 			IBTF_DPRINTF_L4(cmlog, "ibt_query_mcg: PGID %llX:%llX",
5115 			    port_gid.gid_prefix, port_gid.gid_guid);
5116 		}
5117 
5118 		/* Is Q_Key specified. */
5119 		if (mcg_attr->mc_qkey != 0) {
5120 			mcg_req.Q_Key = mcg_attr->mc_qkey;
5121 			component_mask |= SA_MC_COMPMASK_QKEY;
5122 		}
5123 
5124 		/* Is P_Key specified. */
5125 		if (mcg_attr->mc_pkey != 0) {
5126 			mcg_req.P_Key = mcg_attr->mc_pkey;
5127 			component_mask |= SA_MC_COMPMASK_PKEY;
5128 		}
5129 
5130 		/* Is MGID specified. */
5131 		if ((mcg_attr->mc_mgid.gid_prefix >> 56ULL & 0xFF) == 0xFF) {
5132 			mcg_req.MGID = mcg_attr->mc_mgid;
5133 			component_mask |= SA_MC_COMPMASK_MGID;
5134 		}
5135 
5136 		/* Is MTU specified. */
5137 		if (mcg_attr->mc_mtu_req.r_mtu) {
5138 			mcg_req.MTU = mcg_attr->mc_mtu_req.r_mtu;
5139 			mcg_req.MTUSelector = mcg_attr->mc_mtu_req.r_selector;
5140 
5141 			component_mask |= SA_MC_COMPMASK_MTUSELECTOR |
5142 			    SA_MC_COMPMASK_MTU;
5143 		}
5144 
5145 		if (mcg_attr->mc_tclass) {
5146 			mcg_req.TClass = mcg_attr->mc_tclass;
5147 			component_mask |= SA_MC_COMPMASK_TCLASS;
5148 		}
5149 
5150 		/* Is RATE specified. */
5151 		if (mcg_attr->mc_rate_req.r_srate) {
5152 			mcg_req.Rate = mcg_attr->mc_rate_req.r_srate;
5153 			mcg_req.RateSelector = mcg_attr->mc_rate_req.r_selector;
5154 
5155 			component_mask |= SA_MC_COMPMASK_RATESELECTOR |
5156 			    SA_MC_COMPMASK_RATE;
5157 		}
5158 
5159 		/* Is Packet Life Time specified. */
5160 		if (mcg_attr->mc_pkt_lt_req.p_pkt_lt) {
5161 			mcg_req.Rate = mcg_attr->mc_pkt_lt_req.p_pkt_lt;
5162 			mcg_req.RateSelector =
5163 			    mcg_attr->mc_pkt_lt_req.p_selector;
5164 
5165 			component_mask |= SA_MC_COMPMASK_PKTLTSELECTOR |
5166 			    SA_MC_COMPMASK_PKTLT;
5167 		}
5168 
5169 		if (mcg_attr->mc_hop) {
5170 			mcg_req.HopLimit = mcg_attr->mc_hop;
5171 			component_mask |= SA_MC_COMPMASK_HOPLIMIT;
5172 		}
5173 
5174 		if (mcg_attr->mc_flow) {
5175 			mcg_req.FlowLabel = mcg_attr->mc_flow;
5176 			component_mask |= SA_MC_COMPMASK_FLOWLABEL;
5177 		}
5178 
5179 		if (mcg_attr->mc_sl) {
5180 			mcg_req.SL = mcg_attr->mc_sl;
5181 			component_mask |= SA_MC_COMPMASK_SL;
5182 		}
5183 
5184 		if (mcg_attr->mc_scope) {
5185 			mcg_req.Scope = mcg_attr->mc_scope;
5186 			component_mask |= SA_MC_COMPMASK_SCOPE;
5187 		}
5188 
5189 		if (mcg_attr->mc_join_state) {
5190 			mcg_req.JoinState = mcg_attr->mc_join_state;
5191 			component_mask |= SA_MC_COMPMASK_JOINSTATE;
5192 		}
5193 
5194 		if (mcg_attr->mc_mlid) {
5195 			mcg_req.MLID = mcg_attr->mc_mlid;
5196 			component_mask |= SA_MC_COMPMASK_MLID;
5197 		}
5198 	}
5199 
5200 	retval = ibtl_cm_get_hca_port(rgid, 0, &hport);
5201 	if (retval != IBT_SUCCESS) {
5202 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: Failed to get port info "
5203 		    "from specified RGID : status = %d", retval);
5204 		return (retval);
5205 	}
5206 
5207 	/* Get SA Access Handle. */
5208 	hcap = ibcm_find_hca_entry(hport.hp_hca_guid);
5209 	if (hcap == NULL) {
5210 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: NO HCA found");
5211 		return (IBT_HCA_BUSY_DETACHING);
5212 	}
5213 
5214 	saa_handle = ibcm_get_saa_handle(hcap, hport.hp_port);
5215 	if (saa_handle == NULL) {
5216 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: saa_handle is NULL");
5217 		ibcm_dec_hca_acc_cnt(hcap);
5218 		return (IBT_HCA_PORT_NOT_ACTIVE);
5219 	}
5220 
5221 	/* Contact SA Access */
5222 	access_args.sq_attr_id = SA_MCMEMBERRECORD_ATTRID;
5223 	access_args.sq_access_type = IBMF_SAA_RETRIEVE;
5224 	access_args.sq_component_mask = component_mask;
5225 	access_args.sq_template = &mcg_req;
5226 	access_args.sq_template_length = sizeof (sa_mcmember_record_t);
5227 	access_args.sq_callback = NULL;
5228 	access_args.sq_callback_arg = NULL;
5229 
5230 	retval = ibcm_contact_sa_access(saa_handle, &access_args, &length,
5231 	    &results_p);
5232 	if (retval != IBT_SUCCESS) {
5233 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: SA access Failed");
5234 		ibcm_dec_hca_acc_cnt(hcap);
5235 		return (retval);
5236 	}
5237 
5238 	num_records = length/sizeof (sa_mcmember_record_t);
5239 
5240 	IBTF_DPRINTF_L4(cmlog, "ibt_query_mcg: Found %d MCMember Records",
5241 	    num_records);
5242 
5243 	/* Validate the returned number of records. */
5244 	if ((results_p != NULL) && (num_records > 0)) {
5245 		uint_t	i;
5246 
5247 		/*
5248 		 * If mcgs_max_num is zero, then return all records else
5249 		 * return only requested number of records
5250 		 */
5251 		if ((mcgs_max_num != 0) && (num_records > mcgs_max_num)) {
5252 			/* we are interested in only mcgs_max_num records */
5253 			num_records = mcgs_max_num;
5254 		}
5255 
5256 		/*
5257 		 * The SGID returned in "mcg_info_p" buffer should be PortGID,
5258 		 * (mcg_attr->mc_pgid), if 'mcg_attr->mc_pgid' was specified,
5259 		 * else RequestGID (rgid) should be returned.
5260 		 */
5261 		if ((port_gid.gid_prefix != 0) && (port_gid.gid_guid != 0)) {
5262 
5263 			/* Get sgid_ix and port number of 'port_gid' */
5264 			retval = ibtl_cm_get_hca_port(port_gid, 0, &hport);
5265 			if (retval != IBT_SUCCESS) {
5266 				IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: "
5267 				    "Failed to Get Portinfo for PortGID :"
5268 				    "status = %d", retval);
5269 				return (retval);
5270 			}
5271 		} else {
5272 			/*
5273 			 * The sgid_ix and port number related to RequestGID
5274 			 * are already obtained at the beginning.
5275 			 */
5276 			port_gid = rgid;
5277 		}
5278 
5279 		/*
5280 		 * Allocate memory for return buffer, to be freed in
5281 		 * ibt_free_mcg_info().
5282 		 */
5283 		mcg_infop = kmem_alloc((num_records * sizeof (ibt_mcg_info_t)),
5284 		    KM_SLEEP);
5285 
5286 		*mcgs_info_p = mcg_infop;
5287 		*entries_p = num_records;
5288 
5289 		/* Update the return values. */
5290 		for (i = 0; i < num_records; i++) {
5291 
5292 			mcg_resp = (sa_mcmember_record_t *)((uchar_t *)
5293 			    results_p + i * sizeof (sa_mcmember_record_t));
5294 
5295 			mcg_infop[i].mc_adds_vect.av_dgid = mcg_resp->MGID;
5296 			mcg_infop[i].mc_adds_vect.av_sgid = port_gid;
5297 			mcg_infop[i].mc_adds_vect.av_srate = mcg_resp->Rate;
5298 			mcg_infop[i].mc_adds_vect.av_srvl = mcg_resp->SL;
5299 			mcg_infop[i].mc_adds_vect.av_flow = mcg_resp->FlowLabel;
5300 			mcg_infop[i].mc_adds_vect.av_tclass = mcg_resp->TClass;
5301 			mcg_infop[i].mc_adds_vect.av_hop = mcg_resp->HopLimit;
5302 			mcg_infop[i].mc_adds_vect.av_port_num = hport.hp_port;
5303 			mcg_infop[i].mc_adds_vect.av_send_grh = B_TRUE;
5304 			mcg_infop[i].mc_adds_vect.av_dlid = mcg_resp->MLID;
5305 			mcg_infop[i].mc_adds_vect.av_sgid_ix = hport.hp_sgid_ix;
5306 			mcg_infop[i].mc_adds_vect.av_src_path = 0;
5307 			mcg_infop[i].mc_mtu = mcg_resp->MTU;
5308 			mcg_infop[i].mc_qkey = mcg_resp->Q_Key;
5309 			mcg_infop[i].mc_scope = mcg_resp->Scope;
5310 			mcg_infop[i].mc_pkt_lt = mcg_resp->PacketLifeTime;
5311 
5312 			if (ibt_pkey2index_byguid(hport.hp_hca_guid,
5313 			    hport.hp_port, mcg_resp->P_Key,
5314 			    &mcg_infop[i].mc_pkey_ix) != IBT_SUCCESS) {
5315 				IBTF_DPRINTF_L3(cmlog, "ibt_query_mcg: "
5316 				    "Pkey2Index Conversion failed");
5317 				mcg_infop[i].mc_pkey_ix = 0;
5318 			}
5319 		}
5320 
5321 		/*
5322 		 * Deallocate the memory allocated by SA for results_p.
5323 		 */
5324 		kmem_free(results_p, length);
5325 		retval = IBT_SUCCESS;
5326 
5327 		IBTF_DPRINTF_L3(cmlog, "ibt_query_mcg: returning %d MCGRecords",
5328 		    num_records);
5329 
5330 	} else {
5331 		retval = IBT_MCG_RECORDS_NOT_FOUND;
5332 		*entries_p = 0;
5333 
5334 		IBTF_DPRINTF_L3(cmlog, "ibt_query_mcg: MCG RECORDS NOT FOUND");
5335 	}
5336 
5337 	ibcm_dec_hca_acc_cnt(hcap);
5338 
5339 	return (retval);
5340 }
5341 
5342 
5343 /*
5344  * ibt_free_mcg_info()
5345  *	Free the memory allocated by successful ibt_query_mcg()
5346  *
5347  *	mcgs_info	Pointer returned by ibt_query_mcg().
5348  *
5349  *	entries		The number of ibt_mcg_info_t entries to free.
5350  */
5351 void
5352 ibt_free_mcg_info(ibt_mcg_info_t *mcgs_info, uint_t entries)
5353 {
5354 	IBTF_DPRINTF_L3(cmlog, "ibt_free_mcg_info: "
5355 	    "Free <%d> entries from 0x%p", entries, mcgs_info);
5356 
5357 	if ((mcgs_info != NULL) && (entries > 0))
5358 		kmem_free(mcgs_info, entries * sizeof (ibt_mcg_info_t));
5359 	else
5360 		IBTF_DPRINTF_L2(cmlog, "ibt_free_mcg_info: "
5361 		    "ERROR: NULL buf pointer or length specified.");
5362 }
5363 
5364 
5365 /*
5366  * Function:
5367  *	ibt_gid_to_node_info()
5368  * Input:
5369  *	gid		Identifies the IB Node and port for which to obtain
5370  *			Node information.
5371  * Output:
5372  *	node_info_p	A pointer to an ibt_node_info_t structure (allocated
5373  *			by the caller) in which to return the node information.
5374  * Returns:
5375  *	IBT_SUCCESS
5376  *	IBT_INVALID_PARAM
5377  *	IBT_NODE_RECORDS_NOT_FOUND
5378  *	IBT_NO_HCAS_AVAILABLE
5379  * Description:
5380  *	Retrieve Node Information for the specified GID.
5381  */
5382 ibt_status_t
5383 ibt_gid_to_node_info(ib_gid_t gid, ibt_node_info_t *node_info_p)
5384 {
5385 	sa_node_record_t	nr_req, *nr_resp;
5386 	ibmf_saa_handle_t	saa_handle;
5387 	ibt_status_t		retval;
5388 	ibcm_hca_info_t		*hcap;
5389 	ibtl_cm_hca_port_t	hport;
5390 	int			i, j;
5391 	uint_t			num_rec;
5392 	ib_guid_t		*guid_array = NULL;
5393 	sa_path_record_t	*path;
5394 	size_t			len;
5395 	uint8_t			npaths;
5396 	uint32_t		num_hcas = 0;
5397 	ib_lid_t		node_lid;
5398 	boolean_t		local_node = B_FALSE;
5399 	void			*res_p;
5400 	uint8_t			num_ports = 0;
5401 
5402 
5403 	IBTF_DPRINTF_L4(cmlog, "ibt_gid_to_node_info(%llX:%llX, %p)",
5404 	    gid.gid_prefix, gid.gid_guid, node_info_p);
5405 
5406 	if ((gid.gid_prefix == 0) || (gid.gid_guid == 0)) {
5407 		IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: GID is required");
5408 		return (IBT_INVALID_PARAM);
5409 	}
5410 
5411 	if (node_info_p == NULL) {
5412 		IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: "
5413 		    "Return Buf (node_info_p) is NULL.");
5414 		return (IBT_INVALID_PARAM);
5415 	}
5416 
5417 	/*
5418 	 * If 'gid' is on local node, then get node lid (i.e. base lid of the
5419 	 * associated port) info via ibtl_cm_get_hca_port() call.
5420 	 */
5421 	bzero(&hport, sizeof (ibtl_cm_hca_port_t));
5422 	if (ibtl_cm_get_hca_port(gid, 0, &hport) == IBT_SUCCESS) {
5423 
5424 		hcap = ibcm_find_hca_entry(hport.hp_hca_guid);
5425 		if (hcap == NULL) {
5426 			IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
5427 			    "HCA(%llX) info not found", hport.hp_hca_guid);
5428 			return (IBT_NO_HCAS_AVAILABLE);
5429 		}
5430 		num_ports = 1;
5431 		num_hcas = 1;
5432 		node_lid = hport.hp_base_lid;
5433 		local_node = B_TRUE;
5434 		IBTF_DPRINTF_L4(cmlog, "ibt_gid_to_node_info: Local Node: "
5435 		    "LID = 0x%X", node_lid);
5436 	} else {
5437 		/* Get the number of HCAs and their GUIDs */
5438 		num_hcas = ibt_get_hca_list(&guid_array);
5439 		IBTF_DPRINTF_L4(cmlog, "ibt_gid_to_node_info: ibt_get_hca_list "
5440 		    "returned %d hcas", num_hcas);
5441 
5442 		if (num_hcas == 0) {
5443 			IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: "
5444 			    "NO HCA's Found on this system");
5445 			return (IBT_NO_HCAS_AVAILABLE);
5446 		}
5447 	}
5448 
5449 	for (i = 0; i < num_hcas; i++) {
5450 		if (local_node == B_FALSE) {
5451 			hcap = ibcm_find_hca_entry(guid_array[i]);
5452 			if (hcap == NULL) {
5453 				IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
5454 				    "HCA(%llX) info not found", guid_array[i]);
5455 				retval = IBT_NO_HCAS_AVAILABLE;
5456 				continue;
5457 			}
5458 			num_ports = hcap->hca_num_ports;
5459 		}
5460 
5461 		for (j = 0; j < num_ports; j++) {
5462 			uint8_t		port = 0;
5463 
5464 			if (local_node == B_TRUE)
5465 				port = hport.hp_port;
5466 			else
5467 				port = j + 1;
5468 
5469 			/* Get SA Access Handle. */
5470 			saa_handle = ibcm_get_saa_handle(hcap, port);
5471 			if (saa_handle == NULL) {
5472 				IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
5473 				    "Port %d of HCA (%llX) is NOT ACTIVE",
5474 				    port, hport.hp_hca_guid);
5475 				retval = IBT_NODE_RECORDS_NOT_FOUND;
5476 				continue;
5477 			}
5478 
5479 			if (local_node == B_FALSE) {
5480 				ib_gid_t	sgid;
5481 				int		sa_ret;
5482 
5483 				/*
5484 				 * Check whether 'gid' and this port has same
5485 				 * subnet prefix. If not, then there is no use
5486 				 * in searching from this port.
5487 				 */
5488 				sgid = hcap->hca_port_info[j].port_sgid0;
5489 				if (gid.gid_prefix != sgid.gid_prefix) {
5490 					IBTF_DPRINTF_L3(cmlog,
5491 					    "ibt_gid_to_node_info:Sn_Prefix of "
5492 					    "GID(%llX) and Port's(%llX) differ",
5493 					    gid.gid_prefix, sgid.gid_prefix);
5494 					retval = IBT_NODE_RECORDS_NOT_FOUND;
5495 					continue;
5496 				}
5497 
5498 				/*
5499 				 * First Get Path Records for the specified DGID
5500 				 * from this port (SGID). From Path Records,
5501 				 * note down DLID, then use this DLID as Input
5502 				 * attribute to get NodeRecords from SA Access.
5503 				 */
5504 				npaths = 1;
5505 				path = NULL;
5506 
5507 				sa_ret = ibmf_saa_gid_to_pathrecords(saa_handle,
5508 				    sgid, gid, 0, 0, B_TRUE, &npaths, 0, &len,
5509 				    &path);
5510 				if (sa_ret != IBMF_SUCCESS) {
5511 					IBTF_DPRINTF_L2(cmlog,
5512 					    "ibt_gid_to_node_info: "
5513 					    "ibmf_saa_gid_to_pathrecords() "
5514 					    "returned error: %d ", sa_ret);
5515 					retval =
5516 					    ibcm_ibmf_analyze_error(sa_ret);
5517 					continue;
5518 				} else if ((npaths == 0) || (path == NULL)) {
5519 					IBTF_DPRINTF_L3(cmlog,
5520 					    "ibt_gid_to_node_info: failed (%d) "
5521 					    "to get path records for the DGID "
5522 					    "0x%llX from SGID 0x%llX", sa_ret,
5523 					    gid.gid_guid, sgid.gid_guid);
5524 					retval = IBT_NODE_RECORDS_NOT_FOUND;
5525 					continue;
5526 				}
5527 				node_lid = path->DLID;	/* LID */
5528 
5529 				IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
5530 				    "Remote Node: LID = 0x%X", node_lid);
5531 
5532 				/* Free SA_Access memory for path record. */
5533 				kmem_free(path, len);
5534 			}
5535 
5536 			/* Retrieve Node Records from SA Access. */
5537 			bzero(&nr_req, sizeof (sa_node_record_t));
5538 
5539 			nr_req.LID = node_lid;	/* LID */
5540 
5541 			retval = ibcm_get_node_rec(saa_handle, &nr_req,
5542 			    SA_NODEINFO_COMPMASK_NODELID, &res_p, &len);
5543 			if (retval == IBT_NODE_RECORDS_NOT_FOUND) {
5544 				IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: "
5545 				    "failed (%d) to get Node records", retval);
5546 				continue;
5547 			} else if (retval != IBT_SUCCESS) {
5548 				IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: "
5549 				    "failed (%d) to get Node records", retval);
5550 				ibcm_dec_hca_acc_cnt(hcap);
5551 				goto gid_to_ni_exit;
5552 			}
5553 
5554 			num_rec = len/sizeof (sa_node_record_t);
5555 			nr_resp = (sa_node_record_t *)(uchar_t *)res_p;
5556 
5557 			/* Validate the returned number of records. */
5558 			if ((nr_resp != NULL) && (num_rec > 0)) {
5559 
5560 				IBCM_DUMP_NODE_REC(nr_resp);
5561 
5562 				_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(
5563 				    *node_info_p))
5564 
5565 				node_info_p->n_sys_img_guid =
5566 				    nr_resp->NodeInfo.SystemImageGUID;
5567 				node_info_p->n_node_guid =
5568 				    nr_resp->NodeInfo.NodeGUID;
5569 				node_info_p->n_port_guid =
5570 				    nr_resp->NodeInfo.PortGUID;
5571 				node_info_p->n_dev_id =
5572 				    nr_resp->NodeInfo.DeviceID;
5573 				node_info_p->n_revision =
5574 				    nr_resp->NodeInfo.Revision;
5575 				node_info_p->n_vendor_id =
5576 				    nr_resp->NodeInfo.VendorID;
5577 				node_info_p->n_num_ports =
5578 				    nr_resp->NodeInfo.NumPorts;
5579 				node_info_p->n_port_num =
5580 				    nr_resp->NodeInfo.LocalPortNum;
5581 				node_info_p->n_node_type =
5582 				    nr_resp->NodeInfo.NodeType;
5583 				(void) strncpy(node_info_p->n_description,
5584 				    (char *)&nr_resp->NodeDescription, 64);
5585 
5586 				_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(
5587 				    *node_info_p))
5588 
5589 				/*
5590 				 * Deallocate the memory allocated by SA for
5591 				 * 'nr_resp'.
5592 				 */
5593 				ibcm_dec_hca_acc_cnt(hcap);
5594 				kmem_free(nr_resp, len);
5595 				retval = IBT_SUCCESS;
5596 
5597 				goto gid_to_ni_exit;
5598 			} else {
5599 				retval = IBT_NODE_RECORDS_NOT_FOUND;
5600 				IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
5601 				    "Node Records NOT found - PortGUID %016llX",
5602 				    gid.gid_guid);
5603 			}
5604 		}
5605 		ibcm_dec_hca_acc_cnt(hcap);
5606 
5607 		if (local_node == B_TRUE)
5608 			break;
5609 	}
5610 
5611 gid_to_ni_exit:
5612 	if (guid_array)
5613 		ibt_free_hca_list(guid_array, num_hcas);
5614 
5615 	IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: done. Status %d", retval);
5616 
5617 	return (retval);
5618 }
5619 
5620 
5621 static ibt_status_t
5622 ibcm_get_node_rec(ibmf_saa_handle_t saa_handle, sa_node_record_t *nr_req,
5623     uint64_t component_mask, void *result_p, size_t *len)
5624 {
5625 	ibmf_saa_access_args_t  args;
5626 	size_t			length;
5627 	ibt_status_t		retval;
5628 
5629 	args.sq_attr_id = SA_NODERECORD_ATTRID;
5630 	args.sq_template = nr_req;
5631 	args.sq_access_type = IBMF_SAA_RETRIEVE;
5632 	args.sq_template_length = sizeof (sa_node_record_t);
5633 	args.sq_component_mask = component_mask;
5634 	args.sq_callback = NULL;
5635 	args.sq_callback_arg = NULL;
5636 
5637 	retval = ibcm_contact_sa_access(saa_handle, &args, &length, result_p);
5638 	if (retval != IBT_SUCCESS) {
5639 		IBTF_DPRINTF_L2(cmlog, "ibcm_get_node_rec: SA Call Failed");
5640 		return (retval);
5641 	}
5642 
5643 	*len = length;
5644 
5645 	/* Validate the returned number of records. */
5646 	if ((result_p != NULL) && (length > 0)) {
5647 		IBTF_DPRINTF_L3(cmlog, "ibcm_get_node_rec: Node Records FOUND");
5648 
5649 		/* Got it, done!. */
5650 		return (IBT_SUCCESS);
5651 	} else {
5652 		IBTF_DPRINTF_L2(cmlog, "ibcm_get_node_rec: Node Rec NOT found");
5653 		return (IBT_NODE_RECORDS_NOT_FOUND);
5654 	}
5655 }
5656 
5657 
5658 /*
5659  * Function:
5660  *	ibt_get_companion_port_gids()
5661  * Description:
5662  *	Get list of GID's available on a companion port(s) of the specified
5663  *	GID or list of GIDs available on a specified Node GUID/SystemImage GUID.
5664  */
5665 ibt_status_t
5666 ibt_get_companion_port_gids(ib_gid_t gid, ib_guid_t hca_guid,
5667     ib_guid_t sysimg_guid, ib_gid_t **gids_p, uint_t *num_gids_p)
5668 {
5669 	sa_node_record_t	nr_req, *nr_resp;
5670 	void			*res_p;
5671 	ibmf_saa_handle_t	saa_handle;
5672 	int			sa_ret;
5673 	ibt_status_t		retval = IBT_SUCCESS;
5674 	ibcm_hca_info_t		*hcap;
5675 	ibtl_cm_hca_port_t	hport;
5676 	int			i, j;
5677 	uint_t			num_rec;
5678 	ib_guid_t		*guid_array = NULL;
5679 	sa_path_record_t	*path;
5680 	size_t			len;
5681 	uint8_t			npaths;
5682 	uint32_t		num_hcas = 0;
5683 	boolean_t		local_node = B_FALSE;
5684 	boolean_t		local_hca = B_FALSE;
5685 	ib_guid_t		h_guid = hca_guid;
5686 	ib_gid_t		*gidp = NULL, *t_gidp = NULL;
5687 	int			multi_hca_loop = 0;
5688 
5689 	IBTF_DPRINTF_L4(cmlog, "ibt_get_companion_port_gids(%llX:%llX, %llX, "
5690 	    "%llX)", gid.gid_prefix, gid.gid_guid, hca_guid, sysimg_guid);
5691 
5692 	if (((gid.gid_prefix == 0) || (gid.gid_guid == 0)) && (hca_guid == 0) &&
5693 	    (sysimg_guid == 0)) {
5694 		IBTF_DPRINTF_L2(cmlog, "ibt_get_companion_port_gids: "
5695 		    "Null Input attribute specified.");
5696 		return (IBT_INVALID_PARAM);
5697 	}
5698 
5699 	if ((num_gids_p == NULL) || (gids_p == NULL)) {
5700 		IBTF_DPRINTF_L2(cmlog, "ibt_get_companion_port_gids: "
5701 		    "num_gids_p or gids_p is NULL");
5702 		return (IBT_INVALID_PARAM);
5703 	}
5704 
5705 	*num_gids_p = 0;
5706 
5707 	/* Get the number of HCAs and their GUIDs */
5708 	if ((num_hcas = ibt_get_hca_list(&guid_array)) == 0) {
5709 		IBTF_DPRINTF_L2(cmlog, "ibt_get_companion_port_gids: "
5710 		    "NO HCA's Found on this system");
5711 		return (IBT_NO_HCAS_AVAILABLE);
5712 	}
5713 
5714 	IBTF_DPRINTF_L4(cmlog, "ibt_get_companion_port_gids: "
5715 	    "ibt_get_hca_list() returned %d hcas", num_hcas);
5716 
5717 	/*
5718 	 * If 'gid' is on local node, then get node lid (i.e. base lid of the
5719 	 * associated port) info via ibtl_cm_get_hca_port() call.
5720 	 */
5721 	bzero(&hport, sizeof (ibtl_cm_hca_port_t));
5722 	if ((gid.gid_prefix != 0) && (gid.gid_guid != 0) &&
5723 	    (ibtl_cm_get_hca_port(gid, 0, &hport) == IBT_SUCCESS)) {
5724 
5725 		if ((hca_guid != 0) && (hca_guid != hport.hp_hca_guid)) {
5726 			IBTF_DPRINTF_L2(cmlog, "ibt_get_companion_port_gids: "
5727 			    "Invalid GID<->HCAGUID combination specified.");
5728 			retval = IBT_INVALID_PARAM;
5729 			goto get_comp_pgid_exit;
5730 		}
5731 		h_guid = hport.hp_hca_guid;
5732 		local_node = B_TRUE;
5733 
5734 		IBTF_DPRINTF_L4(cmlog, "ibt_get_companion_port_gids: "
5735 		    "Local Node: HCA (0x%llX)", h_guid);
5736 	} else if (h_guid) {	/* Is specified HCA GUID - local? */
5737 		for (i = 0; i < num_hcas; i++) {
5738 			if (h_guid == guid_array[i]) {
5739 				local_hca = B_TRUE;
5740 				break;
5741 			}
5742 		}
5743 	} else if (sysimg_guid) { /* Is specified SystemImage GUID - local? */
5744 		for (i = 0; i < num_hcas; i++) {
5745 			ibt_status_t	ret;
5746 			ibt_hca_attr_t	hca_attr;
5747 
5748 			ret = ibt_query_hca_byguid(guid_array[i], &hca_attr);
5749 			if (ret != IBT_SUCCESS) {
5750 				IBTF_DPRINTF_L2(cmlog,
5751 				    "ibt_get_companion_port_gids: HCA(%llX) "
5752 				    "info not found", guid_array[i]);
5753 				retval = IBT_NO_HCAS_AVAILABLE;
5754 				continue;
5755 			}
5756 			if (hca_attr.hca_si_guid == sysimg_guid) {
5757 				if ((hca_guid != 0) &&
5758 				    (hca_guid != hca_attr.hca_node_guid)) {
5759 					IBTF_DPRINTF_L2(cmlog,
5760 					    "ibt_get_companion_port_gids: "
5761 					    "Invalid SysImg<->HCA GUID "
5762 					    "combination specified.");
5763 					retval = IBT_INVALID_PARAM;
5764 					goto get_comp_pgid_exit;
5765 				}
5766 				local_hca = B_TRUE;
5767 				h_guid = hca_attr.hca_node_guid;
5768 				break;
5769 			}
5770 		}
5771 	}
5772 
5773 	if ((local_node == B_TRUE) || (local_hca == B_TRUE)) {
5774 		retval = ibtl_cm_get_local_comp_gids(h_guid, gid, gids_p,
5775 		    num_gids_p);
5776 		goto get_comp_pgid_exit;
5777 	}
5778 
5779 get_comp_for_multihca:
5780 	/* We will be here, if request is for remote node */
5781 	for (i = 0; i < num_hcas; i++) {
5782 		int		multism;
5783 		uint8_t		count = 0;
5784 		int		multi_sm_loop = 0;
5785 		uint_t		k = 0, l;
5786 
5787 		hcap = ibcm_find_hca_entry(guid_array[i]);
5788 		if (hcap == NULL) {
5789 			IBTF_DPRINTF_L3(cmlog, "ibt_get_companion_port_gids: "
5790 			    "HCA(%llX) info not found", guid_array[i]);
5791 			retval = IBT_NO_HCAS_AVAILABLE;
5792 			continue;
5793 		}
5794 
5795 		/* 1 - MultiSM, 0 - Single SM */
5796 		multism = ibtl_cm_is_multi_sm(guid_array[i]);
5797 
5798 		for (j = 0; j < hcap->hca_num_ports; j++) {
5799 			ib_gid_t	sgid;
5800 			uint64_t	c_mask = 0;
5801 			ib_guid_t	pg;
5802 			uint_t		port = j;
5803 
5804 get_comp_for_multism:
5805 			IBTF_DPRINTF_L3(cmlog, "ibt_get_companion_port_gids: "
5806 			    "Port %d, HCA %llX, MultiSM= %d, Loop=%d",
5807 			    port + 1, h_guid, multism, multi_sm_loop);
5808 
5809 			/* Get SA Access Handle. */
5810 			saa_handle = ibcm_get_saa_handle(hcap, port + 1);
5811 			if (saa_handle == NULL) {
5812 				IBTF_DPRINTF_L2(cmlog,
5813 				    "ibt_get_companion_port_gids: "
5814 				    "Port (%d)  - NOT ACTIVE", port + 1);
5815 				retval = IBT_GIDS_NOT_FOUND;
5816 				continue;
5817 			}
5818 
5819 			/*
5820 			 * Check whether 'gid' and this port has same subnet
5821 			 * prefix. If not, then there is no use in searching
5822 			 * from this port.
5823 			 */
5824 			sgid = hcap->hca_port_info[port].port_sgid0;
5825 			if ((h_guid == 0) && (gid.gid_prefix != 0) &&
5826 			    (multi_sm_loop == 0) &&
5827 			    (gid.gid_prefix != sgid.gid_prefix)) {
5828 				IBTF_DPRINTF_L2(cmlog,
5829 				    "ibt_get_companion_port_gids: SnPrefix of "
5830 				    "GID(%llX) and Port SN_Pfx(%llX) differ",
5831 				    gid.gid_prefix, sgid.gid_prefix);
5832 				retval = IBT_GIDS_NOT_FOUND;
5833 				continue;
5834 			}
5835 
5836 			/*
5837 			 * If HCA GUID or System Image GUID is specified, then
5838 			 * we can achieve our goal sooner!.
5839 			 */
5840 			if ((h_guid == 0) && (sysimg_guid == 0)) {
5841 				/* So only GID info is provided. */
5842 
5843 				/*
5844 				 * First Get Path Records for the specified DGID
5845 				 * from this port (SGID). From Path Records,
5846 				 * note down DLID, then use this DLID as Input
5847 				 * attribute to get NodeRecords.
5848 				 */
5849 				npaths = 1;
5850 				path = NULL;
5851 
5852 				sa_ret = ibmf_saa_gid_to_pathrecords(saa_handle,
5853 				    sgid, gid, 0, 0, B_TRUE, &npaths, 0, &len,
5854 				    &path);
5855 				if (sa_ret != IBMF_SUCCESS) {
5856 					IBTF_DPRINTF_L2(cmlog,
5857 					    "ibt_get_companion_port_gids: "
5858 					    "ibmf_saa_gid_to_pathrecords() "
5859 					    "returned error: %d ", sa_ret);
5860 					retval =
5861 					    ibcm_ibmf_analyze_error(sa_ret);
5862 					ibcm_dec_hca_acc_cnt(hcap);
5863 					goto get_comp_pgid_exit;
5864 				} else if ((npaths == 0) || (path == NULL)) {
5865 					IBTF_DPRINTF_L2(cmlog,
5866 					    "ibt_get_companion_port_gids: "
5867 					    "failed (%d) to get path records "
5868 					    "for the DGID (0x%llX) from SGID "
5869 					    "(0x%llX)", sa_ret, gid.gid_guid,
5870 					    sgid.gid_guid);
5871 					retval = IBT_GIDS_NOT_FOUND;
5872 					continue;
5873 				}
5874 
5875 				bzero(&nr_req, sizeof (sa_node_record_t));
5876 				nr_req.LID = path->DLID;	/* LID */
5877 
5878 				IBTF_DPRINTF_L3(cmlog,
5879 				    "ibt_get_companion_port_gids: "
5880 				    "Remote Node: LID = 0x%X", nr_req.LID);
5881 
5882 				/* Free SA_Access memory for path record. */
5883 				kmem_free(path, len);
5884 
5885 				IBTF_DPRINTF_L3(cmlog,
5886 				    "ibt_get_companion_port_gids: SAA Call: "
5887 				    "based on LID ");
5888 
5889 				retval = ibcm_get_node_rec(saa_handle, &nr_req,
5890 				    SA_NODEINFO_COMPMASK_NODELID, &res_p, &len);
5891 				if (retval == IBT_NODE_RECORDS_NOT_FOUND) {
5892 					IBTF_DPRINTF_L2(cmlog,
5893 					    "ibt_get_companion_port_gids: "
5894 					    "failed (%d) to get Node records",
5895 					    retval);
5896 					continue;
5897 				} else if (retval != IBT_SUCCESS) {
5898 					IBTF_DPRINTF_L2(cmlog,
5899 					    "ibt_get_companion_port_gids: "
5900 					    "failed (%d) to get Node records",
5901 					    retval);
5902 					ibcm_dec_hca_acc_cnt(hcap);
5903 					goto get_comp_pgid_exit;
5904 				}
5905 
5906 				nr_resp = (sa_node_record_t *)(uchar_t *)res_p;
5907 				/* Note down HCA GUID info. */
5908 				h_guid = nr_resp->NodeInfo.NodeGUID;
5909 
5910 				IBTF_DPRINTF_L3(cmlog,
5911 				    "ibt_get_companion_port_gids: "
5912 				    "Remote HCA GUID: 0x%llX", h_guid);
5913 
5914 				IBCM_DUMP_NODE_REC(nr_resp);
5915 
5916 				kmem_free(res_p, len);
5917 			}
5918 
5919 			bzero(&nr_req, sizeof (sa_node_record_t));
5920 			if (h_guid != 0) {
5921 				nr_req.NodeInfo.NodeGUID = h_guid;
5922 				c_mask = SA_NODEINFO_COMPMASK_NODEGUID;
5923 			}
5924 
5925 			if (sysimg_guid != 0) {
5926 				nr_req.NodeInfo.SystemImageGUID = sysimg_guid;
5927 				c_mask |= SA_NODEINFO_COMPMASK_SYSIMAGEGUID;
5928 			}
5929 
5930 			IBTF_DPRINTF_L3(cmlog, "ibt_get_companion_port_gids: "
5931 			    "SAA Call: CMASK= 0x%llX", c_mask);
5932 
5933 			retval = ibcm_get_node_rec(saa_handle, &nr_req, c_mask,
5934 			    &res_p, &len);
5935 			if (retval == IBT_NODE_RECORDS_NOT_FOUND) {
5936 				IBTF_DPRINTF_L3(cmlog,
5937 				    "ibt_get_companion_port_gids: "
5938 				    "failed (%d) to get Node records", retval);
5939 				continue;
5940 			} else if (retval != IBT_SUCCESS) {
5941 				IBTF_DPRINTF_L2(cmlog,
5942 				    "ibt_get_companion_port_gids: Error: (%d) "
5943 				    "while getting Node records", retval);
5944 				ibcm_dec_hca_acc_cnt(hcap);
5945 				goto get_comp_pgid_exit;
5946 			}
5947 
5948 			num_rec = len/sizeof (sa_node_record_t);
5949 
5950 			/* We will be here, only if we found some NodeRec */
5951 			if (gid.gid_prefix && gid.gid_guid) {
5952 				nr_resp = (sa_node_record_t *)res_p;
5953 				for (l = 0; l < num_rec; l++, nr_resp++) {
5954 					pg = nr_resp->NodeInfo.PortGUID;
5955 					if (gid.gid_guid != pg)
5956 						count++;
5957 				}
5958 			} else {
5959 				count = num_rec;
5960 			}
5961 
5962 			if (count != 0) {
5963 				if (multi_sm_loop == 1) {
5964 					count += k;
5965 					t_gidp = kmem_zalloc(count *
5966 					    sizeof (ib_gid_t), KM_SLEEP);
5967 
5968 					if ((k != 0) && (gidp != NULL)) {
5969 						bcopy(gidp, t_gidp,
5970 						    k * sizeof (ib_gid_t));
5971 						kmem_free(gidp,
5972 						    k * sizeof (ib_gid_t));
5973 					}
5974 					gidp = t_gidp;
5975 				} else {
5976 					gidp = kmem_zalloc(count *
5977 					    sizeof (ib_gid_t), KM_SLEEP);
5978 				}
5979 				*num_gids_p = count;
5980 				*gids_p = gidp;
5981 
5982 				nr_resp = (sa_node_record_t *)res_p;
5983 				for (l = 0; l < num_rec; l++, nr_resp++) {
5984 					IBCM_DUMP_NODE_REC(nr_resp);
5985 
5986 					pg = nr_resp->NodeInfo.PortGUID;
5987 					IBTF_DPRINTF_L4(cmlog,
5988 					    "ibt_get_companion_port_gids: "
5989 					    "PortGID %llX", pg);
5990 
5991 					if (pg != gid.gid_guid) {
5992 						gidp[k].gid_prefix =
5993 						    sgid.gid_prefix;
5994 						gidp[k].gid_guid = pg;
5995 
5996 						IBTF_DPRINTF_L3(cmlog,
5997 						    "ibt_get_companion_pgids: "
5998 						    "GID[%d] = %llX:%llX", k,
5999 						    gidp[k].gid_prefix,
6000 						    gidp[k].gid_guid);
6001 
6002 						k++;
6003 						if (k == count)
6004 							break;
6005 					}
6006 				}
6007 				retval = IBT_SUCCESS;	/* done!. */
6008 				kmem_free(res_p, len);
6009 				ibcm_dec_hca_acc_cnt(hcap);
6010 				goto get_comp_pgid_exit;
6011 			} else {
6012 				IBTF_DPRINTF_L2(cmlog,
6013 				    "ibt_get_companion_port_gids: "
6014 				    "Companion PortGIDs not available");
6015 				retval = IBT_GIDS_NOT_FOUND;
6016 			}
6017 			/* Deallocate the memory for 'res_p'. */
6018 			kmem_free(res_p, len);
6019 
6020 			/*
6021 			 * If we are on MultiSM setup, then we need to lookout
6022 			 * from that subnet port too.
6023 			 */
6024 			if (multism) {
6025 				/* break if already searched both the subnet */
6026 				if (multi_sm_loop == 1)
6027 					break;
6028 
6029 				port = (j == 0) ? 1 : 0;
6030 				multi_sm_loop = 1;
6031 				goto get_comp_for_multism;
6032 			} else {
6033 				break;
6034 			}
6035 		}
6036 		ibcm_dec_hca_acc_cnt(hcap);
6037 
6038 		/*
6039 		 * We may be on dual HCA with dual SM configured system.  And
6040 		 * the input attr GID was visible from second HCA. So in order
6041 		 * to get the companion portgid we need to re-look from the
6042 		 * first HCA ports.
6043 		 */
6044 		if ((num_hcas > 1) && (i > 0) && (h_guid != 0) &&
6045 		    (multi_hca_loop != 1)) {
6046 			multi_hca_loop = 1;
6047 			goto get_comp_for_multihca;
6048 		}
6049 	}
6050 	if (*num_gids_p == 0)
6051 		retval = IBT_GIDS_NOT_FOUND;
6052 
6053 get_comp_pgid_exit:
6054 	if (guid_array)
6055 		ibt_free_hca_list(guid_array, num_hcas);
6056 
6057 	if ((retval != IBT_SUCCESS) && (*num_gids_p != 0)) {
6058 		retval = IBT_SUCCESS;
6059 	}
6060 
6061 	IBTF_DPRINTF_L3(cmlog, "ibt_get_companion_port_gids: done. Status %d, "
6062 	    "Found %d GIDs", retval, *num_gids_p);
6063 
6064 	return (retval);
6065 }
6066 
6067 /* RDMA IP CM Support routines */
6068 ibt_status_t
6069 ibt_get_src_ip(ib_gid_t gid, ib_pkey_t pkey, ibt_ip_addr_t *src_ip)
6070 {
6071 	ibcm_arp_ip_t		*ipp;
6072 	ibcm_arp_ibd_insts_t	ibds;
6073 	int			i;
6074 	boolean_t		found = B_FALSE;
6075 	ibt_status_t		retval = IBT_SUCCESS;
6076 
6077 	IBTF_DPRINTF_L4(cmlog, "ibt_get_src_ip(%llX:%llX, %X, %p)",
6078 	    gid.gid_prefix, gid.gid_guid, pkey, src_ip);
6079 
6080 	if (gid.gid_prefix == 0 || gid.gid_guid == 0) {
6081 		IBTF_DPRINTF_L3(cmlog, "ibt_get_src_ip: Invalid GID.");
6082 		return (IBT_INVALID_PARAM);
6083 	}
6084 
6085 	if (src_ip == NULL) {
6086 		IBTF_DPRINTF_L3(cmlog, "ibt_get_src_ip: ERROR: src_ip NULL");
6087 		return (IBT_INVALID_PARAM);
6088 	}
6089 
6090 	bzero(&ibds, sizeof (ibcm_arp_ibd_insts_t));
6091 	ibds.ibcm_arp_ibd_alloc = IBCM_ARP_IBD_INSTANCES;
6092 	ibds.ibcm_arp_ibd_cnt = 0;
6093 	ibds.ibcm_arp_ip = (ibcm_arp_ip_t *)kmem_zalloc(
6094 	    ibds.ibcm_arp_ibd_alloc * sizeof (ibcm_arp_ip_t), KM_SLEEP);
6095 
6096 	retval = ibcm_arp_get_ibds(&ibds);
6097 	if (retval != IBT_SUCCESS) {
6098 		IBTF_DPRINTF_L2(cmlog, "ibt_get_src_ip: ibcm_arp_get_ibds "
6099 		    "failed to get IBD Instances: ret 0x%x", retval);
6100 		goto get_src_ip_end;
6101 	}
6102 
6103 	for (i = 0, ipp = ibds.ibcm_arp_ip; i < ibds.ibcm_arp_ibd_cnt;
6104 	    i++, ipp++) {
6105 		if (ipp->ip_port_gid.gid_prefix == gid.gid_prefix &&
6106 		    ipp->ip_port_gid.gid_guid == gid.gid_guid) {
6107 			if (pkey) {
6108 				if (ipp->ip_pkey == pkey) {
6109 					found = B_TRUE;
6110 					break;
6111 				} else
6112 					continue;
6113 			}
6114 			found = B_TRUE;
6115 			break;
6116 		}
6117 	}
6118 
6119 	if (found == B_FALSE) {
6120 		retval = IBT_SRC_IP_NOT_FOUND;
6121 	} else {
6122 		src_ip->family = ipp->ip_inet_family;
6123 		if (src_ip->family == AF_INET) {
6124 			bcopy(&ipp->ip_cm_sin.sin_addr, &src_ip->un.ip4addr,
6125 			    sizeof (in_addr_t));
6126 			IBTF_DPRINTF_L4(cmlog, "ibt_get_src_ip: Got %lX",
6127 			    src_ip->un.ip4addr);
6128 		} else if (src_ip->family == AF_INET6) {
6129 			bcopy(&ipp->ip_cm_sin6.sin6_addr, &src_ip->un.ip6addr,
6130 			    sizeof (in6_addr_t));
6131 		}
6132 	}
6133 
6134 get_src_ip_end:
6135 	if (ibds.ibcm_arp_ip)
6136 		kmem_free(ibds.ibcm_arp_ip,
6137 		    ibds.ibcm_arp_ibd_alloc * sizeof (ibcm_arp_ip_t));
6138 
6139 	return (retval);
6140 }
6141 
6142 ib_svc_id_t
6143 ibt_get_ip_sid(uint8_t protocol_num, in_port_t dst_port)
6144 {
6145 	ib_svc_id_t	sid;
6146 
6147 	IBTF_DPRINTF_L4(cmlog, "ibt_get_ip_sid(%X, %lX)", protocol_num,
6148 	    dst_port);
6149 
6150 	/*
6151 	 * If protocol_num is non-zero, then formulate the SID and return it.
6152 	 * If protocol_num is zero, then we need to assign a locally generated
6153 	 * IP SID with IB_SID_IPADDR_PREFIX.
6154 	 */
6155 	if (protocol_num) {
6156 		sid = IB_SID_IPADDR_PREFIX | protocol_num << 16 | dst_port;
6157 	} else {
6158 		sid = ibcm_alloc_ip_sid();
6159 	}
6160 
6161 	IBTF_DPRINTF_L3(cmlog, "ibt_get_ip_sid: SID: 0x%016llX", sid);
6162 	return (sid);
6163 }
6164 
6165 ibt_status_t
6166 ibt_release_ip_sid(ib_svc_id_t ip_sid)
6167 {
6168 	IBTF_DPRINTF_L4(cmlog, "ibt_release_ip_sid(%llX)", ip_sid);
6169 
6170 	if (((ip_sid & IB_SID_IPADDR_PREFIX_MASK) != 0) ||
6171 	    (!(ip_sid & IB_SID_IPADDR_PREFIX))) {
6172 		IBTF_DPRINTF_L2(cmlog, "ibt_release_ip_sid(0x%016llX): ERROR: "
6173 		    "Called for Non-RDMA IP SID", ip_sid);
6174 		return (IBT_INVALID_PARAM);
6175 	}
6176 
6177 	/*
6178 	 * If protocol_num in ip_sid are all ZEROs, then this SID is allocated
6179 	 * by IBTF. If not, then the specified ip_sid is invalid.
6180 	 */
6181 	if (ip_sid & IB_SID_IPADDR_IPNUM_MASK) {
6182 		IBTF_DPRINTF_L2(cmlog, "ibt_release_ip_sid(0x%016llX): ERROR: "
6183 		    "Called for Non-IBTF assigned RDMA IP SID", ip_sid);
6184 		return (IBT_INVALID_PARAM);
6185 	}
6186 
6187 	ibcm_free_ip_sid(ip_sid);
6188 
6189 	return (IBT_SUCCESS);
6190 }
6191 
6192 
6193 uint8_t
6194 ibt_get_ip_protocol_num(ib_svc_id_t sid)
6195 {
6196 	return ((sid & IB_SID_IPADDR_IPNUM_MASK) >> 16);
6197 }
6198 
6199 in_port_t
6200 ibt_get_ip_dst_port(ib_svc_id_t sid)
6201 {
6202 	return (sid & IB_SID_IPADDR_PORTNUM_MASK);
6203 }
6204 
6205 _NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibt_ip_cm_info_t))
6206 _NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibcm_ip_pvtdata_t))
6207 
6208 ibt_status_t
6209 ibt_format_ip_private_data(ibt_ip_cm_info_t *ip_cm_info,
6210     ibt_priv_data_len_t priv_data_len, void *priv_data_p)
6211 {
6212 	ibcm_ip_pvtdata_t	*ip_data;
6213 
6214 	IBTF_DPRINTF_L4(cmlog, "ibt_format_ip_private_data(%p, %d, %p)",
6215 	    ip_cm_info, priv_data_len, priv_data_p);
6216 
6217 	if ((ip_cm_info == NULL) || (priv_data_p == NULL) ||
6218 	    (priv_data_len < IBT_IP_HDR_PRIV_DATA_SZ)) {
6219 		IBTF_DPRINTF_L2(cmlog, "ibt_format_ip_private_data: ERROR "
6220 		    "Invalid Inputs.");
6221 		return (IBT_INVALID_PARAM);
6222 	}
6223 
6224 	/* bzero'ing just IP_HDR part */
6225 	bzero(priv_data_p, IBT_IP_HDR_PRIV_DATA_SZ);
6226 	ip_data = (ibcm_ip_pvtdata_t *)priv_data_p;
6227 	ip_data->ip_srcport = b2h16(ip_cm_info->src_port); /* Source Port */
6228 
6229 	/* IPV = 0x4, if IP-Addr are IPv4 format, else 0x6 for IPv6 */
6230 	if (ip_cm_info->src_addr.family == AF_INET) {
6231 		ip_data->ip_ipv = IBT_CM_IP_IPV_V4;
6232 		ip_data->ip_srcv4 = ntohl(ip_cm_info->src_addr.un.ip4addr);
6233 		ip_data->ip_dstv4 = ntohl(ip_cm_info->dst_addr.un.ip4addr);
6234 	} else if (ip_cm_info->src_addr.family == AF_INET6) {
6235 		ip_data->ip_ipv = IBT_CM_IP_IPV_V6;
6236 		bcopy(&ip_cm_info->src_addr.un.ip6addr,
6237 		    &ip_data->ip_srcv6, sizeof (in6_addr_t));
6238 		bcopy(&ip_cm_info->dst_addr.un.ip6addr,
6239 		    &ip_data->ip_dstv6, sizeof (in6_addr_t));
6240 	} else {
6241 		IBTF_DPRINTF_L2(cmlog, "ibt_format_ip_private_data: ERROR "
6242 		    "IP Addr needs to be either AF_INET or AF_INET6 family.");
6243 		return (IBT_INVALID_PARAM);
6244 	}
6245 
6246 	ip_data->ip_MajV = IBT_CM_IP_MAJ_VER;
6247 	ip_data->ip_MinV = IBT_CM_IP_MIN_VER;
6248 
6249 	return (IBT_SUCCESS);
6250 }
6251 
6252 
6253 ibt_status_t
6254 ibt_get_ip_data(ibt_priv_data_len_t priv_data_len, void *priv_data,
6255     ibt_ip_cm_info_t *ip_cm_infop)
6256 {
6257 	ibcm_ip_pvtdata_t	*ip_data;
6258 
6259 	IBTF_DPRINTF_L4(cmlog, "ibt_get_ip_data(%d, %p, %p)",
6260 	    priv_data_len, priv_data, ip_cm_infop);
6261 
6262 	if ((ip_cm_infop == NULL) || (priv_data == NULL) ||
6263 	    (priv_data_len < IBT_IP_HDR_PRIV_DATA_SZ)) {
6264 		IBTF_DPRINTF_L2(cmlog, "ibt_get_ip_data: ERROR Invalid Inputs");
6265 		return (IBT_INVALID_PARAM);
6266 	}
6267 
6268 	bzero(ip_cm_infop, sizeof (ibt_ip_cm_info_t));
6269 
6270 	ip_data = (ibcm_ip_pvtdata_t *)priv_data;
6271 	ip_cm_infop->src_port = b2h16(ip_data->ip_srcport); /* Source Port */
6272 
6273 	/* IPV = 0x4, if IP Address are IPv4 format, else 0x6 for IPv6 */
6274 	if (ip_data->ip_ipv == IBT_CM_IP_IPV_V4) {
6275 		/* Copy IPv4 Addr */
6276 		ip_cm_infop->src_addr.family = AF_INET;
6277 		ip_cm_infop->src_addr.un.ip4addr = ntohl(ip_data->ip_srcv4);
6278 		ip_cm_infop->dst_addr.family = AF_INET;
6279 		ip_cm_infop->dst_addr.un.ip4addr = ntohl(ip_data->ip_dstv4);
6280 	} else if (ip_data->ip_ipv == IBT_CM_IP_IPV_V6) {
6281 		/* Copy IPv6 Addr */
6282 		ip_cm_infop->src_addr.family = AF_INET6;
6283 		bcopy(&ip_data->ip_srcv6, &ip_cm_infop->src_addr.un.ip6addr,
6284 		    sizeof (in6_addr_t));
6285 		ip_cm_infop->dst_addr.family = AF_INET6;
6286 		bcopy(&ip_data->ip_dstv6, &ip_cm_infop->dst_addr.un.ip6addr,
6287 		    sizeof (in6_addr_t));
6288 	} else {
6289 		IBTF_DPRINTF_L2(cmlog, "ibt_get_ip_data: ERROR: IP Addr needs"
6290 		    " to be either AF_INET or AF_INET6 family.");
6291 		return (IBT_INVALID_PARAM);
6292 	}
6293 
6294 	return (IBT_SUCCESS);
6295 }
6296 
6297 
6298 /* Routines for warlock */
6299 
6300 /* ARGSUSED */
6301 static void
6302 ibcm_dummy_mcg_handler(void *arg, ibt_status_t retval, ibt_mcg_info_t *minfo)
6303 {
6304 	ibcm_join_mcg_tqarg_t	dummy_mcg;
6305 
6306 	dummy_mcg.func = ibcm_dummy_mcg_handler;
6307 
6308 	IBTF_DPRINTF_L5(cmlog, "ibcm_dummy_mcg_handler: "
6309 	    "dummy_mcg.func %p", dummy_mcg.func);
6310 }
6311 
6312 
6313 /* ARGSUSED */
6314 static void
6315 ibcm_dummy_recycle_rc_handler(ibt_status_t retval, void *arg)
6316 {
6317 	ibcm_taskq_recycle_arg_t	dummy_rc_recycle;
6318 
6319 	dummy_rc_recycle.func = ibcm_dummy_recycle_rc_handler;
6320 
6321 	IBTF_DPRINTF_L5(cmlog, "ibcm_dummy_recycle_rc_handler: "
6322 	    "dummy_rc_recycle.func %p", dummy_rc_recycle.func);
6323 }
6324 
6325 
6326 /* ARGSUSED */
6327 static ibt_cm_status_t
6328 ibcm_dummy_ud_handler(void *priv, ibt_cm_ud_event_t *event,
6329     ibt_cm_ud_return_args_t *ret_args,
6330     void *priv_data, ibt_priv_data_len_t len)
6331 {
6332 	/*
6333 	 * Let warlock see that ibcm_local_handler_s::actual_cm_handler
6334 	 * points to this routine.
6335 	 */
6336 	ibcm_local_handler_t	p;
6337 	ibcm_ud_state_data_t	dummy_ud;
6338 
6339 	p.actual_cm_handler = ibcm_dummy_ud_handler;
6340 	dummy_ud.ud_cm_handler = ibcm_dummy_ud_handler;
6341 
6342 	IBTF_DPRINTF_L5(cmlog, "ibcm_dummy_ud_handler: p.actual_cm_handler %p"
6343 	    "dummy_ud.ud_cm_handler %p", p.actual_cm_handler,
6344 	    dummy_ud.ud_cm_handler);
6345 	/*
6346 	 * Call all routines that the client's callback routine could call.
6347 	 */
6348 
6349 	return (IBT_CM_ACCEPT);
6350 }
6351 
6352 /* ARGSUSED */
6353 static ibt_cm_status_t
6354 ibcm_dummy_rc_handler(void *priv, ibt_cm_event_t *event,
6355     ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
6356 {
6357 	ibcm_state_data_t	dummy_rc;
6358 
6359 	dummy_rc.cm_handler = ibcm_dummy_rc_handler;
6360 
6361 	IBTF_DPRINTF_L5(cmlog, "ibcm_dummy_rc_handler: "
6362 	    "dummy_ud.ud_cm_handler %p", dummy_rc.cm_handler);
6363 	/*
6364 	 * Call all routines that the client's callback routine could call.
6365 	 */
6366 
6367 	return (IBT_CM_ACCEPT);
6368 }
6369