xref: /illumos-gate/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_ti.c (revision d22e11eb92a44ef85ea64989dbff7134a35829cc)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/ib/mgt/ibcm/ibcm_impl.h>
27 #include <sys/ib/ibtl/ibti.h>
28 #include <sys/ib/mgt/ibcm/ibcm_arp.h>
29 
30 /*
31  * ibcm_ti.c
32  *	These routines implement the Communication Manager's interfaces to IBTL.
33  */
34 
35 /* CM rc recycle task args structure definition */
36 typedef struct ibcm_taskq_recycle_arg_s {
37 	ibt_channel_hdl_t	rc_chan;	/* channel handle; presumably the RC channel to recycle - TODO confirm */
38 	ibt_cep_flags_t		control;	/* CEP flags carried with the request; use not visible here */
39 	uint8_t			hca_port_num;	/* HCA port number carried with the request */
40 	ibt_recycle_handler_t	func;	/* recycle handler; presumably invoked on completion - TODO confirm */
41 	void			*arg;	/* opaque pointer; presumably handed back to func - TODO confirm */
42 } ibcm_taskq_recycle_arg_t;
43 
44 _NOTE(READ_ONLY_DATA(ibcm_taskq_recycle_arg_s))
45 
46 static ibt_status_t	ibcm_init_reply_addr(ibcm_hca_info_t *hcap,
47     ibcm_mad_addr_t *reply_addr, ibt_chan_open_args_t *chan_args,
48     ibt_chan_open_flags_t flags, ib_time_t *cm_pkt_lt, ib_lid_t prim_slid);
49 static void		ibcm_process_abort_via_taskq(void *args);
50 static ibt_status_t	ibcm_process_rc_recycle_ret(void *recycle_arg);
51 static ibt_status_t	ibcm_process_join_mcg(void *taskq_arg);
52 static void		ibcm_process_async_join_mcg(void *tq_arg);
53 
54 static ibt_status_t ibcm_get_node_rec(ibmf_saa_handle_t, sa_node_record_t *,
55     uint64_t c_mask, void *, size_t *);
56 
57 static ibt_status_t ibcm_close_rc_channel(ibt_channel_hdl_t channel,
58     ibcm_state_data_t *statep, ibt_execution_mode_t mode);
59 
60 /* Address Record management definitions */
61 #define	IBCM_DAPL_ATS_NAME	"DAPL Address Translation Service"
62 #define	IBCM_DAPL_ATS_SID	0x10000CE100415453ULL	/* low three bytes 0x41 0x54 0x53 are ASCII "ATS" */
63 #define	IBCM_DAPL_ATS_NBYTES	16	/* NOTE(review): presumably a byte count for ATS address data - confirm */
64 ibcm_svc_info_t *ibcm_ar_svcinfop;	/* non-static global; users are not visible in this part of the file */
65 ibcm_ar_t	*ibcm_ar_list;	/* non-static global; presumably head of the address record list - confirm */
66 
67 /*
68  * Tunable parameter to turn off the overriding of the pi_path_mtu value.
69  *	1 	By default, override the path record's pi_path_mtu value to
70  *		IB_MTU_1K for all RC channels. This is done only for
71  *		channels established on a Tavor HCA, and only when the path's
72  *		pi_path_mtu is greater than IB_MTU_1K.
73  *	0	Do not override; use pi_path_mtu as supplied.
74  */
75 int	ibcm_override_path_mtu = 1;
76 
77 #ifdef DEBUG
78 static void	ibcm_print_reply_addr(ibt_channel_hdl_t channel,
79 		    ibcm_mad_addr_t *cm_reply_addr);
80 #endif
81 
82 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_port_info_s::{port_ibmf_hdl}))
83 
84 /* access is controlled between ibcm_sm.c and ibcm_ti.c by CVs */
85 _NOTE(SCHEME_PROTECTS_DATA("Serialized access by CV", {ibt_rc_returns_t
86     ibt_ud_returns_t ibt_ap_returns_t ibt_ar_t}))
87 
88 /*
89  * Typically, clients initialize these args in one api call, and use in
90  * another api
91  */
92 _NOTE(SCHEME_PROTECTS_DATA("Expected usage of ibtl api by client",
93     {ibt_path_info_s ibt_cep_path_s ibt_adds_vect_s ibt_mcg_info_s ib_gid_s
94     ibt_ud_dest_attr_s ibt_ud_dest_s ibt_srv_data_s ibt_redirect_info_s}))
95 
96 /*
97  * ibt_open_rc_channel()
98  *	ibt_open_rc_channel opens a communication channel on the specified
99  *	channel to the specified service. For connection service type qp's
100  *	the CM initiates the CEP to establish the connection and transitions
101  *	the QP/EEC to the "Ready to send" State modifying the QP/EEC's
102  *	attributes as necessary.
103  *	The implementation of this function assumes that alt path is different
104  *	from primary path. It is assumed that the Path functions ensure that.
105  *
106  * RETURN VALUES:
107  *	IBT_SUCCESS	on success (or respective failure on error)
108  */
109 ibt_status_t
110 ibt_open_rc_channel(ibt_channel_hdl_t channel, ibt_chan_open_flags_t flags,
111     ibt_execution_mode_t mode, ibt_chan_open_args_t *chan_args,
112     ibt_rc_returns_t *ret_args)
113 {
114 	/* all fields that are related to REQ MAD formation */
115 
116 	ib_pkey_t		prim_pkey;
117 	ib_lid_t		primary_slid, alternate_slid;
118 	ib_qpn_t		local_qpn = 0;
119 	ib_guid_t		hca_guid;
120 	ib_qkey_t		local_qkey = 0;
121 	ib_eecn_t		local_eecn = 0;
122 	ib_eecn_t		remote_eecn = 0;
123 	boolean_t		primary_grh;
124 	boolean_t		alternate_grh = B_FALSE;
125 	ib_lid_t		base_lid;
126 	ib_com_id_t		local_comid;
127 	ibmf_msg_t		*ibmf_msg, *ibmf_msg_dreq;
128 	ibcm_req_msg_t		*req_msgp;
129 
130 	uint8_t			rdma_in, rdma_out;
131 	uint8_t			cm_retries;
132 	uint64_t		local_cm_proc_time;	/* In usec */
133 	uint8_t			local_cm_resp_time;	/* IB time */
134 	uint64_t		remote_cm_resp_time;	/* In usec */
135 	uint32_t		starting_psn = 0;
136 
137 	/* CM path related fields */
138 	ibmf_handle_t		ibmf_hdl;
139 	ibcm_qp_list_t		*cm_qp_entry;
140 	ibcm_mad_addr_t		cm_reply_addr;
141 
142 	uint8_t			cm_pkt_lt;
143 
144 	/* Local args for ibtl/internal CM functions called within */
145 	ibt_status_t		status;
146 	ibcm_status_t		lkup_status;
147 	ibt_qp_query_attr_t	qp_query_attr;
148 
149 	/* Other misc local args */
150 	ibt_priv_data_len_t	len;
151 	ibcm_hca_info_t		*hcap;
152 	ibcm_state_data_t	*statep;
153 	uint8_t			port_no;
154 
155 	IBTF_DPRINTF_L3(cmlog, "ibt_open_rc_channel(chan %p, %X, %x, %p, %p)",
156 	    channel, flags, mode, chan_args, ret_args);
157 
158 	if (IBCM_INVALID_CHANNEL(channel)) {
159 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: invalid channel");
160 		return (IBT_CHAN_HDL_INVALID);
161 	}
162 
163 	/* cm handler should always be specified */
164 	if (chan_args->oc_cm_handler == NULL) {
165 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
166 		    "CM handler is not be specified", channel);
167 		return (IBT_INVALID_PARAM);
168 	}
169 
170 	if (mode == IBT_NONBLOCKING) {
171 		if (ret_args != NULL) {
172 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
173 			    " ret_args should be NULL when called in "
174 			    "non-blocking mode", channel);
175 			return (IBT_INVALID_PARAM);
176 		}
177 	} else if (mode == IBT_BLOCKING) {
178 		if (ret_args == NULL) {
179 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
180 			    " ret_args should be Non-NULL when called in "
181 			    "blocking mode", channel);
182 			return (IBT_INVALID_PARAM);
183 		}
184 		if (ret_args->rc_priv_data_len > IBT_REP_PRIV_DATA_SZ) {
185 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
186 			    " private data length is too large", channel);
187 			return (IBT_INVALID_PARAM);
188 		}
189 		if ((ret_args->rc_priv_data_len > 0) &&
190 		    (ret_args->rc_priv_data == NULL)) {
191 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
192 			    " rc_priv_data_len > 0, but rc_priv_data NULL",
193 			    channel);
194 			return (IBT_INVALID_PARAM);
195 		}
196 	} else { /* any other mode is not valid for ibt_open_rc_channel */
197 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
198 		    "invalid mode %x specified", channel, mode);
199 		return (IBT_INVALID_PARAM);
200 	}
201 
202 	/*
203 	 * XXX: no support yet for ibt_chan_open_flags_t - IBT_OCHAN_DUP
204 	 */
205 	if (flags & IBT_OCHAN_DUP) {
206 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
207 		    "Unsupported Flags specified: 0x%X", channel, flags);
208 		return (IBT_INVALID_PARAM);
209 	}
210 
211 	if ((flags & IBT_OCHAN_REDIRECTED) &&
212 	    (flags & IBT_OCHAN_PORT_REDIRECTED)) {
213 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
214 		    "Illegal to specify IBT_OCHAN_REDIRECTED and "
215 		    "IBT_OCHAN_PORT_REDIRECTED flags together", channel);
216 		return (IBT_INVALID_PARAM);
217 	}
218 
219 	if (((flags & IBT_OCHAN_REDIRECTED) &&
220 	    (chan_args->oc_cm_redirect_info == NULL)) ||
221 	    ((flags & IBT_OCHAN_PORT_REDIRECTED) &&
222 	    (chan_args->oc_cm_cep_path == NULL))) {
223 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
224 		    "Redirect flag specified, but respective arg is NULL",
225 		    channel);
226 		return (IBT_INVALID_PARAM);
227 	}
228 
229 	if ((flags & IBT_OCHAN_REDIRECTED) &&
230 	    (chan_args->oc_cm_redirect_info->rdi_dlid == 0) &&
231 	    (chan_args->oc_cm_redirect_info->rdi_gid.gid_guid == 0)) {
232 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
233 		    "Either rdi_dlid or rdi_gid must be specified for"
234 		    " IBT_OCHAN_REDIRECTED", channel);
235 		return (IBT_INVALID_PARAM);
236 	}
237 
238 	/* primary dlid and hca_port_num should never be zero */
239 	port_no = IBCM_PRIM_CEP_PATH(chan_args).cep_hca_port_num;
240 
241 	if ((IBCM_PRIM_ADDS_VECT(chan_args).av_dlid == 0) && (port_no == 0)) {
242 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
243 		    "Primary Path's information is not valid", channel);
244 		return (IBT_INVALID_PARAM);
245 	}
246 
247 	/* validate SID */
248 	if (chan_args->oc_path->pi_sid == 0) {
249 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
250 		    "ERROR: Service ID in path information is 0", channel);
251 		return (IBT_INVALID_PARAM);
252 	}
253 
254 	/* validate rnr_retry_cnt (enum has more than 3 bits) */
255 	if ((uint_t)chan_args->oc_path_rnr_retry_cnt > IBT_RNR_INFINITE_RETRY) {
256 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
257 		    "ERROR: oc_path_rnr_retry_cnt(%d) is out of range",
258 		    channel, chan_args->oc_path_rnr_retry_cnt);
259 		return (IBT_INVALID_PARAM);
260 	}
261 
262 	/*
263 	 * Ensure that client is not re-using a QP that is still associated
264 	 * with a statep
265 	 */
266 	IBCM_GET_CHAN_PRIVATE(channel, statep);
267 	if (statep != NULL) {
268 		IBCM_RELEASE_CHAN_PRIVATE(channel);
269 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
270 		    "Channel being re-used on active side", channel);
271 		return (IBT_CHAN_IN_USE);
272 	}
273 
274 	/* Get GUID from Channel */
275 	hca_guid = ibt_channel_to_hca_guid(channel);
276 
277 	/* validate QP's hca guid with that from primary path  */
278 	if (hca_guid != chan_args->oc_path->pi_hca_guid) {
279 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
280 		    "GUID from Channel and primary path don't match", channel);
281 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
282 		    "Channel GUID %llX primary path GUID %llX", channel,
283 		    hca_guid, chan_args->oc_path->pi_hca_guid);
284 		return (IBT_CHAN_HDL_INVALID);
285 	}
286 
287 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
288 	    "Local HCA GUID %llX", channel, hca_guid);
289 
290 	status = ibt_query_qp(channel, &qp_query_attr);
291 	if (status != IBT_SUCCESS) {
292 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
293 		    "ibt_query_qp failed %d", channel, status);
294 		return (status);
295 	}
296 
297 	/* If client specified "no port change on QP" */
298 	if ((qp_query_attr.qp_info.qp_transport.rc.rc_path.cep_hca_port_num !=
299 	    port_no) && (flags & IBT_OCHAN_PORT_FIXED)) {
300 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
301 		    "chan port %d and path port %d does not match", channel,
302 		    qp_query_attr.qp_info.qp_transport.rc.rc_path. \
303 		    cep_hca_port_num, port_no);
304 		return (IBT_INVALID_PARAM);
305 	}
306 
307 	if (qp_query_attr.qp_info.qp_trans != IBT_RC_SRV) {
308 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
309 		    "Invalid Channel type: Applicable only to RC Channel",
310 		    channel);
311 		return (IBT_CHAN_SRV_TYPE_INVALID);
312 	}
313 
314 	/* Check if QP is in INIT state or not */
315 	if (qp_query_attr.qp_info.qp_state != IBT_STATE_INIT) {
316 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
317 		    "QP is not in INIT state %x", channel,
318 		    qp_query_attr.qp_info.qp_state);
319 		return (IBT_CHAN_STATE_INVALID);
320 	}
321 
322 	local_qpn = qp_query_attr.qp_qpn;
323 
324 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p Active QPN 0x%x",
325 	    channel, local_qpn);
326 
327 #ifdef	NO_EEC_SUPPORT_YET
328 
329 	if (flags & IBT_OCHAN_RDC_EXISTS) {
330 		ibt_eec_query_attr_t	eec_query_attr;
331 
332 		local_qkey = qp_query_attr.qp_info.qp_transport.rd_qkey;
333 
334 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: RD");
335 
336 		status = ibt_query_eec(channel, &eec_query_attr);
337 		if (status != IBT_SUCCESS) {
338 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
339 			    " ibt_query_eec failed %d", channel, status);
340 			return (status);
341 		}
342 		local_eecn = eec_query_attr.eec_eecn;
343 	}
344 
345 #endif
346 	if (chan_args->oc_path->pi_prim_pkt_lt > ibcm_max_ib_pkt_lt) {
347 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
348 		    "Huge PktLifeTime %d, Max is %d", channel,
349 		    chan_args->oc_path->pi_prim_pkt_lt, ibcm_max_ib_pkt_lt);
350 		return (IBT_PATH_PKT_LT_TOO_HIGH);
351 	}
352 
353 	/* If no HCA found return failure */
354 	if ((hcap = ibcm_find_hca_entry(hca_guid)) == NULL) {
355 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
356 		    "hcap is NULL. Probably hca is not in active state",
357 		    channel);
358 		return (IBT_CHAN_HDL_INVALID);
359 	}
360 
361 	rdma_out = chan_args->oc_rdma_ra_out;
362 	rdma_in = chan_args->oc_rdma_ra_in;
363 
364 	if ((rdma_in > hcap->hca_max_rdma_in_qp) ||
365 	    (rdma_out > hcap->hca_max_rdma_out_qp)) {
366 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
367 		    "rdma in %d/out %d values exceed hca limits(%d/%d)",
368 		    channel, rdma_in, rdma_out, hcap->hca_max_rdma_in_qp,
369 		    hcap->hca_max_rdma_out_qp);
370 		ibcm_dec_hca_acc_cnt(hcap);
371 		return (IBT_INVALID_PARAM);
372 	}
373 
374 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
375 	    "rdma_in %d rdma_out %d", channel, rdma_in, rdma_out);
376 
377 	status = ibt_get_port_state_byguid(hcap->hca_guid, port_no,
378 	    NULL, &base_lid);
379 	if (status != IBT_SUCCESS) {
380 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
381 		    "primary port_num %d not active", channel, port_no);
382 		ibcm_dec_hca_acc_cnt(hcap);
383 		return (status);
384 	}
385 
386 	/* Validate P_KEY Index */
387 	status = ibt_index2pkey_byguid(hcap->hca_guid, port_no,
388 	    IBCM_PRIM_CEP_PATH(chan_args).cep_pkey_ix, &prim_pkey);
389 	if (status != IBT_SUCCESS) {
390 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
391 		    "Invalid Primary PKeyIx %x", channel,
392 		    IBCM_PRIM_CEP_PATH(chan_args).cep_pkey_ix);
393 		ibcm_dec_hca_acc_cnt(hcap);
394 		return (status);
395 	}
396 
397 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
398 	    "primary_port_num %d primary_pkey 0x%x", channel, port_no,
399 	    prim_pkey);
400 
401 	if ((hcap->hca_port_info[port_no - 1].port_ibmf_hdl == NULL) &&
402 	    ((status = ibcm_hca_reinit_port(hcap, port_no - 1))
403 	    != IBT_SUCCESS)) {
404 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
405 		    "ibmf reg or callback setup failed during re-initialize",
406 		    channel);
407 		ibcm_dec_hca_acc_cnt(hcap);
408 		return (status);
409 	}
410 
411 	ibmf_hdl = hcap->hca_port_info[port_no - 1].port_ibmf_hdl;
412 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
413 	    "primary ibmf_hdl = 0x%p", channel, ibmf_hdl);
414 
415 	primary_slid = base_lid + IBCM_PRIM_ADDS_VECT(chan_args).av_src_path;
416 
417 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: channel 0x%p "
418 	    "primary SLID = %x", channel, primary_slid);
419 
420 	/* check first if alternate path exists or not as it is OPTIONAL */
421 	if (IBCM_ALT_CEP_PATH(chan_args).cep_hca_port_num != 0) {
422 		uint8_t	alt_port_no;
423 
424 		alt_port_no = IBCM_ALT_CEP_PATH(chan_args).cep_hca_port_num;
425 
426 		if (chan_args->oc_path->pi_alt_pkt_lt > ibcm_max_ib_pkt_lt) {
427 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
428 			    "Huge Alt Pkt lt %d", channel,
429 			    chan_args->oc_path->pi_alt_pkt_lt);
430 			ibcm_dec_hca_acc_cnt(hcap);
431 			return (IBT_PATH_PKT_LT_TOO_HIGH);
432 		}
433 
434 		if (port_no != alt_port_no) {
435 
436 			status = ibt_get_port_state_byguid(hcap->hca_guid,
437 			    alt_port_no, NULL, &base_lid);
438 			if (status != IBT_SUCCESS) {
439 
440 				IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: "
441 				    "chan 0x%p alt_port_num %d inactive %d",
442 				    channel, alt_port_no, status);
443 				ibcm_dec_hca_acc_cnt(hcap);
444 				return (status);
445 			}
446 
447 		}
448 		alternate_slid =
449 		    base_lid + IBCM_ALT_ADDS_VECT(chan_args).av_src_path;
450 
451 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
452 		    "alternate SLID = %x", channel, alternate_slid);
453 	}
454 
455 	/*
456 	 * only pkey needs to be zero'ed, because all other fields are set in
457 	 * ibcm_init_reply_addr. But, let's bzero the complete struct for
458 	 * any future modifications.
459 	 */
460 	bzero(&cm_reply_addr, sizeof (cm_reply_addr));
461 
462 	/* Initialize the MAD destination address in stored_reply_addr */
463 	if ((status = ibcm_init_reply_addr(hcap, &cm_reply_addr, chan_args,
464 	    flags, &cm_pkt_lt, primary_slid)) != IBT_SUCCESS) {
465 
466 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
467 		    "ibcm_init_reply_addr failed status %d ", channel, status);
468 		ibcm_dec_hca_acc_cnt(hcap);
469 		return (status);
470 	}
471 
472 
473 	/* Initialize the pkey for CM MAD communication */
474 	if (cm_reply_addr.rcvd_addr.ia_p_key == 0)
475 		cm_reply_addr.rcvd_addr.ia_p_key = prim_pkey;
476 
477 #ifdef DEBUG
478 	ibcm_print_reply_addr(channel, &cm_reply_addr);
479 #endif
480 
481 	/* Retrieve an ibmf qp for sending CM MADs */
482 	if ((cm_qp_entry = ibcm_find_qp(hcap, port_no,
483 	    cm_reply_addr.rcvd_addr.ia_p_key)) == NULL) {
484 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
485 		    "unable to allocate ibmf qp for CM MADs", channel);
486 		ibcm_dec_hca_acc_cnt(hcap);
487 		return (IBT_INSUFF_RESOURCE);
488 	}
489 
490 
491 	if (ibcm_alloc_comid(hcap, &local_comid) != IBCM_SUCCESS) {
492 		ibcm_release_qp(cm_qp_entry);
493 		ibcm_dec_hca_acc_cnt(hcap);
494 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
495 		    " Unable to allocate comid", channel);
496 		return (IBT_INSUFF_KERNEL_RESOURCE);
497 	}
498 
499 	/* allocate an IBMF mad buffer (REQ) */
500 	if ((status = ibcm_alloc_out_msg(ibmf_hdl, &ibmf_msg,
501 	    MAD_METHOD_SEND)) != IBT_SUCCESS) {
502 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: "
503 		    "chan 0x%p ibcm_alloc_out_msg failed", channel);
504 		ibcm_release_qp(cm_qp_entry);
505 		ibcm_free_comid(hcap, local_comid);
506 		ibcm_dec_hca_acc_cnt(hcap);
507 		return (status);
508 	}
509 
510 	/* allocate an IBMF mad buffer (DREQ) */
511 	if ((status = ibcm_alloc_out_msg(ibmf_hdl, &ibmf_msg_dreq,
512 	    MAD_METHOD_SEND)) != IBT_SUCCESS) {
513 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: "
514 		    "chan 0x%p ibcm_alloc_out_msg failed", channel);
515 		(void) ibcm_free_out_msg(ibmf_hdl, &ibmf_msg);
516 		ibcm_release_qp(cm_qp_entry);
517 		ibcm_free_comid(hcap, local_comid);
518 		ibcm_dec_hca_acc_cnt(hcap);
519 		return (status);
520 	}
521 
522 	/* Init to Init, if QP's port does not match with path information */
523 	if (qp_query_attr.qp_info.qp_transport.rc.rc_path.cep_hca_port_num !=
524 	    IBCM_PRIM_CEP_PATH(chan_args).cep_hca_port_num) {
525 
526 		ibt_qp_info_t		qp_info;
527 		ibt_cep_modify_flags_t	cep_flags;
528 
529 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: "
530 		    "chan 0x%p chan port %d", channel,
531 		    qp_query_attr.qp_info.qp_transport.rc.rc_path.\
532 		    cep_hca_port_num);
533 
534 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: "
535 		    "chan 0x%p path port %d", channel, port_no);
536 
537 		bzero(&qp_info, sizeof (qp_info));
538 		/* For now, set it to RC type */
539 
540 		qp_info.qp_trans = IBT_RC_SRV;
541 		qp_info.qp_state = IBT_STATE_INIT;
542 		qp_info.qp_transport.rc.rc_path.cep_hca_port_num = port_no;
543 
544 		cep_flags = IBT_CEP_SET_STATE | IBT_CEP_SET_PORT;
545 
546 		status = ibt_modify_qp(channel, cep_flags, &qp_info, NULL);
547 
548 		if (status != IBT_SUCCESS) {
549 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: "
550 			    "chan 0x%p ibt_modify_qp() = %d", channel, status);
551 			ibcm_release_qp(cm_qp_entry);
552 			ibcm_free_comid(hcap, local_comid);
553 			ibcm_dec_hca_acc_cnt(hcap);
554 			(void) ibcm_free_out_msg(ibmf_hdl, &ibmf_msg);
555 			(void) ibcm_free_out_msg(ibmf_hdl, &ibmf_msg_dreq);
556 			return (status);
557 		} else
558 			IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: "
559 			    "chan 0x%p ibt_modify_qp() = %d", channel, status);
560 	}
561 
562 	/* allocate ibcm_state_data_t before grabbing the WRITER lock */
563 	statep = kmem_zalloc(sizeof (ibcm_state_data_t), KM_SLEEP);
564 	rw_enter(&hcap->hca_state_rwlock, RW_WRITER);
565 	lkup_status = ibcm_lookup_msg(IBCM_OUTGOING_REQ, local_comid, 0, 0,
566 	    hcap, &statep);
567 	rw_exit(&hcap->hca_state_rwlock);
568 
569 	/* CM should be seeing this for the first time */
570 	ASSERT(lkup_status == IBCM_LOOKUP_NEW);
571 
572 	/* Increment the hca's resource count */
573 	ibcm_inc_hca_res_cnt(hcap);
574 
575 	/* Once a resource created on hca, no need to hold the acc cnt */
576 	ibcm_dec_hca_acc_cnt(hcap);
577 
578 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*statep))
579 
580 	statep->timerid = 0;
581 	statep->local_hca_guid = hca_guid;
582 	statep->local_qpn = local_qpn;
583 	statep->stored_reply_addr.cm_qp_entry = cm_qp_entry;
584 	statep->prim_port = IBCM_PRIM_CEP_PATH(chan_args).cep_hca_port_num;
585 	statep->alt_port = IBCM_ALT_CEP_PATH(chan_args).cep_hca_port_num;
586 
587 
588 	/* Save "statep" as channel's CM private data.  */
589 	statep->channel = channel;
590 	IBCM_SET_CHAN_PRIVATE(statep->channel, statep);
591 
592 	statep->stored_msg = ibmf_msg;
593 	statep->dreq_msg = ibmf_msg_dreq;
594 
595 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*req_msgp))
596 
597 	/* Start filling in the REQ MAD */
598 	req_msgp = (ibcm_req_msg_t *)IBCM_OUT_MSGP(statep->stored_msg);
599 	req_msgp->req_local_comm_id = h2b32(local_comid);
600 	req_msgp->req_svc_id = h2b64(chan_args->oc_path->pi_sid);
601 	req_msgp->req_local_ca_guid = h2b64(hca_guid);
602 	req_msgp->req_local_qkey = h2b32(local_qkey);	/* for EEC/RD */
603 
604 	/* Bytes 32-35 are req_local_qpn and req_off_resp_resources */
605 	req_msgp->req_local_qpn_plus = h2b32(local_qpn << 8 | rdma_in);
606 
607 	/* Bytes 36-39 are req_local_eec_no and req_off_initiator_depth */
608 	req_msgp->req_local_eec_no_plus = h2b32(local_eecn << 8 | rdma_out);
609 
610 	if (flags & IBT_OCHAN_REMOTE_CM_TM)
611 		remote_cm_resp_time = chan_args->oc_remote_cm_time;
612 	else
613 		remote_cm_resp_time = ibcm_remote_response_time;
614 
615 	/*
616 	 * Bytes 40-43 - remote_eecn, remote_cm_resp_time, tran_type,
617 	 * IBT_CM_FLOW_CONTROL is always set by default.
618 	 */
619 	req_msgp->req_remote_eecn_plus = h2b32(
620 	    remote_eecn << 8 | (ibt_usec2ib(remote_cm_resp_time) & 0x1f) << 3 |
621 	    IBT_RC_SRV << 1 | IBT_CM_FLOW_CONTROL);
622 
623 	if (flags & IBT_OCHAN_LOCAL_CM_TM)
624 		local_cm_proc_time = chan_args->oc_local_cm_time;
625 	else
626 		local_cm_proc_time = ibcm_local_processing_time;
627 
628 	local_cm_resp_time = ibt_usec2ib(local_cm_proc_time +
629 	    2 * ibt_ib2usec(chan_args->oc_path->pi_prim_pkt_lt) +
630 	    ibcm_sw_delay);
631 
632 	/* save retry count */
633 	statep->cep_retry_cnt = chan_args->oc_path_retry_cnt;
634 
635 	if (flags & IBT_OCHAN_STARTING_PSN)
636 		starting_psn = chan_args->oc_starting_psn;
637 
638 	if (local_cm_resp_time > 0x1f)
639 		local_cm_resp_time = 0x1f;
640 
641 	/* Bytes 44-47 are req_starting_psn, local_cm_resp_time and retry_cnt */
642 	req_msgp->req_starting_psn_plus = h2b32(starting_psn << 8 |
643 	    local_cm_resp_time << 3 | statep->cep_retry_cnt);
644 
645 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
646 	    "Prim Pkt lt (IB time) 0x%x", channel,
647 	    chan_args->oc_path->pi_prim_pkt_lt);
648 
649 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
650 	    "local_cm_proc_time(usec) %d ", channel, local_cm_proc_time);
651 
652 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
653 	    "local_cm_resp_time(ib_time) %d", channel, local_cm_resp_time);
654 
655 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
656 	    "remote_cm_resp_time (usec) %d", channel, remote_cm_resp_time);
657 
658 	statep->starting_psn = starting_psn;
659 
660 	/* Pkey - bytes 48-49 */
661 	req_msgp->req_part_key = h2b16(prim_pkey);
662 
663 	if (flags & IBT_OCHAN_CM_RETRY)
664 		cm_retries = chan_args->oc_cm_retry_cnt;
665 	else
666 		cm_retries = ibcm_max_retries;
667 
668 	statep->max_cm_retries = statep->remaining_retry_cnt = cm_retries;
669 	req_msgp->req_max_cm_retries_plus = statep->max_cm_retries << 4;
670 
671 	/*
672 	 * Check whether SRQ is associated with this Channel, if yes, then
673 	 * set the SRQ Exists bit in the REQ.
674 	 */
675 	if (qp_query_attr.qp_srq != NULL) {
676 		req_msgp->req_max_cm_retries_plus |= (1 << 3);
677 	}
678 
679 	/*
680 	 * By default on Tavor, we override the PathMTU to 1K.
681 	 * To turn this off, set ibcm_override_path_mtu = 0.
682 	 */
683 	if (ibcm_override_path_mtu && IBCM_IS_HCA_TAVOR(hcap) &&
684 	    (chan_args->oc_path->pi_path_mtu > IB_MTU_1K)) {
685 		req_msgp->req_mtu_plus = IB_MTU_1K << 4 |
686 		    chan_args->oc_path_rnr_retry_cnt;
687 		IBTF_DPRINTF_L3(cmlog, "ibt_open_rc_channel: chan 0x%p PathMTU"
688 		    " overidden to IB_MTU_1K(%d) from %d", channel, IB_MTU_1K,
689 		    chan_args->oc_path->pi_path_mtu);
690 	} else
691 		req_msgp->req_mtu_plus = chan_args->oc_path->pi_path_mtu << 4 |
692 		    chan_args->oc_path_rnr_retry_cnt;
693 
694 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p CM retry cnt %d"
695 	    " staring PSN %x", channel, cm_retries, starting_psn);
696 
697 
698 #ifdef	NO_EEC_SUPPORT_YET
699 	if (flags & IBT_OCHAN_RDC_EXISTS)
700 		req_msgp->req_mtu_plus |= 8;
701 #endif
702 
703 	/* Initialize the "primary" port stuff next - bytes 52-95 */
704 	req_msgp->req_primary_l_port_lid = h2b16(primary_slid);
705 	req_msgp->req_primary_r_port_lid =
706 	    h2b16(IBCM_PRIM_ADDS_VECT(chan_args).av_dlid);
707 	req_msgp->req_primary_l_port_gid.gid_prefix =
708 	    h2b64(IBCM_PRIM_ADDS_VECT(chan_args).av_sgid.gid_prefix);
709 	req_msgp->req_primary_l_port_gid.gid_guid =
710 	    h2b64(IBCM_PRIM_ADDS_VECT(chan_args).av_sgid.gid_guid);
711 	req_msgp->req_primary_r_port_gid.gid_prefix =
712 	    h2b64(IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_prefix);
713 	req_msgp->req_primary_r_port_gid.gid_guid =
714 	    h2b64(IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_guid);
715 	primary_grh = IBCM_PRIM_ADDS_VECT(chan_args).av_send_grh;
716 
717 	statep->remote_hca_guid = /* not correct, but helpful for debugging */
718 	    IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_guid;
719 
720 	/* Bytes 88-91 - primary_flowlbl, and primary_srate */
721 	req_msgp->req_primary_flow_label_plus =
722 	    h2b32(((primary_grh == B_TRUE) ?
723 	    (IBCM_PRIM_ADDS_VECT(chan_args).av_flow << 12) : 0) |
724 	    IBCM_PRIM_ADDS_VECT(chan_args).av_srate);
725 	req_msgp->req_primary_traffic_class = (primary_grh == B_TRUE) ?
726 	    IBCM_PRIM_ADDS_VECT(chan_args).av_tclass : 0;
727 	req_msgp->req_primary_hop_limit = (primary_grh == B_TRUE) ?
728 	    IBCM_PRIM_ADDS_VECT(chan_args).av_hop : 1;
729 	req_msgp->req_primary_sl_plus =
730 	    IBCM_PRIM_ADDS_VECT(chan_args).av_srvl << 4 |
731 	    ((primary_grh == B_TRUE) ? 0 : 8);
732 
733 	req_msgp->req_primary_localtime_plus =
734 	    ibt_usec2ib((2 * ibt_ib2usec(chan_args->oc_path->pi_prim_pkt_lt)) +
735 	    ibt_ib2usec(hcap->hca_ack_delay)) << 3;
736 
737 	IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan %p statep %p",
738 	    channel, statep);
739 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
740 	    "active hca_ack_delay (usec) %d", channel,
741 	    req_msgp->req_primary_localtime_plus);
742 
743 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
744 	    "Sent primary cep timeout (IB Time) %d", channel,
745 	    hcap->hca_ack_delay);
746 
747 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p prim_dlid %x ",
748 	    channel, IBCM_PRIM_ADDS_VECT(chan_args).av_dlid);
749 
750 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
751 	    "prim GID %llX:%llX", channel,
752 	    IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_prefix,
753 	    IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_guid);
754 
755 	/* Initialize the "alternate" port stuff - optional */
756 	if (chan_args->oc_path->pi_alt_cep_path.cep_hca_port_num != 0) {
757 		ib_gid_t	tmp_gid;
758 
759 		req_msgp->req_alt_l_port_lid = h2b16(alternate_slid);
760 		req_msgp->req_alt_r_port_lid =
761 		    h2b16(IBCM_ALT_ADDS_VECT(chan_args).av_dlid);
762 		/*
763 		 * doing all this as req_alt_r/l_port_gid is at offset
764 		 * 100, 116 which is not divisible by 8
765 		 */
766 
767 		tmp_gid.gid_prefix =
768 		    h2b64(IBCM_ALT_ADDS_VECT(chan_args).av_dgid.gid_prefix);
769 		tmp_gid.gid_guid =
770 		    h2b64(IBCM_ALT_ADDS_VECT(chan_args).av_dgid.gid_guid);
771 		bcopy(&tmp_gid, &req_msgp->req_alt_r_port_gid[0],
772 		    sizeof (ib_gid_t));
773 		tmp_gid.gid_prefix =
774 		    h2b64(IBCM_ALT_ADDS_VECT(chan_args).av_sgid.gid_prefix);
775 		tmp_gid.gid_guid =
776 		    h2b64(IBCM_ALT_ADDS_VECT(chan_args).av_sgid.gid_guid);
777 
778 		bcopy(&tmp_gid, &req_msgp->req_alt_l_port_gid[0],
779 		    sizeof (ib_gid_t));
780 		alternate_grh = IBCM_ALT_ADDS_VECT(chan_args).av_send_grh;
781 
782 		/* Bytes 132-135 - alternate_flow_label, and alternate srate */
783 		req_msgp->req_alt_flow_label_plus = h2b32(
784 		    (((alternate_grh == B_TRUE) ?
785 		    (IBCM_ALT_ADDS_VECT(chan_args).av_flow << 12) : 0) |
786 		    IBCM_ALT_ADDS_VECT(chan_args).av_srate));
787 		req_msgp->req_alt_traffic_class = (alternate_grh == B_TRUE) ?
788 		    IBCM_ALT_ADDS_VECT(chan_args).av_tclass : 0;
789 		req_msgp->req_alt_hop_limit = (alternate_grh == B_TRUE) ?
790 		    IBCM_ALT_ADDS_VECT(chan_args).av_hop : 1;
791 		req_msgp->req_alt_sl_plus =
792 		    IBCM_ALT_ADDS_VECT(chan_args).av_srvl << 4 |
793 		    ((alternate_grh == B_TRUE) ? 0 : 8);
794 		req_msgp->req_alt_localtime_plus = ibt_usec2ib((2 *
795 		    ibt_ib2usec(chan_args->oc_path->pi_alt_pkt_lt)) +
796 		    ibt_ib2usec(hcap->hca_ack_delay)) << 3;
797 
798 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
799 		    "alt_dlid %x ", channel,
800 		    IBCM_ALT_ADDS_VECT(chan_args).av_dlid);
801 
802 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
803 		    "alt GID %llX:%llX", channel,
804 		    IBCM_ALT_ADDS_VECT(chan_args).av_dgid.gid_prefix,
805 		    IBCM_ALT_ADDS_VECT(chan_args).av_dgid.gid_guid);
806 	}
807 
808 	len = min(chan_args->oc_priv_data_len, IBT_REQ_PRIV_DATA_SZ);
809 	if ((len > 0) && chan_args->oc_priv_data)
810 		bcopy(chan_args->oc_priv_data, req_msgp->req_private_data, len);
811 
812 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*req_msgp))
813 
814 	/* return_data is filled up in the state machine code */
815 	if (ret_args != NULL) {
816 		statep->open_return_data = ret_args;
817 	}
818 
819 	/* initialize some statep fields here */
820 	statep->mode = IBCM_ACTIVE_MODE;
821 	statep->hcap = hcap;
822 
823 	statep->cm_handler = chan_args->oc_cm_handler;
824 	statep->state_cm_private = chan_args->oc_cm_clnt_private;
825 
826 	statep->pkt_life_time =
827 	    ibt_ib2usec(chan_args->oc_path->pi_prim_pkt_lt);
828 
829 	statep->timer_value = ibt_ib2usec(ibt_usec2ib(
830 	    2 * ibt_ib2usec(cm_pkt_lt) + remote_cm_resp_time));
831 
832 	/* Initialize statep->stored_reply_addr */
833 	statep->stored_reply_addr.ibmf_hdl = ibmf_hdl;
834 
835 	/* Initialize stored reply addr fields */
836 	statep->stored_reply_addr.grh_hdr = cm_reply_addr.grh_hdr;
837 	statep->stored_reply_addr.rcvd_addr = cm_reply_addr.rcvd_addr;
838 	statep->stored_reply_addr.grh_exists = cm_reply_addr.grh_exists;
839 	statep->stored_reply_addr.port_num = cm_reply_addr.port_num;
840 
841 	/*
842 	 * The IPD on local/active side is calculated by path functions,
843 	 * hence available in the args of ibt_open_rc_channel
844 	 */
845 	statep->local_srate = IBCM_PRIM_ADDS_VECT(chan_args).av_srate;
846 	statep->local_alt_srate = IBCM_ALT_ADDS_VECT(chan_args).av_srate;
847 
848 	/* Store the source path bits for primary and alt paths */
849 	statep->prim_src_path_bits = IBCM_PRIM_ADDS_VECT(chan_args).av_src_path;
850 	statep->alt_src_path_bits = IBCM_ALT_ADDS_VECT(chan_args).av_src_path;
851 
852 	statep->open_flow = 1;
853 	statep->open_done = B_FALSE;
854 	statep->state = statep->timer_stored_state = IBCM_STATE_REQ_SENT;
855 	IBCM_REF_CNT_INCR(statep);	/* Decremented before return */
856 	IBCM_REF_CNT_INCR(statep);	/* Decremented after REQ is posted */
857 	statep->send_mad_flags |= IBCM_REQ_POST_BUSY;
858 
859 	IBCM_OUT_HDRP(statep->stored_msg)->AttributeID =
860 	    h2b16(IBCM_INCOMING_REQ + IBCM_ATTR_BASE_ID);
861 
862 	IBCM_OUT_HDRP(statep->stored_msg)->TransactionID =
863 	    h2b64(ibcm_generate_tranid(IBCM_INCOMING_REQ, statep->local_comid,
864 	    0));
865 
866 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*statep))
867 
868 	ibcm_open_enqueue(statep);
869 
870 	mutex_enter(&statep->state_mutex);
871 
872 	if (mode == IBT_BLOCKING) {
873 
874 		/* wait for REQ/REP/RTU */
875 		while (statep->open_done != B_TRUE) {
876 			cv_wait(&statep->block_client_cv, &statep->state_mutex);
877 		}
878 
879 		/*
880 		 * In the case that open_channel() fails because of a
881 		 * REJ or timeout, change retval to IBT_CM_FAILURE
882 		 */
883 		if (statep->open_return_data->rc_status != IBT_CM_ACCEPT)
884 			status = IBT_CM_FAILURE;
885 
886 		IBTF_DPRINTF_L3(cmlog, "ibt_open_rc_channel: chan 0x%p "
887 		    "ret status %d cm status %d", channel, status,
888 		    statep->open_return_data->rc_status);
889 	}
890 
891 	/* decrement the ref-count before leaving here */
892 	IBCM_REF_CNT_DECR(statep);
893 
894 	mutex_exit(&statep->state_mutex);
895 
896 	IBTF_DPRINTF_L4(cmlog, "ibt_open_rc_channel: chan 0x%p done", channel);
897 	return (status);
898 }
899 
900 /*
901  * ibcm_init_reply_addr:
902  *
903  * The brief description of functionality below.
904  *
905  * For IBT_OCHAN_PORT_REDIRECTED (ie., port redirected case):
906  *	Build CM path from chan_args->oc_cm_cep_path
907  *	Set CM pkt lt (ie.,life time) to chan_args->oc_cm_pkt_lt
908  *
909  * For IBT_OCHAN_REDIRECTED (ie., port and CM redirected case):
910  *	If Redirect LID is specified,
911  *		If Redirect GID is not specified or specified to be on the same
912  *		    subnet, then
913  *			Build CM path from chan_args->oc_cm_redirect_info
914  *			Set CM pkt lt to subnet timeout
915  *		Else (ie., GID specified, but on a different subnet)
916  *			Do a path lookup to build CM Path and set CM pkt lt
917  *
918  */
919 static ibt_status_t
920 ibcm_init_reply_addr(ibcm_hca_info_t *hcap, ibcm_mad_addr_t *reply_addr,
921     ibt_chan_open_args_t *chan_args, ibt_chan_open_flags_t flags,
922     ib_time_t *cm_pkt_lt, ib_lid_t prim_slid)
923 {
924 	ibt_adds_vect_t	*cm_adds;
925 	ibt_path_info_t	path;
926 	boolean_t	cm_grh;
927 	ibt_status_t	status;
928 
929 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_reply_addr:");
930 
931 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*reply_addr))
932 
933 	/*
934 	 * sending side CM lid/gid/port num are not based on any redirect
935 	 * params. These values are set to primary RC path lid/gid/port num.
936 	 * In the future, these values can be set based on framework policy
937 	 * decisions ensuring reachability.
938 	 */
939 	reply_addr->grh_hdr.ig_sender_gid =
940 	    IBCM_PRIM_ADDS_VECT(chan_args).av_sgid;
941 	reply_addr->rcvd_addr.ia_local_lid = prim_slid;
942 	reply_addr->port_num = IBCM_PRIM_CEP_PATH(chan_args).cep_hca_port_num;
943 
944 	if (flags & IBT_OCHAN_PORT_REDIRECTED) {
945 		IBTF_DPRINTF_L4(cmlog, "ibcm_init_rely_addr: "
946 		    "IBT_OCHAN_PORT_REDIRECTED specified");
947 
948 		status = ibt_index2pkey_byguid(hcap->hca_guid,
949 		    chan_args->oc_cm_cep_path->cep_hca_port_num,
950 		    chan_args->oc_cm_cep_path->cep_pkey_ix,
951 		    &reply_addr->rcvd_addr.ia_p_key);
952 
953 		if (status != IBT_SUCCESS) {
954 			IBTF_DPRINTF_L2(cmlog, "ibcm_init_rely_addr: Invalid "
955 			    "CM PKeyIx %x port_num %x",
956 			    chan_args->oc_cm_cep_path->cep_pkey_ix,
957 			    chan_args->oc_cm_cep_path->cep_hca_port_num);
958 			return (status);
959 		}
960 
961 		cm_adds = &(chan_args->oc_cm_cep_path->cep_adds_vect);
962 		IBTF_DPRINTF_L4(cmlog, "ibcm_init_rely_addr: dlid = %x",
963 		    cm_adds->av_dlid);
964 
965 		reply_addr->rcvd_addr.ia_q_key = IB_GSI_QKEY;
966 		reply_addr->rcvd_addr.ia_remote_qno = 1;
967 		*cm_pkt_lt = chan_args->oc_cm_pkt_lt;
968 
969 	} else if (flags & IBT_OCHAN_REDIRECTED) {
970 		ibt_redirect_info_t	*redirect_info;
971 		ibt_hca_portinfo_t	*port_infop;
972 		uint_t			psize, nports;
973 
974 		IBTF_DPRINTF_L4(cmlog, "ibcm_init_rely_addr: "
975 		    "IBT_OCHAN_REDIRECTED specified");
976 
977 		redirect_info = chan_args->oc_cm_redirect_info;
978 
979 		if ((redirect_info->rdi_gid.gid_prefix == 0) ||
980 		    (redirect_info->rdi_gid.gid_guid == 0)) {
981 			IBTF_DPRINTF_L2(cmlog, "ibcm_init_reply_addr: "
982 			    "ERROR: Re-direct GID value NOT Provided.");
983 			return (IBT_INVALID_PARAM);
984 		}
985 
986 		/* As per spec definition 1.1, it's always IB_GSI_QKEY */
987 		reply_addr->rcvd_addr.ia_q_key = redirect_info->rdi_qkey;
988 		reply_addr->rcvd_addr.ia_remote_qno = redirect_info->rdi_qpn;
989 		reply_addr->rcvd_addr.ia_p_key = redirect_info->rdi_pkey;
990 
991 		/*
992 		 * if LID is non-zero in classportinfo then use classportinfo
993 		 * fields to form CM MAD destination address.
994 		 */
995 		if (redirect_info->rdi_dlid != 0) {
996 			status = ibtl_cm_query_hca_ports_byguid(hcap->hca_guid,
997 			    reply_addr->port_num, &port_infop, &nports, &psize);
998 			if ((status != IBT_SUCCESS) || (nports == 0)) {
999 				IBTF_DPRINTF_L2(cmlog, "ibcm_init_reply_addr: "
1000 				    "Query Ports Failed: %d", status);
1001 				return (status);
1002 			} else if (port_infop->p_subnet_timeout >
1003 			    ibcm_max_ib_pkt_lt) {
1004 				IBTF_DPRINTF_L2(cmlog, "ibcm_init_reply_addr: "
1005 				    "large subnet timeout %x port_no %x",
1006 				    port_infop->p_subnet_timeout,
1007 				    reply_addr->port_num);
1008 				ibt_free_portinfo(port_infop, psize);
1009 				return (IBT_PATH_PKT_LT_TOO_HIGH);
1010 			} else {
1011 				IBTF_DPRINTF_L3(cmlog, "ibcm_init_reply_addr: "
1012 				    "subnet timeout %x port_no %x",
1013 				    port_infop->p_subnet_timeout,
1014 				    reply_addr->port_num);
1015 
1016 				*cm_pkt_lt =
1017 				    ibt_ib2usec(min(ibcm_max_ib_mad_pkt_lt,
1018 				    port_infop->p_subnet_timeout));
1019 
1020 				ibt_free_portinfo(port_infop, psize);
1021 			}
1022 
1023 			reply_addr->rcvd_addr.ia_remote_lid =
1024 			    redirect_info->rdi_dlid;
1025 			reply_addr->rcvd_addr.ia_service_level =
1026 			    redirect_info->rdi_sl;
1027 			reply_addr->grh_exists = B_TRUE;
1028 			reply_addr->grh_hdr.ig_recver_gid =
1029 			    redirect_info->rdi_gid;
1030 			reply_addr->grh_hdr.ig_tclass =
1031 			    redirect_info->rdi_tclass;
1032 			reply_addr->grh_hdr.ig_flow_label =
1033 			    redirect_info->rdi_flow;
1034 
1035 			/* Classportinfo doesn't have hoplimit field */
1036 			reply_addr->grh_hdr.ig_hop_limit = 1;
1037 			return (IBT_SUCCESS);
1038 
1039 		} else {
1040 			ibt_path_attr_t	path_attr;
1041 			ib_gid_t	path_dgid[1];
1042 
1043 			/*
1044 			 * If GID is specified, and LID is zero in classportinfo
1045 			 * do a path lookup using specified GID, Pkey,
1046 			 * in classportinfo
1047 			 */
1048 
1049 			bzero(&path_attr, sizeof (path_attr));
1050 
1051 			path_attr.pa_dgids = &path_dgid[0];
1052 			path_attr.pa_dgids[0] = redirect_info->rdi_gid;
1053 
1054 			/*
1055 			 * use reply_addr below, as sender_gid in reply_addr
1056 			 * may have been set above based on some policy decision
1057 			 * for originating end point for CM MADs above
1058 			 */
1059 			path_attr.pa_sgid = reply_addr->grh_hdr.ig_sender_gid;
1060 			path_attr.pa_num_dgids = 1;
1061 			path_attr.pa_pkey = redirect_info->rdi_pkey;
1062 
1063 			if ((status = ibt_get_paths(ibcm_ibt_handle,
1064 			    IBT_PATH_PKEY, &path_attr, 1, &path, NULL)) !=
1065 			    IBT_SUCCESS)
1066 				return (status);
1067 
1068 			/* Initialize cm_adds */
1069 			cm_adds = &path.pi_prim_cep_path.cep_adds_vect;
1070 			*cm_pkt_lt = path.pi_prim_pkt_lt;
1071 		}
1072 
1073 	} else	{ /* cm_pkey initialized in ibt_open_rc_channel */
1074 		reply_addr->rcvd_addr.ia_q_key = IB_GSI_QKEY;
1075 		reply_addr->rcvd_addr.ia_remote_qno = 1;
1076 		*cm_pkt_lt = chan_args->oc_path->pi_prim_pkt_lt;
1077 		cm_adds = &(IBCM_PRIM_ADDS_VECT(chan_args));
1078 	}
1079 
1080 
1081 	cm_grh = cm_adds->av_send_grh;
1082 	reply_addr->grh_exists = cm_grh;
1083 
1084 	reply_addr->rcvd_addr.ia_remote_lid =
1085 	    cm_adds->av_dlid;
1086 	reply_addr->grh_hdr.ig_recver_gid =
1087 	    cm_adds->av_dgid;
1088 	reply_addr->grh_hdr.ig_flow_label =
1089 	    cm_adds->av_flow & IB_GRH_FLOW_LABEL_MASK;
1090 	reply_addr->grh_hdr.ig_tclass =
1091 	    (cm_grh == B_TRUE) ? cm_adds->av_tclass : 0;
1092 	reply_addr->grh_hdr.ig_hop_limit =
1093 	    (cm_grh == B_TRUE) ? cm_adds->av_hop : 1;
1094 	reply_addr->rcvd_addr.ia_service_level =
1095 	    cm_adds->av_srvl;
1096 
1097 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*reply_addr))
1098 
1099 	return (IBT_SUCCESS);
1100 }
1101 
1102 
1103 /*
1104  * ibt_prime_close_rc_channel()
1105  *	It allocates resources required for close channel operation, so
1106  *	ibt_close_rc_channel can be called from interrupt routine.
1107  *
1108  * INPUTS:
1109  *	channel			The address of an ibt_channel_t struct that
1110  *				specifies the channel to open.
1111  *
1112  * RETURN VALUES:
1113  *	IBT_SUCCESS	on success(or respective failure on error)
1114  *
1115  * Clients are typically expected to call this function in established state
1116  */
1117 ibt_status_t
1118 ibt_prime_close_rc_channel(ibt_channel_hdl_t channel)
1119 {
1120 	ibcm_state_data_t	*statep;
1121 	ibt_status_t		status = IBT_SUCCESS;
1122 
1123 	IBTF_DPRINTF_L3(cmlog, "ibt_prime_close_rc_channel(%p)", channel);
1124 
1125 	/* validate channel, first */
1126 	if (IBCM_INVALID_CHANNEL(channel)) {
1127 		IBTF_DPRINTF_L2(cmlog, "ibt_prime_close_rc_channel: chan 0x%p "
1128 		    "invalid channel", channel);
1129 		return (IBT_CHAN_HDL_INVALID);
1130 	}
1131 
1132 	if (ibtl_cm_get_chan_type(channel) != IBT_RC_SRV) {
1133 		IBTF_DPRINTF_L2(cmlog, "ibt_prime_close_rc_channel: chan 0x%p "
1134 		    "Invalid Channel type: Applicable only to RC Channel",
1135 		    channel);
1136 		return (IBT_CHAN_SRV_TYPE_INVALID);
1137 	}
1138 
1139 	/* get the statep */
1140 	IBCM_GET_CHAN_PRIVATE(channel, statep);
1141 
1142 	/*
1143 	 * This can happen, if the statep is already gone by a DREQ from
1144 	 * the remote side
1145 	 */
1146 
1147 	if (statep == NULL) {
1148 		IBTF_DPRINTF_L2(cmlog, "ibt_prime_close_rc_channel: chan 0x%p "
1149 		    "statep NULL", channel);
1150 		return (IBT_SUCCESS);
1151 	}
1152 
1153 	mutex_enter(&statep->state_mutex);
1154 	IBCM_RELEASE_CHAN_PRIVATE(channel);
1155 	if (statep->state != IBCM_STATE_ESTABLISHED) {
1156 		mutex_exit(&statep->state_mutex);
1157 		return (IBT_CHAN_STATE_INVALID);
1158 	}
1159 	IBCM_REF_CNT_INCR(statep);
1160 	IBTF_DPRINTF_L4(cmlog, "ibt_prime_close_rc_channel: chan 0x%p statep %p"
1161 	    " state %x", channel, statep, statep->state);
1162 	mutex_exit(&statep->state_mutex);
1163 
1164 	/* clients could pre-allocate dreq mad, even before connection est */
1165 	if (statep->dreq_msg == NULL)
1166 		status = ibcm_alloc_out_msg(statep->stored_reply_addr.ibmf_hdl,
1167 		    &statep->dreq_msg, MAD_METHOD_SEND);
1168 
1169 	mutex_enter(&statep->state_mutex);
1170 	IBCM_REF_CNT_DECR(statep);
1171 	mutex_exit(&statep->state_mutex);
1172 
1173 	if (status != IBT_SUCCESS) {
1174 		IBTF_DPRINTF_L2(cmlog, "ibt_prime_close_rc_channel: chan 0x%p "
1175 		    "ibcm_alloc_out_msg failed ", channel);
1176 		return (status);
1177 	}
1178 
1179 	/* If this message isn't seen then ibt_prime_close_rc_channel failed */
1180 	IBTF_DPRINTF_L5(cmlog, "ibt_prime_close_rc_channel: chan 0x%p done",
1181 	    channel);
1182 
1183 	return (IBT_SUCCESS);
1184 }
1185 
1186 /*
1187  * ibt_close_rc_channel()
1188  *	It closes an established channel.
1189  *
1190  * RETURN VALUES:
1191  *	IBT_SUCCESS	on success(or respective failure on error)
1192  */
1193 ibt_status_t
1194 ibt_close_rc_channel(ibt_channel_hdl_t channel, ibt_execution_mode_t mode,
1195     void *priv_data, ibt_priv_data_len_t priv_data_len, uint8_t *ret_status,
1196     void *ret_priv_data, ibt_priv_data_len_t *ret_priv_data_len_p)
1197 {
1198 	ibcm_state_data_t	*statep;
1199 
1200 	IBTF_DPRINTF_L3(cmlog, "ibt_close_rc_channel(%p, %x, %p, %d, %p)",
1201 	    channel, mode, priv_data, priv_data_len,
1202 	    (ret_priv_data_len_p == NULL) ? 0 : *ret_priv_data_len_p);
1203 
1204 	/* validate channel, first */
1205 	if (IBCM_INVALID_CHANNEL(channel)) {
1206 		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
1207 		    "invalid channel", channel);
1208 		return (IBT_CHAN_HDL_INVALID);
1209 	}
1210 
1211 	if (ibtl_cm_get_chan_type(channel) != IBT_RC_SRV) {
1212 		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
1213 		    "Invalid Channel type: Applicable only to RC Channel",
1214 		    channel);
1215 		return (IBT_CHAN_SRV_TYPE_INVALID);
1216 	}
1217 
1218 	if (mode == IBT_BLOCKING) {
1219 		/* valid only for BLOCKING MODE */
1220 		if ((ret_priv_data_len_p != NULL) &&
1221 		    (*ret_priv_data_len_p > IBT_DREP_PRIV_DATA_SZ)) {
1222 			IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p"
1223 			    " private data len %d is too large", channel,
1224 			    *ret_priv_data_len_p);
1225 			return (IBT_INVALID_PARAM);
1226 		}
1227 	} else if ((mode != IBT_NONBLOCKING) && (mode != IBT_NOCALLBACKS)) {
1228 		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
1229 		    "invalid mode %x specified", channel, mode);
1230 		return (IBT_INVALID_PARAM);
1231 	}
1232 
1233 	if (ibtl_cm_is_chan_closing(channel) ||
1234 	    ibtl_cm_is_chan_closed(channel)) {
1235 		if (ret_status)
1236 			*ret_status = IBT_CM_CLOSED_ALREADY;
1237 
1238 		/* No private data to return to the client */
1239 		if (ret_priv_data_len_p != NULL)
1240 			*ret_priv_data_len_p = 0;
1241 
1242 		if ((mode == IBT_BLOCKING) ||
1243 		    (mode == IBT_NOCALLBACKS)) {
1244 			IBCM_GET_CHAN_PRIVATE(channel, statep);
1245 			if (statep == NULL)
1246 				return (IBT_SUCCESS);
1247 			mutex_enter(&statep->state_mutex);
1248 			IBCM_RELEASE_CHAN_PRIVATE(channel);
1249 			IBCM_REF_CNT_INCR(statep);
1250 			while (statep->close_done != B_TRUE)
1251 				cv_wait(&statep->block_client_cv,
1252 				    &statep->state_mutex);
1253 			IBCM_REF_CNT_DECR(statep);
1254 			mutex_exit(&statep->state_mutex);
1255 		}
1256 
1257 		IBTF_DPRINTF_L3(cmlog, "ibt_close_rc_channel: chan 0x%p "
1258 		    "already marked for closing", channel);
1259 
1260 		return (IBT_SUCCESS);
1261 	}
1262 
1263 	/* get the statep */
1264 	IBCM_GET_CHAN_PRIVATE(channel, statep);
1265 	if (statep == NULL) {
1266 		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
1267 		    "statep NULL", channel);
1268 		return (IBT_CHAN_STATE_INVALID);
1269 	}
1270 
1271 	mutex_enter(&statep->state_mutex);
1272 
1273 	if (statep->dreq_msg == NULL) {
1274 		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
1275 		    "Fatal Error: dreq_msg is NULL", channel);
1276 		IBCM_RELEASE_CHAN_PRIVATE(channel);
1277 		mutex_exit(&statep->state_mutex);
1278 		return (IBT_CHAN_STATE_INVALID);
1279 	}
1280 
1281 	if ((ret_priv_data == NULL) || (ret_priv_data_len_p == NULL)) {
1282 		statep->close_ret_priv_data = NULL;
1283 		statep->close_ret_priv_data_len = NULL;
1284 	} else {
1285 		statep->close_ret_priv_data = ret_priv_data;
1286 		statep->close_ret_priv_data_len = ret_priv_data_len_p;
1287 	}
1288 
1289 	priv_data_len = min(priv_data_len, IBT_DREQ_PRIV_DATA_SZ);
1290 	if ((priv_data != NULL) && (priv_data_len > 0)) {
1291 		bcopy(priv_data, ((ibcm_dreq_msg_t *)
1292 		    IBCM_OUT_MSGP(statep->dreq_msg))->dreq_private_data,
1293 		    priv_data_len);
1294 	}
1295 	statep->close_ret_status = ret_status;
1296 
1297 	IBCM_RELEASE_CHAN_PRIVATE(channel);
1298 	IBCM_REF_CNT_INCR(statep);
1299 
1300 	if (mode != IBT_NONBLOCKING) {
1301 		return (ibcm_close_rc_channel(channel, statep, mode));
1302 	}
1303 
1304 	/* IBT_NONBLOCKING */
1305 	ibcm_close_enqueue(statep);
1306 	mutex_exit(&statep->state_mutex);
1307 
1308 	return (IBT_SUCCESS);
1309 }
1310 
/*
 * ibcm_close_start()
 *	Runs the close processing for statep in IBT_NONBLOCKING mode.
 *
 *	state_mutex is acquired here because ibcm_close_rc_channel() expects
 *	to be entered with it held and releases it itself (it is annotated
 *	LOCK_RELEASED_AS_SIDE_EFFECT).  The returned status is discarded.
 */
void
ibcm_close_start(ibcm_state_data_t *statep)
{
	mutex_enter(&statep->state_mutex);
	(void) ibcm_close_rc_channel(statep->channel, statep, IBT_NONBLOCKING);
}
1317 
/*
 * ibcm_close_rc_channel()
 *	Common close processing, called by ibt_close_rc_channel() for the
 *	IBT_BLOCKING and IBT_NOCALLBACKS modes and by ibcm_close_start() for
 *	IBT_NONBLOCKING.
 *
 *	Must be entered with statep->state_mutex held; the mutex is released
 *	on every return path.  The reference the caller took on statep is
 *	dropped here, except when a pre-established connection is aborted via
 *	the taskq: there ibcm_process_abort_via_taskq() drops it.
 *
 *	What is done depends on the connection state:
 *	- waiting for a response MAD: cancel the timer, abort via taskq and
 *	  delete the state data
 *	- CM handler in progress: flag the abort and let the state machine
 *	  finish the teardown
 *	- already in teardown / timewait / timed out: optionally wait for
 *	  the close to complete
 *	- established: post a DREQ MAD and, if blocking, wait for completion
 *
 * RETURN VALUES:
 *	IBT_SUCCESS			close handled as described above
 *	IBT_CHAN_HDL_INVALID		IBCM_ACCESS_HCA_OK() failed for the HCA
 *	IBT_INSUFF_KERNEL_RESOURCE	non-blocking abort could not be
 *					dispatched to the taskq
 *	IBT_CM_FAILURE			connection is in a state that none of
 *					the cases above cover
 */
static
ibt_status_t
ibcm_close_rc_channel(ibt_channel_hdl_t channel, ibcm_state_data_t *statep,
    ibt_execution_mode_t mode)
{
	ibcm_hca_info_t		*hcap;

	_NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&statep->state_mutex));
	ASSERT(MUTEX_HELD(&statep->state_mutex));

	IBTF_DPRINTF_L3(cmlog, "ibcm_close_rc_channel: chan 0x%p statep %p",
	    channel, statep);

	hcap = statep->hcap;

	/* HCA must have been in active state. If not, it's a client bug */
	if (!IBCM_ACCESS_HCA_OK(hcap)) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_close_rc_channel: chan 0x%p "
		    "hcap 0x%p not active", channel, hcap);
		IBCM_REF_CNT_DECR(statep);
		mutex_exit(&statep->state_mutex);
		return (IBT_CHAN_HDL_INVALID);
	}

	/* Let an in-progress transition to established settle first */
	if (statep->state == IBCM_STATE_TRANSIENT_ESTABLISHED) {
		while (statep->cep_in_rts == IBCM_BLOCK)
			cv_wait(&statep->block_mad_cv, &statep->state_mutex);
	}

	/* Do TRANSIENT_DREQ check after TRANSIENT_ESTABLISHED check */
	while (statep->state == IBCM_STATE_TRANSIENT_DREQ_SENT)
		cv_wait(&statep->block_mad_cv, &statep->state_mutex);

	IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: chan 0x%p "
	    "connection state is %x", channel, statep->state);

	/* If state is in pre-established states, abort the connection est */
	if (statep->state != IBCM_STATE_ESTABLISHED) {
		statep->cm_retries++;	/* ensure connection trace is dumped */

		/* No DREP private data possible */
		if (statep->close_ret_priv_data_len != NULL)
			*statep->close_ret_priv_data_len = 0;

		/*
		 * If waiting for a response mad, then cancel the timer,
		 * and delete the connection
		 */
		if (statep->state == IBCM_STATE_REQ_SENT ||
		    statep->state == IBCM_STATE_REP_SENT ||
		    statep->state == IBCM_STATE_REP_WAIT ||
		    statep->state == IBCM_STATE_MRA_REP_RCVD) {
			timeout_id_t		timer_val = statep->timerid;
			ibcm_conn_state_t	old_state;

			IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: "
			    "chan 0x%p connection aborted in state %x", channel,
			    statep->state);

			/* remember the state so a failed dispatch can undo */
			old_state = statep->state;
			statep->state = IBCM_STATE_DELETE;

			if (mode == IBT_NONBLOCKING) {
				/*
				 * TQ_NOSLEEP dispatch can fail; on failure
				 * drop the reference, restore the state and
				 * report the resource shortage.
				 */
				if (taskq_dispatch(ibcm_taskq,
				    ibcm_process_abort_via_taskq, statep,
				    TQ_NOSLEEP) == 0) {

					IBCM_REF_CNT_DECR(statep);
					statep->state = old_state;
					mutex_exit(&statep->state_mutex);
					return (IBT_INSUFF_KERNEL_RESOURCE);
				}	/* if taskq_dispatch succeeds */
				/* Cancel the timer */
				statep->timerid = 0;
				mutex_exit(&statep->state_mutex);
			} else {
				/* Cancel the timer */
				statep->timerid = 0;
				/* mutex is dropped before the TQ_SLEEP call */
				mutex_exit(&statep->state_mutex);
				(void) taskq_dispatch(ibcm_taskq,
				    ibcm_process_abort_via_taskq, statep,
				    TQ_SLEEP);
			}

			/* cancel the currently running timer */
			if (timer_val != 0)
				(void) untimeout(timer_val);

			/* wait until cm handler returns for BLOCKING cases */
			mutex_enter(&statep->state_mutex);
			if ((mode == IBT_BLOCKING) ||
			    (mode == IBT_NOCALLBACKS)) {
				while (statep->close_done != B_TRUE)
					cv_wait(&statep->block_client_cv,
					    &statep->state_mutex);
			}

			if (statep->close_ret_status)
				*statep->close_ret_status = IBT_CM_CLOSED_ABORT;
			mutex_exit(&statep->state_mutex);

			/*
			 * It would ideal to post a REJ MAD, but that would
			 * be non-conformance to spec. Hence, delete the state
			 * data. Assuming that happens quickly, any retransmits
			 * from the remote are replied by CM with reject
			 * reason " no valid com id". That would stop remote
			 * sending any more MADs.
			 */
			ibcm_delete_state_data(statep);
			return (IBT_SUCCESS);

		/* if CM busy in cm handler, wait until cm handler returns */
		} else if (statep->state == IBCM_STATE_REQ_RCVD ||
		    statep->state == IBCM_STATE_REP_RCVD ||
		    statep->state == IBCM_STATE_MRA_SENT ||
		    statep->state == IBCM_STATE_MRA_REP_SENT) {

			/* take control of statep */
			statep->abort_flag |= IBCM_ABORT_CLIENT;

			IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: "
			    "chan 0x%p connection aborted in state = %x",
			    channel, statep->state);

			/*
			 * wait until state machine modifies qp state to error,
			 * including disassociating statep and QP
			 */
			if ((mode == IBT_BLOCKING) || (mode == IBT_NOCALLBACKS))
				while (statep->close_done != B_TRUE)
					cv_wait(&statep->block_client_cv,
					    &statep->state_mutex);

			/* a sanity setting */
			if (mode == IBT_NOCALLBACKS)
				statep->cm_handler = NULL;
			IBCM_REF_CNT_DECR(statep);

			/*
			 * In rare situations, connection attempt could be
			 * terminated for some other reason, before abort is
			 * processed, but CM still returns ret_status as abort
			 */
			if (statep->close_ret_status)
				*statep->close_ret_status = IBT_CM_CLOSED_ABORT;
			mutex_exit(&statep->state_mutex);

			/*
			 * REJ MAD is posted by the CM state machine for this
			 * case, hence state structure is deleted in the
			 * state machine processing.
			 */
			return (IBT_SUCCESS);

		} else if ((statep->state == IBCM_STATE_TIMEWAIT) ||
		    (statep->state == IBCM_STATE_DELETE)) {

			/* State already in timewait, so no return priv data */
			IBCM_REF_CNT_DECR(statep);

			/* The teardown has already been done */
			if (statep->close_ret_status)
				*statep->close_ret_status =
				    IBT_CM_CLOSED_ALREADY;
			mutex_exit(&statep->state_mutex);

			return (IBT_SUCCESS);

		} else if ((statep->state == IBCM_STATE_DREQ_RCVD) ||
		    (statep->state == IBCM_STATE_DREQ_SENT) ||
		    (statep->state == IBCM_STATE_DREP_RCVD) ||
		    ((statep->state == IBCM_STATE_TIMED_OUT) &&
		    (statep->timedout_state == IBCM_STATE_DREQ_SENT))) {

			/*
			 * Either the remote or local client has already
			 * initiated the teardown.  IBCM_STATE_DREP_RCVD is
			 * possible, if CM initiated teardown without client's
			 * knowledge, for stale handling, etc.,
			 */
			if (mode == IBT_NOCALLBACKS) {
				if (statep->close_nocb_state == IBCM_UNBLOCK) {
					statep->close_nocb_state = IBCM_FAIL;
					/* enable free qp after return */
					ibtl_cm_chan_is_closing(
					    statep->channel);
				} else while (statep->close_nocb_state ==
				    IBCM_BLOCK)
					cv_wait(&statep->block_client_cv,
					    &statep->state_mutex);
				statep->cm_handler = NULL; /* sanity setting */
				if (statep->close_ret_status)
					*statep->close_ret_status =
					    IBT_CM_CLOSED_ALREADY;
			} else if (mode == IBT_BLOCKING) {
				/* wait until state is moved to timewait */
				while (statep->close_done != B_TRUE)
					cv_wait(&statep->block_client_cv,
					    &statep->state_mutex);
			}

			IBCM_REF_CNT_DECR(statep);
			mutex_exit(&statep->state_mutex);

			/* ret_status is set in state machine code */
			return (IBT_SUCCESS);

		} else if (statep->state == IBCM_STATE_TIMED_OUT) {

			if ((mode == IBT_BLOCKING) ||
			    (mode == IBT_NOCALLBACKS)) {

				/*
				 * wait until cm handler invocation and
				 * disassociation between statep and channel
				 * is complete
				 */
				while (statep->close_done != B_TRUE)
					cv_wait(&statep->block_client_cv,
					    &statep->state_mutex);
			}

			if (statep->close_ret_status)
				*statep->close_ret_status = IBT_CM_CLOSED_ABORT;
			IBCM_REF_CNT_DECR(statep);
			mutex_exit(&statep->state_mutex);

			return (IBT_SUCCESS);
		} else {
			/* no handling defined for any remaining state */
			IBCM_REF_CNT_DECR(statep);
			mutex_exit(&statep->state_mutex);

			return (IBT_CM_FAILURE);
		}
	}

	/* From here on the connection was found in established state */
	ASSERT(statep->close_nocb_state != IBCM_BLOCK);

	if (mode == IBT_NOCALLBACKS) {
		statep->close_nocb_state = IBCM_FAIL;
		statep->cm_handler = NULL;
		ibtl_cm_chan_is_closing(statep->channel);
		IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: "
		    "NOCALLBACKS on in statep = %p", statep);
	}

	if (statep->state != IBCM_STATE_ESTABLISHED) {
		goto lost_race;
	}

	/*
	 * Cancel/wait for any pending ibt_set_alt_path, and
	 * release state mutex
	 */
	ibcm_sync_lapr_idle(statep);

	ibcm_close_enter();

	/*
	 * The mutex was dropped above, so the state may have changed in the
	 * meantime; re-check before committing to sending the DREQ.
	 */
	mutex_enter(&statep->state_mutex);
	if (statep->state != IBCM_STATE_ESTABLISHED) {
		ibcm_close_exit();
		goto lost_race;
	}

	statep->state = IBCM_STATE_TRANSIENT_DREQ_SENT;
	statep->timerid = 0;
	statep->close_done = B_FALSE;
	statep->close_flow = 1;
	mutex_exit(&statep->state_mutex);

	/* the DREQ MAD is posted without state_mutex held */
	ibcm_post_dreq_mad(statep);

	mutex_enter(&statep->state_mutex);

lost_race:
	/* reached with state_mutex held, whether or not a DREQ was posted */
	if (mode == IBT_BLOCKING) {

		/* wait for DREP */
		while (statep->close_done != B_TRUE)
			cv_wait(&statep->block_client_cv,
			    &statep->state_mutex);

		IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: chan 0x%p "
		    "done blocking", channel);
	}

	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);

	/* If this message isn't seen then ibt_close_rc_channel failed */
	IBTF_DPRINTF_L5(cmlog, "ibcm_close_rc_channel: chan 0x%p done",
	    channel);

	return (IBT_SUCCESS);
}
1614 
1615 ibt_status_t
1616 ibt_recycle_rc(ibt_channel_hdl_t rc_chan, ibt_cep_flags_t control,
1617     uint8_t hca_port_num, ibt_recycle_handler_t func, void *arg)
1618 {
1619 	ibcm_state_data_t		*statep;
1620 	ibcm_taskq_recycle_arg_t	*ibcm_tq_recycle_arg;
1621 	ibt_qp_query_attr_t		qp_attr;
1622 	ibt_status_t			retval;
1623 
1624 	IBTF_DPRINTF_L3(cmlog, "ibt_recycle_rc (%p, 0x%X, %d, %p, %p)", rc_chan,
1625 	    control, hca_port_num, func, arg);
1626 
1627 	if (IBCM_INVALID_CHANNEL(rc_chan)) {
1628 		IBTF_DPRINTF_L2(cmlog, "ibt_recycle_rc: invalid channel");
1629 		return (IBT_CHAN_HDL_INVALID);
1630 	}
1631 
1632 	/* check qp state */
1633 	retval = ibt_query_qp(rc_chan, &qp_attr);
1634 
1635 	if (retval != IBT_SUCCESS)
1636 		return (retval);
1637 
1638 	if (qp_attr.qp_info.qp_trans != IBT_RC_SRV)
1639 		return (IBT_CHAN_SRV_TYPE_INVALID);
1640 
1641 	if (qp_attr.qp_info.qp_state != IBT_STATE_ERROR)
1642 		return (IBT_CHAN_STATE_INVALID);
1643 
1644 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ibcm_tq_recycle_arg))
1645 
1646 	ibcm_tq_recycle_arg = kmem_alloc(sizeof (ibcm_taskq_recycle_arg_t),
1647 	    KM_SLEEP);
1648 
1649 	ibcm_tq_recycle_arg->rc_chan		= rc_chan;
1650 	ibcm_tq_recycle_arg->control		= control;
1651 	ibcm_tq_recycle_arg->hca_port_num	= hca_port_num;
1652 	ibcm_tq_recycle_arg->func		= func;
1653 	ibcm_tq_recycle_arg->arg		= arg;
1654 
1655 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*ibcm_tq_recycle_arg))
1656 
1657 	IBCM_GET_CHAN_PRIVATE(rc_chan, statep);
1658 
1659 	/*
1660 	 * If non-blocking ie., func specified and channel has not yet completed
1661 	 * the timewait, then schedule the work for later
1662 	 */
1663 	if ((func != NULL) && (statep != NULL)) {
1664 		IBCM_RELEASE_CHAN_PRIVATE(rc_chan);
1665 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(statep->recycle_arg))
1666 		statep->recycle_arg = ibcm_tq_recycle_arg;
1667 		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(statep->recycle_arg))
1668 		return (IBT_SUCCESS);
1669 	}
1670 
1671 	/*
1672 	 * if blocking ie., func specified, and channel has not yet completed
1673 	 * the timewait, then block until the channel completes the timewait
1674 	 */
1675 	if (statep != NULL)
1676 		IBCM_RELEASE_CHAN_PRIVATE(rc_chan);
1677 	IBCM_WAIT_CHAN_PRIVATE(rc_chan);
1678 
1679 	if (func) {	/* NON BLOCKING case. Taskq for QP state change */
1680 		(void) taskq_dispatch(ibcm_taskq, ibcm_process_rc_recycle,
1681 		    ibcm_tq_recycle_arg, TQ_SLEEP);
1682 		return (IBT_SUCCESS);
1683 	} else	/* BLOCKING case */
1684 		return (ibcm_process_rc_recycle_ret(ibcm_tq_recycle_arg));
1685 }
1686 
/*
 * ibcm_process_rc_recycle()
 *	void-returning wrapper around ibcm_process_rc_recycle_ret(); it is
 *	the function ibt_recycle_rc() hands to taskq_dispatch().  The status
 *	is discarded here.
 */
void
ibcm_process_rc_recycle(void *recycle_arg)
{
	(void) ibcm_process_rc_recycle_ret(recycle_arg);
}
1692 
1693 static ibt_status_t
1694 ibcm_process_rc_recycle_ret(void *recycle_arg)
1695 {
1696 	ibt_qp_info_t			qp_info;
1697 	ibt_status_t			ibt_status = IBT_SUCCESS;
1698 	ibt_cep_modify_flags_t		cep_flags;
1699 	ibt_qp_query_attr_t		qp_attr;
1700 	ibcm_taskq_recycle_arg_t	*ibcm_tq_recycle_arg =
1701 	    (ibcm_taskq_recycle_arg_t *)recycle_arg;
1702 
1703 	/* QP must have been in error state */
1704 	ibt_status = ibt_query_qp(ibcm_tq_recycle_arg->rc_chan, &qp_attr);
1705 	if (ibt_status != IBT_SUCCESS)
1706 		IBTF_DPRINTF_L2(cmlog, "ibcm_process_rc_recycle_ret: "
1707 		    "chanp %p ibt_query_qp() = %d",
1708 		    ibcm_tq_recycle_arg->rc_chan, ibt_status);
1709 	else {
1710 		/* perform the QP state change from ERROR to RESET */
1711 		bzero(&qp_info, sizeof (qp_info));
1712 
1713 		qp_info.qp_trans = IBT_RC_SRV;
1714 		qp_info.qp_state = IBT_STATE_RESET;
1715 
1716 		/* Call modify_qp to move to RESET state */
1717 		ibt_status = ibt_modify_qp(ibcm_tq_recycle_arg->rc_chan,
1718 		    IBT_CEP_SET_STATE, &qp_info, NULL);
1719 
1720 		if (ibt_status != IBT_SUCCESS)
1721 			IBTF_DPRINTF_L2(cmlog, "ibcm_process_rc_recycle_ret: "
1722 			    "chanp %p ibt_modify_qp() = %d for ERROR to RESET",
1723 			    ibcm_tq_recycle_arg->rc_chan, ibt_status);
1724 	}
1725 
1726 	if (ibt_status == IBT_SUCCESS) {
1727 
1728 		qp_info.qp_state = IBT_STATE_INIT;
1729 
1730 		/* set flags for all mandatory args from RESET to INIT */
1731 		cep_flags = IBT_CEP_SET_STATE | IBT_CEP_SET_PORT;
1732 		cep_flags |= IBT_CEP_SET_RDMA_R | IBT_CEP_SET_RDMA_W;
1733 		cep_flags |= IBT_CEP_SET_ATOMIC;
1734 
1735 		qp_info.qp_transport.rc.rc_path.cep_hca_port_num =
1736 		    ibcm_tq_recycle_arg->hca_port_num;
1737 		qp_info.qp_flags |=
1738 		    ibcm_tq_recycle_arg->control & IBT_CEP_RDMA_RD;
1739 		qp_info.qp_flags |=
1740 		    ibcm_tq_recycle_arg->control & IBT_CEP_RDMA_WR;
1741 		qp_info.qp_flags |=
1742 		    ibcm_tq_recycle_arg->control & IBT_CEP_ATOMIC;
1743 
1744 		/* Always use the existing pkey */
1745 		qp_info.qp_transport.rc.rc_path.cep_pkey_ix =
1746 		    qp_attr. qp_info.qp_transport.rc.rc_path.cep_pkey_ix;
1747 
1748 		/* Call modify_qp to move to INIT state */
1749 		ibt_status = ibt_modify_qp(ibcm_tq_recycle_arg->rc_chan,
1750 		    cep_flags, &qp_info, NULL);
1751 
1752 		if (ibt_status != IBT_SUCCESS)
1753 			IBTF_DPRINTF_L2(cmlog, "ibcm_process_rc_recycle_ret: "
1754 			    "chanp %p ibt_modify_qp() = %d for RESET to INIT",
1755 			    ibcm_tq_recycle_arg->rc_chan, ibt_status);
1756 	}
1757 
1758 	/* Change the QP CM state to indicate QP being re-used */
1759 	if (ibt_status == IBT_SUCCESS)
1760 		ibtl_cm_chan_is_reused(ibcm_tq_recycle_arg->rc_chan);
1761 
1762 	/* Call func, if defined */
1763 	if (ibcm_tq_recycle_arg->func)
1764 		(*(ibcm_tq_recycle_arg->func))(ibt_status,
1765 		    ibcm_tq_recycle_arg->arg);
1766 
1767 	kmem_free(ibcm_tq_recycle_arg, sizeof (ibcm_taskq_recycle_arg_t));
1768 
1769 	return (ibt_status);
1770 }
1771 
1772 static void
1773 ibcm_process_abort_via_taskq(void *args)
1774 {
1775 	ibcm_state_data_t	*statep = (ibcm_state_data_t *)args;
1776 
1777 	ibcm_process_abort(statep);
1778 	mutex_enter(&statep->state_mutex);
1779 	IBCM_REF_CNT_DECR(statep);
1780 	mutex_exit(&statep->state_mutex);
1781 }
1782 
1783 /*
1784  * Local UD CM Handler's private data, used during ibt_request_ud_dest() in
1785  * Non-Blocking mode operations.
1786  */
typedef struct ibcm_local_handler_s {
	/* client's UD CM handler, invoked by ibcm_local_cm_handler() */
	ibt_cm_ud_handler_t	actual_cm_handler;
	/* client's private argument, passed first to actual_cm_handler */
	void			*actual_cm_private;
	/* UD destination updated with the QPN and QKey from the SIDR REP */
	ibt_ud_dest_t		*dest_hdl;
} ibcm_local_handler_t;
1792 
1793 _NOTE(READ_ONLY_DATA(ibcm_local_handler_s))
1794 
1795 /*
1796  * Local UD CM Handler, used when ibt_alloc_ud_dest() is issued in
1797  * NON-Blocking mode.
1798  *
1799  * Out here, we update the UD Destination handle with
1800  * the obtained DQPN and QKey (from SIDR REP) and invokes actual client
1801  * handler that was specified by the client.
1802  */
1803 static ibt_cm_status_t
1804 ibcm_local_cm_handler(void *priv, ibt_cm_ud_event_t *event,
1805     ibt_cm_ud_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
1806 {
1807 	ibcm_local_handler_t	*handler_priv = (ibcm_local_handler_t *)priv;
1808 
1809 	IBTF_DPRINTF_L4(cmlog, "ibcm_local_cm_handler: event %d",
1810 	    event->cm_type);
1811 
1812 	ASSERT(handler_priv != NULL);
1813 
1814 	switch (event->cm_type) {
1815 	case IBT_CM_UD_EVENT_SIDR_REP:
1816 		/* Update QPN & QKey from event into destination handle. */
1817 		if (handler_priv->dest_hdl != NULL) {
1818 			handler_priv->dest_hdl->ud_dst_qpn =
1819 			    event->cm_event.sidr_rep.srep_remote_qpn;
1820 			handler_priv->dest_hdl->ud_qkey =
1821 			    event->cm_event.sidr_rep.srep_remote_qkey;
1822 		}
1823 
1824 		/* Invoke the client handler - inform only, so ignore retval */
1825 		(void) handler_priv->actual_cm_handler(
1826 		    handler_priv->actual_cm_private, event, ret_args, priv_data,
1827 		    len);
1828 
1829 		/* Free memory allocated for local handler's private data. */
1830 		if (handler_priv != NULL)
1831 			kmem_free(handler_priv, sizeof (*handler_priv));
1832 
1833 		break;
1834 	default:
1835 		IBTF_DPRINTF_L2(cmlog, "ibcm_local_cm_handler: ERROR");
1836 		break;
1837 	}
1838 
1839 	return (IBT_CM_ACCEPT);
1840 }
1841 
1842 
1843 /* Validate the input UD destination attributes.  */
1844 static ibt_status_t
1845 ibcm_validate_dqpn_data(ibt_ud_dest_attr_t *attr, ibt_execution_mode_t mode,
1846     ibt_ud_returns_t *ret_args)
1847 {
1848 	/* cm handler must always be specified */
1849 	if (mode == IBT_NONBLOCKING && attr->ud_cm_handler == NULL) {
1850 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1851 		    "CM handler is not specified ");
1852 		return (IBT_INVALID_PARAM);
1853 	}
1854 
1855 	if (mode == IBT_NONBLOCKING) {
1856 		if (ret_args != NULL) {
1857 			IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1858 			    "ret_args should be NULL when called in "
1859 			    "non-blocking mode");
1860 			return (IBT_INVALID_PARAM);
1861 		}
1862 	} else if (mode == IBT_BLOCKING) {
1863 		if (ret_args == NULL) {
1864 			IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1865 			    "ret_args should be Non-NULL when called in "
1866 			    "blocking mode");
1867 			return (IBT_INVALID_PARAM);
1868 		}
1869 	} else {
1870 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1871 		    "invalid mode %x specified ", mode);
1872 		return (IBT_INVALID_PARAM);
1873 	}
1874 
1875 	if (attr->ud_sid == 0) {
1876 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1877 		    "ServiceID must be specified. ");
1878 		return (IBT_INVALID_PARAM);
1879 	}
1880 
1881 	if (attr->ud_addr == NULL) {
1882 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1883 		    "Address Info NULL");
1884 		return (IBT_INVALID_PARAM);
1885 	}
1886 
1887 	/* Validate SGID */
1888 	if ((attr->ud_addr->av_sgid.gid_prefix == 0) ||
1889 	    (attr->ud_addr->av_sgid.gid_guid == 0)) {
1890 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: Invalid SGID");
1891 		return (IBT_INVALID_PARAM);
1892 	}
1893 	IBTF_DPRINTF_L3(cmlog, "ibcm_validate_dqpn_data: SGID<%llX:%llX>",
1894 	    attr->ud_addr->av_sgid.gid_prefix,
1895 	    attr->ud_addr->av_sgid.gid_guid);
1896 
1897 	/* Validate DGID */
1898 	if ((attr->ud_addr->av_dgid.gid_prefix == 0) ||
1899 	    (attr->ud_addr->av_dgid.gid_guid == 0)) {
1900 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: Invalid DGID");
1901 		return (IBT_INVALID_PARAM);
1902 	}
1903 	IBTF_DPRINTF_L3(cmlog, "ibcm_validate_dqpn_data: DGID<%llX:%llX>",
1904 	    attr->ud_addr->av_dgid.gid_prefix,
1905 	    attr->ud_addr->av_dgid.gid_guid);
1906 
1907 	return (IBT_SUCCESS);
1908 }
1909 
1910 
1911 /* Perform SIDR to retrieve DQPN and QKey.  */
1912 static ibt_status_t
1913 ibcm_ud_get_dqpn(ibt_ud_dest_attr_t *attr, ibt_execution_mode_t mode,
1914     ibt_ud_returns_t *ret_args)
1915 {
1916 	ibt_status_t		retval;
1917 	ib_pkey_t		ud_pkey;
1918 	ibmf_handle_t		ibmf_hdl;
1919 	ibmf_msg_t		*ibmf_msg;
1920 	ibcm_hca_info_t		*hcap;
1921 	ibcm_sidr_req_msg_t	*sidr_req_msgp;
1922 	ibcm_ud_state_data_t	*ud_statep;
1923 	ibtl_cm_hca_port_t	port;
1924 	ibcm_sidr_srch_t	sidr_entry;
1925 	ibcm_qp_list_t		*cm_qp_entry;
1926 
1927 	/* Retrieve HCA GUID value from the available SGID info. */
1928 	retval = ibtl_cm_get_hca_port(attr->ud_addr->av_sgid, 0, &port);
1929 	if ((retval != IBT_SUCCESS) || (port.hp_port == 0)) {
1930 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: "
1931 		    "ibtl_cm_get_hca_port failed: %d", retval);
1932 		return (retval);
1933 	}
1934 
1935 	IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: "
1936 	    "HCA GUID:%llX, port_num:%d", port.hp_hca_guid, port.hp_port);
1937 
1938 	/* Lookup the HCA info for this GUID */
1939 	if ((hcap = ibcm_find_hca_entry(port.hp_hca_guid)) == NULL) {
1940 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: hcap is NULL");
1941 		return (IBT_HCA_INVALID);
1942 	}
1943 
1944 	/* Return failure if the HCA device or Port is not operational */
1945 
1946 	if ((retval = ibt_get_port_state_byguid(port.hp_hca_guid, port.hp_port,
1947 	    NULL, NULL)) != IBT_SUCCESS) {
1948 		/* Device Port is not in good state, don't use it. */
1949 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: Invalid "
1950 		    "port specified or port not active");
1951 		ibcm_dec_hca_acc_cnt(hcap);
1952 		return (retval);
1953 	}
1954 
1955 	retval = ibt_index2pkey_byguid(port.hp_hca_guid, port.hp_port,
1956 	    attr->ud_pkey_ix, &ud_pkey);
1957 	if (retval != IBT_SUCCESS) {
1958 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: "
1959 		    "Failed to convert index2pkey: %d", retval);
1960 		ibcm_dec_hca_acc_cnt(hcap);
1961 		return (retval);
1962 	}
1963 
1964 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(sidr_entry))
1965 
1966 	/* Allocate a new request id */
1967 	if (ibcm_alloc_reqid(hcap, &sidr_entry.srch_req_id) == IBCM_FAILURE) {
1968 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: "
1969 		    "no req id available");
1970 		ibcm_dec_hca_acc_cnt(hcap);
1971 		return (IBT_INSUFF_KERNEL_RESOURCE);
1972 	}
1973 
1974 	if ((hcap->hca_port_info[port.hp_port - 1].port_ibmf_hdl == NULL) &&
1975 	    ((retval = ibcm_hca_reinit_port(hcap, port.hp_port - 1))
1976 	    != IBT_SUCCESS)) {
1977 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: "
1978 		    "ibmf reg or callback setup failed during re-initialize");
1979 		return (retval);
1980 	}
1981 
1982 	ibmf_hdl = hcap->hca_port_info[port.hp_port - 1].port_ibmf_hdl;
1983 
1984 	/* find the ibmf QP to post the SIDR REQ */
1985 	if ((cm_qp_entry = ibcm_find_qp(hcap, port.hp_port, ud_pkey)) ==
1986 	    NULL) {
1987 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: IBMF QP allocation"
1988 		    " failed");
1989 		ibcm_dec_hca_acc_cnt(hcap);
1990 		return (IBT_INSUFF_RESOURCE);
1991 	}
1992 
1993 	if ((retval = ibcm_alloc_out_msg(ibmf_hdl, &ibmf_msg, MAD_METHOD_SEND))
1994 	    != IBT_SUCCESS) {
1995 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: IBMF MSG allocation"
1996 		    " failed");
1997 		ibcm_release_qp(cm_qp_entry);
1998 		ibcm_dec_hca_acc_cnt(hcap);
1999 		return (retval);
2000 	}
2001 
2002 	sidr_entry.srch_lid = port.hp_base_lid;
2003 	sidr_entry.srch_gid = attr->ud_addr->av_sgid;
2004 	sidr_entry.srch_grh_exists = attr->ud_addr->av_send_grh;
2005 	sidr_entry.srch_mode = IBCM_ACTIVE_MODE;
2006 
2007 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(sidr_entry))
2008 
2009 	/* do various allocations needed here */
2010 	rw_enter(&hcap->hca_sidr_list_lock, RW_WRITER);
2011 
2012 	(void) ibcm_find_sidr_entry(&sidr_entry, hcap, &ud_statep,
2013 	    IBCM_FLAG_ADD);
2014 	rw_exit(&hcap->hca_sidr_list_lock);
2015 
2016 	/* Increment hca's resource count */
2017 	ibcm_inc_hca_res_cnt(hcap);
2018 
2019 	/* After a resource created on hca, no need to hold the acc cnt */
2020 	ibcm_dec_hca_acc_cnt(hcap);
2021 
2022 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ud_statep))
2023 
2024 	/* Initialize some ud_statep fields */
2025 	ud_statep->ud_stored_msg = ibmf_msg;
2026 	ud_statep->ud_svc_id = attr->ud_sid;
2027 	ud_statep->ud_pkt_life_time =
2028 	    ibt_ib2usec(attr->ud_pkt_lt);
2029 	ud_statep->ud_stored_reply_addr.cm_qp_entry = cm_qp_entry;
2030 
2031 	/* set remaining retry cnt */
2032 	ud_statep->ud_remaining_retry_cnt = ud_statep->ud_max_cm_retries;
2033 
2034 	/*
2035 	 * Get UD handler and corresponding args which is pass it back
2036 	 * as first argument for the handler.
2037 	 */
2038 	ud_statep->ud_state_cm_private = attr->ud_cm_private;
2039 
2040 	if (mode == IBT_BLOCKING)
2041 		ud_statep->ud_return_data = ret_args;
2042 	else
2043 		ud_statep->ud_cm_handler = attr->ud_cm_handler;
2044 
2045 	/* Initialize the fields of ud_statep->ud_stored_reply_addr */
2046 	ud_statep->ud_stored_reply_addr.grh_exists = attr->ud_addr->av_send_grh;
2047 	ud_statep->ud_stored_reply_addr.ibmf_hdl = ibmf_hdl;
2048 	ud_statep->ud_stored_reply_addr.grh_hdr.ig_hop_limit =
2049 	    attr->ud_addr->av_hop;
2050 	ud_statep->ud_stored_reply_addr.grh_hdr.ig_sender_gid =
2051 	    attr->ud_addr->av_sgid;
2052 	ud_statep->ud_stored_reply_addr.grh_hdr.ig_recver_gid =
2053 	    attr->ud_addr->av_dgid;
2054 	ud_statep->ud_stored_reply_addr.grh_hdr.ig_tclass =
2055 	    attr->ud_addr->av_tclass;
2056 	ud_statep->ud_stored_reply_addr.grh_hdr.ig_flow_label =
2057 	    attr->ud_addr->av_flow & IB_GRH_FLOW_LABEL_MASK;
2058 
2059 	/* needs to be derived based on the base LID and path bits */
2060 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_local_lid =
2061 	    port.hp_base_lid;
2062 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_remote_lid =
2063 	    attr->ud_addr->av_dlid;
2064 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_p_key = ud_pkey;
2065 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_q_key = IB_GSI_QKEY;
2066 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_service_level =
2067 	    attr->ud_addr->av_srvl;
2068 
2069 	/*
2070 	 * This may be enchanced later, to use a remote qno based on past
2071 	 * redirect rej mad responses. This would be the place to specify
2072 	 * appropriate remote qno
2073 	 */
2074 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_remote_qno = 1;
2075 
2076 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sidr_req_msgp))
2077 
2078 	/* Initialize the SIDR REQ message fields */
2079 	sidr_req_msgp =
2080 	    (ibcm_sidr_req_msg_t *)IBCM_OUT_MSGP(ud_statep->ud_stored_msg);
2081 
2082 	sidr_req_msgp->sidr_req_request_id = h2b32(ud_statep->ud_req_id);
2083 	sidr_req_msgp->sidr_req_service_id = h2b64(attr->ud_sid);
2084 	sidr_req_msgp->sidr_req_pkey = h2b16(ud_pkey);
2085 	IBCM_OUT_HDRP(ud_statep->ud_stored_msg)->AttributeID =
2086 	    h2b16(IBCM_INCOMING_SIDR_REQ + IBCM_ATTR_BASE_ID);
2087 
2088 	if ((attr->ud_priv_data != NULL) && (attr->ud_priv_data_len > 0)) {
2089 		bcopy(attr->ud_priv_data, sidr_req_msgp->sidr_req_private_data,
2090 		    min(attr->ud_priv_data_len, IBT_SIDR_REQ_PRIV_DATA_SZ));
2091 	}
2092 
2093 	/* Send out the SIDR REQ message */
2094 	ud_statep->ud_state = IBCM_STATE_SIDR_REQ_SENT;
2095 	ud_statep->ud_timer_stored_state = IBCM_STATE_SIDR_REQ_SENT;
2096 	IBCM_UD_REF_CNT_INCR(ud_statep); /* for non-blocking SIDR REQ post */
2097 	ud_statep->ud_timer_value = ibt_ib2usec(ibcm_max_sidr_rep_proctime) +
2098 	    (ud_statep->ud_pkt_life_time * 2);
2099 
2100 	IBCM_OUT_HDRP(ud_statep->ud_stored_msg)->TransactionID =
2101 	    h2b64(ibcm_generate_tranid(IBCM_INCOMING_SIDR_REQ,
2102 	    ud_statep->ud_req_id, 0));
2103 
2104 	IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: timer_value in HZ = %x",
2105 	    ud_statep->ud_timer_value);
2106 
2107 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*ud_statep))
2108 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*sidr_req_msgp))
2109 
2110 	ibcm_post_ud_mad(ud_statep, ud_statep->ud_stored_msg,
2111 	    ibcm_post_sidr_req_complete, ud_statep);
2112 
2113 	mutex_enter(&ud_statep->ud_state_mutex);
2114 
2115 	/* Wait for SIDR_REP */
2116 	if (mode == IBT_BLOCKING) {
2117 		IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: blocking");
2118 
2119 		while (ud_statep->ud_blocking_done != B_TRUE) {
2120 			cv_wait(&ud_statep->ud_block_client_cv,
2121 			    &ud_statep->ud_state_mutex);
2122 		}
2123 
2124 		IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: finished blocking");
2125 
2126 		if (ret_args->ud_status == IBT_CM_SREP_QPN_VALID) {
2127 			IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: DQPN = %x, "
2128 			    "status = %x, QKey = %x", ret_args->ud_dqpn,
2129 			    ret_args->ud_status, ret_args->ud_qkey);
2130 
2131 		} else {
2132 			IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: Status<%x>",
2133 			    ret_args->ud_status);
2134 			retval = IBT_CM_FAILURE;
2135 		}
2136 	}
2137 
2138 	IBCM_UD_REF_CNT_DECR(ud_statep);
2139 	mutex_exit(&ud_statep->ud_state_mutex);
2140 
2141 	IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: done");
2142 
2143 	return (retval);
2144 }
2145 
2146 
2147 /*
2148  * Function:
2149  *	ibt_request_ud_dest
2150  * Input:
2151  *	ud_dest		A previously allocated UD destination handle.
2152  *	mode		This function can execute in blocking or non blocking
2153  *			modes.
2154  *	attr		UD destination attributes to be modified.
2155  * Output:
2156  *	ud_ret_args	If the function is called in blocking mode, ud_ret_args
2157  *			should be a pointer to an ibt_ud_returns_t struct.
2158  * Returns:
2159  *	IBT_SUCCESS
2160  * Description:
2161  *	Modify a previously allocated UD destination handle based on the
2162  *	results of doing the SIDR protocol.
2163  */
2164 ibt_status_t
2165 ibt_request_ud_dest(ibt_ud_dest_hdl_t ud_dest, ibt_execution_mode_t mode,
2166     ibt_ud_dest_attr_t *attr, ibt_ud_returns_t *ud_ret_args)
2167 {
2168 	ibt_status_t		retval;
2169 	ibt_ud_dest_t		*ud_destp;
2170 	ibcm_local_handler_t	*local_handler_priv = NULL;
2171 
2172 	IBTF_DPRINTF_L3(cmlog, "ibt_request_ud_dest(%p, %x, %p, %p)",
2173 	    ud_dest, mode, attr, ud_ret_args);
2174 
2175 	retval = ibcm_validate_dqpn_data(attr, mode, ud_ret_args);
2176 	if (retval != IBT_SUCCESS) {
2177 		return (retval);
2178 	}
2179 
2180 	ud_destp = ud_dest;
2181 
2182 	/* Allocate an Address handle. */
2183 	retval = ibt_modify_ah(ud_destp->ud_dest_hca, ud_destp->ud_ah,
2184 	    attr->ud_addr);
2185 	if (retval != IBT_SUCCESS) {
2186 		IBTF_DPRINTF_L2(cmlog, "ibt_request_ud_dest: "
2187 		    "Address Handle Modification failed: %d", retval);
2188 		return (retval);
2189 	}
2190 
2191 	if (mode == IBT_NONBLOCKING) {
2192 		/*
2193 		 * In NON-BLOCKING mode, and we need to update the destination
2194 		 * handle with the DQPN and QKey that are obtained from
2195 		 * SIDR REP, hook-up our own handler, so that we can catch
2196 		 * the event, and we ourselves call the actual client's
2197 		 * ud_cm_handler, in our handler.
2198 		 */
2199 
2200 		/* Allocate memory for local handler's private data. */
2201 		local_handler_priv =
2202 		    kmem_alloc(sizeof (*local_handler_priv), KM_SLEEP);
2203 
2204 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*local_handler_priv))
2205 
2206 		local_handler_priv->actual_cm_handler = attr->ud_cm_handler;
2207 		local_handler_priv->actual_cm_private = attr->ud_cm_private;
2208 		local_handler_priv->dest_hdl = ud_destp;
2209 
2210 		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*local_handler_priv))
2211 
2212 		attr->ud_cm_handler = ibcm_local_cm_handler;
2213 		attr->ud_cm_private = local_handler_priv;
2214 	}
2215 
2216 	/* In order to get DQPN and Destination QKey, perform SIDR */
2217 	retval = ibcm_ud_get_dqpn(attr, mode, ud_ret_args);
2218 	if (retval != IBT_SUCCESS) {
2219 		IBTF_DPRINTF_L2(cmlog, "ibt_request_ud_dest: "
2220 		    "Failed to get DQPN: %d", retval);
2221 
2222 		/* Free memory allocated for local handler's private data. */
2223 		if (local_handler_priv != NULL)
2224 			kmem_free(local_handler_priv,
2225 			    sizeof (*local_handler_priv));
2226 		return (retval);
2227 	}
2228 
2229 	/*
2230 	 * Fill in the dqpn and dqkey as obtained from ud_ret_args,
2231 	 * values will be valid only on BLOCKING mode.
2232 	 */
2233 	if (mode == IBT_BLOCKING) {
2234 		ud_destp->ud_dst_qpn = ud_ret_args->ud_dqpn;
2235 		ud_destp->ud_qkey = ud_ret_args->ud_qkey;
2236 	}
2237 
2238 	return (retval);
2239 }
2240 
2241 /*
2242  * Function:
2243  *	ibt_ud_get_dqpn
2244  * Input:
2245  *	attr		A pointer to an ibt_ud_dest_attr_t struct that are
2246  *			required for SIDR REQ message. Not specified attributes
2247  *			should be set to "NULL" or "0".
2248  *			ud_sid, ud_addr and ud_pkt_lt must be specified.
2249  *	mode		This function can execute in blocking or non blocking
2250  *			modes.
2251  * Output:
2252  *	returns		If the function is called in blocking mode, returns
2253  *			should be a pointer to an ibt_ud_returns_t struct.
2254  * Return:
2255  *	IBT_SUCCESS	on success or respective failure on error.
2256  * Description:
2257  *	Finds the destination QPN at the specified destination that the
2258  *	specified service can be reached on. The IBTF CM initiates the
2259  *	service ID resolution protocol (SIDR) to determine a destination QPN.
2260  *
2261  * NOTE: SIDR_REQ is initiated from active side.
2262  */
2263 ibt_status_t
2264 ibt_ud_get_dqpn(ibt_ud_dest_attr_t *attr, ibt_execution_mode_t mode,
2265     ibt_ud_returns_t *returns)
2266 {
2267 	ibt_status_t		retval;
2268 
2269 	IBTF_DPRINTF_L3(cmlog, "ibt_ud_get_dqpn(%p, %x, %p)",
2270 	    attr, mode, returns);
2271 
2272 	retval = ibcm_validate_dqpn_data(attr, mode, returns);
2273 	if (retval != IBT_SUCCESS) {
2274 		return (retval);
2275 	}
2276 
2277 	return (ibcm_ud_get_dqpn(attr, mode, returns));
2278 }
2279 
2280 
2281 /*
2282  * ibt_cm_delay:
2283  *	A client CM handler function can call this function
2284  *	to extend its response time to a CM event.
2285  * INPUTS:
2286  *	flags		Indicates what CM message processing is being delayed
2287  *			by the CM handler, valid values are:
2288  *				IBT_CM_DELAY_REQ
2289  *				IBT_CM_DELAY_REP
2290  *				IBT_CM_DELAY_LAP
2291  *	cm_session_id	The session ID that was passed to client srv_handler
2292  *			by the CM
2293  *	service_time	The extended service time
2294  *	priv_data	Vendor specific data to be sent in the CM generated
2295  *			MRA message. Should be NULL if not specified.
2296  *	len		The number of bytes of data specified by priv_data.
2297  *
2298  * RETURN VALUES:
2299  *	IBT_SUCCESS	on success (or respective failure on error)
2300  */
2301 ibt_status_t
2302 ibt_cm_delay(ibt_cmdelay_flags_t flags, void *cm_session_id,
2303     clock_t service_time, void *priv_data, ibt_priv_data_len_t len)
2304 {
2305 	uint8_t			msg_typ = 0;
2306 	ibcm_mra_msg_t		*mra_msgp;
2307 	ibcm_state_data_t	*statep;
2308 	ibt_status_t		status;
2309 
2310 	IBTF_DPRINTF_L3(cmlog, "ibt_cm_delay(0x%x, %p, 0x%x)",
2311 	    flags, cm_session_id, service_time);
2312 
2313 	/*
2314 	 * Make sure channel is associated with a statep
2315 	 */
2316 	statep = (ibcm_state_data_t *)cm_session_id;
2317 
2318 	if (statep == NULL) {
2319 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_delay: statep NULL");
2320 		return (IBT_INVALID_PARAM);
2321 	}
2322 
2323 	IBTF_DPRINTF_L4(cmlog, "ibt_cm_delay: statep %p", statep);
2324 
2325 	/* Allocate an ibmf msg for mra, if not allocated yet */
2326 	if (statep->mra_msg == NULL) {
2327 		if ((status = ibcm_alloc_out_msg(
2328 		    statep->stored_reply_addr.ibmf_hdl, &statep->mra_msg,
2329 		    MAD_METHOD_SEND)) != IBT_SUCCESS) {
2330 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_delay: chan 0x%p"
2331 			    "IBMF MSG allocation failed", statep->channel);
2332 			return (status);
2333 		}
2334 	}
2335 
2336 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mra_msgp))
2337 
2338 	mra_msgp = (ibcm_mra_msg_t *)IBCM_OUT_MSGP(statep->mra_msg);
2339 	mra_msgp->mra_local_comm_id = h2b32(statep->local_comid);
2340 	mra_msgp->mra_remote_comm_id = h2b32(statep->remote_comid);
2341 
2342 	/* fill in rest of MRA's fields - Message MRAed and Service Timeout */
2343 	if (flags == IBT_CM_DELAY_REQ) {
2344 		msg_typ = IBT_CM_MRA_TYPE_REQ;
2345 	} else if (flags == IBT_CM_DELAY_REP) {
2346 		msg_typ = IBT_CM_MRA_TYPE_REP;
2347 	} else if (flags == IBT_CM_DELAY_LAP) {
2348 		msg_typ = IBT_CM_MRA_TYPE_LAP;
2349 	}
2350 
2351 	mra_msgp->mra_message_type_plus = msg_typ << 6;
2352 	mra_msgp->mra_service_timeout_plus = ibt_usec2ib(service_time) << 3;
2353 
2354 	len = min(len, IBT_MRA_PRIV_DATA_SZ);
2355 	if (priv_data && (len > 0))
2356 		bcopy(priv_data, mra_msgp->mra_private_data, len);
2357 
2358 	IBCM_OUT_HDRP(statep->mra_msg)->AttributeID =
2359 	    h2b16(IBCM_INCOMING_MRA + IBCM_ATTR_BASE_ID);
2360 
2361 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mra_msgp))
2362 
2363 	mutex_enter(&statep->state_mutex);
2364 
2365 	if ((statep->mode == IBCM_ACTIVE_MODE) &&
2366 	    (statep->state == IBCM_STATE_REP_RCVD)) {
2367 		statep->state = IBCM_STATE_MRA_REP_SENT;
2368 	} else if (statep->mode == IBCM_PASSIVE_MODE) {
2369 		if (statep->state == IBCM_STATE_REQ_RCVD) {
2370 			statep->state = IBCM_STATE_MRA_SENT;
2371 		} else if (statep->ap_state == IBCM_AP_STATE_LAP_RCVD) {
2372 			statep->ap_state = IBCM_AP_STATE_MRA_LAP_RCVD;
2373 		} else {
2374 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_delay: invalid state "
2375 			    "/ap_state/mode %x, %x, %x", statep->state,
2376 			    statep->ap_state, statep->mode);
2377 			mutex_exit(&statep->state_mutex);
2378 			return (IBT_CHAN_STATE_INVALID);
2379 		}
2380 	} else {
2381 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_delay: invalid state "
2382 		    "/ap_state/mode %x, %x, %x", statep->state,
2383 		    statep->ap_state, statep->mode);
2384 		mutex_exit(&statep->state_mutex);
2385 
2386 		return (IBT_CHAN_STATE_INVALID);
2387 	}
2388 	/* service time is usecs, stale_clock is nsecs */
2389 	statep->stale_clock = gethrtime() +
2390 	    (hrtime_t)ibt_ib2usec(ibt_usec2ib(service_time)) * (1000 *
2391 	    statep->max_cm_retries);
2392 
2393 	statep->send_mad_flags |= IBCM_MRA_POST_BUSY;
2394 	IBCM_REF_CNT_INCR(statep);	/* for ibcm_post_mra_complete */
2395 	mutex_exit(&statep->state_mutex);
2396 
2397 	IBCM_OUT_HDRP(statep->mra_msg)->TransactionID =
2398 	    IBCM_OUT_HDRP(statep->stored_msg)->TransactionID;
2399 
2400 	/* post the MRA mad in blocking mode, as no timers involved */
2401 	ibcm_post_rc_mad(statep, statep->mra_msg, ibcm_post_mra_complete,
2402 	    statep);
2403 	ibcm_insert_trace(statep, IBCM_TRACE_OUTGOING_MRA);
2404 	/* If this message isn't seen then ibt_cm_delay failed */
2405 	IBTF_DPRINTF_L3(cmlog, "ibt_cm_delay: done !!");
2406 
2407 	return (IBT_SUCCESS);
2408 }
2409 
2410 
2411 /*
2412  * ibt_register_service()
2413  *	Register a service with the IBCM
2414  *
2415  * INPUTS:
2416  *	ibt_hdl		The IBT client handle returned to the client
2417  *			on an ibt_attach() call.
2418  *
2419  *	srv		The address of a ibt_srv_desc_t that describes
2420  *			the service, containing the following:
2421  *
2422  *		sd_ud_handler	The Service CM UD event Handler.
2423  *		sd_handler	The Service CM RC/UC/RD event Handler.
2424  *		sd_flags	Service flags (peer-to-peer, or not).
2425  *
2426  *	sid		This tells CM if the service is local (sid is 0) or
2427  *			wellknown (sid is the starting service id of the range).
2428  *
2429  *	num_sids	The number of contiguous service-ids to reserve.
2430  *
2431  *	srv_hdl		The address of a service identification handle, used
2432  *			to deregister a service, and to bind GIDs to.
2433  *
2434  *	ret_sid		The address to store the Service ID return value.
2435  *			If num_sids > 1, ret_sid is the first Service ID
2436  *			in the range.
2437  *
2438  * ibt_register_service() returns:
2439  *	IBT_SUCCESS		- added a service successfully.
2440  *	IBT_INVALID_PARAM	- invalid input parameter.
2441  *	IBT_CM_FAILURE		- failed to add the service.
2442  *	IBT_CM_SERVICE_EXISTS	- service already exists.
2443  *	IBT_INSUFF_KERNEL_RESOURCE - ran out of local service ids (should
2444  *				     never happen).
2445  */
2446 ibt_status_t
2447 ibt_register_service(ibt_clnt_hdl_t ibt_hdl, ibt_srv_desc_t *srv,
2448     ib_svc_id_t sid, int num_sids, ibt_srv_hdl_t *srv_hdl, ib_svc_id_t *ret_sid)
2449 {
2450 	ibcm_svc_info_t		*svcinfop;
2451 
2452 	IBTF_DPRINTF_L2(cmlog, "ibt_register_service(%p, %p, %llx, %d)",
2453 	    ibt_hdl, srv, (longlong_t)sid, num_sids);
2454 
2455 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*svcinfop))
2456 
2457 	*srv_hdl = NULL;
2458 
2459 	if (num_sids <= 0) {
2460 		IBTF_DPRINTF_L2(cmlog, "ibt_register_service: "
2461 		    "Invalid number of service-ids specified (%d)", num_sids);
2462 		return (IBT_INVALID_PARAM);
2463 	}
2464 
2465 	if (sid == 0) {
2466 		if (ret_sid == NULL)
2467 			return (IBT_INVALID_PARAM);
2468 		sid = ibcm_alloc_local_sids(num_sids);
2469 		if (sid == 0)
2470 			return (IBT_INSUFF_KERNEL_RESOURCE);
2471 
2472 	/* Make sure that the ServiceId specified is not of LOCAL AGN type. */
2473 	} else if ((sid & IB_SID_AGN_MASK) == IB_SID_AGN_LOCAL) {
2474 		IBTF_DPRINTF_L2(cmlog, "ibt_register_service: "
2475 		    "Invalid non-LOCAL SID specified: 0x%llX",
2476 		    (longlong_t)sid);
2477 		return (IBT_INVALID_PARAM);
2478 	}
2479 
2480 	svcinfop = ibcm_create_svc_entry(sid, num_sids);
2481 
2482 	if (svcinfop == NULL) {
2483 		IBTF_DPRINTF_L2(cmlog, "ibt_register_service: "
2484 		    "Service-ID 0x%llx already registered", (longlong_t)sid);
2485 		return (IBT_CM_SERVICE_EXISTS);
2486 	}
2487 
2488 	/*
2489 	 * 'sid' and 'num_sids' are filled in ibcm_create_svc_entry()
2490 	 */
2491 	svcinfop->svc_flags = srv->sd_flags;
2492 	svcinfop->svc_rc_handler = srv->sd_handler;
2493 	svcinfop->svc_ud_handler = srv->sd_ud_handler;
2494 
2495 	if (ret_sid != NULL)
2496 		*ret_sid = sid;
2497 
2498 	*srv_hdl = svcinfop;
2499 
2500 	ibtl_cm_change_service_cnt(ibt_hdl, num_sids);
2501 
2502 	/* If this message isn't seen, then ibt_register_service failed. */
2503 	IBTF_DPRINTF_L2(cmlog, "ibt_register_service: done (%p, %llX)",
2504 	    svcinfop, sid);
2505 
2506 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*svcinfop))
2507 
2508 	return (IBT_SUCCESS);
2509 }
2510 
2511 
2512 static ibt_status_t
2513 ibcm_write_service_record(ibmf_saa_handle_t saa_handle,
2514     sa_service_record_t *srv_recp, ibmf_saa_access_type_t saa_type)
2515 {
2516 	int	rval;
2517 	int	retry;
2518 
2519 	ibcm_sa_access_enter();
2520 	for (retry = 0; retry < ibcm_max_sa_retries; retry++) {
2521 		rval = ibmf_saa_update_service_record(
2522 		    saa_handle, srv_recp, saa_type, 0);
2523 		if (rval != IBMF_TRANS_TIMEOUT) {
2524 			break;
2525 		}
2526 		IBTF_DPRINTF_L2(cmlog, "ibcm_write_service_record: "
2527 		    "ibmf_saa_update_service_record timed out"
2528 		    " SID = %llX, rval = %d, saa_type = %d",
2529 		    (longlong_t)srv_recp->ServiceID, rval, saa_type);
2530 		delay(ibcm_sa_timeout_delay);
2531 	}
2532 	ibcm_sa_access_exit();
2533 
2534 	if (rval != IBMF_SUCCESS) {
2535 		IBTF_DPRINTF_L2(cmlog, "ibcm_write_service_record: "
2536 		    "ibmf_saa_update_service_record() : Failed - %d", rval);
2537 		return (ibcm_ibmf_analyze_error(rval));
2538 	} else
2539 		return (IBT_SUCCESS);
2540 }
2541 
2542 
2543 static void
2544 ibcm_rem_stale_srec(ibmf_saa_handle_t saa_handle, sa_service_record_t *srec)
2545 {
2546 	ibt_status_t		retval;
2547 	uint_t			num_found;
2548 	size_t			length;
2549 	sa_service_record_t	*srv_resp;
2550 	void			*results_p;
2551 	uint_t			i;
2552 	uint64_t		component_mask;
2553 	ibmf_saa_access_args_t	access_args;
2554 
2555 	component_mask =
2556 	    SA_SR_COMPMASK_PKEY | SA_SR_COMPMASK_NAME | SA_SR_COMPMASK_GID;
2557 
2558 	/* Call in SA Access retrieve routine to get Service Records. */
2559 	access_args.sq_attr_id = SA_SERVICERECORD_ATTRID;
2560 	access_args.sq_access_type = IBMF_SAA_RETRIEVE;
2561 	access_args.sq_component_mask = component_mask;
2562 	access_args.sq_template = srec;
2563 	access_args.sq_template_length = sizeof (sa_service_record_t);
2564 	access_args.sq_callback = NULL;
2565 	access_args.sq_callback_arg = NULL;
2566 
2567 	retval = ibcm_contact_sa_access(saa_handle, &access_args, &length,
2568 	    &results_p);
2569 	if (retval != IBT_SUCCESS) {
2570 		IBTF_DPRINTF_L2(cmlog, "ibcm_rem_stale_srec: "
2571 		    "SA Access Failure");
2572 		return;
2573 	}
2574 
2575 	num_found = length / sizeof (sa_service_record_t);
2576 
2577 	if (num_found)
2578 		IBTF_DPRINTF_L3(cmlog, "ibcm_rem_stale_srec: "
2579 		    "Found %d matching Service Records.", num_found);
2580 
2581 	/* Validate the returned number of records. */
2582 	if ((results_p != NULL) && (num_found > 0)) {
2583 
2584 		/* Remove all the records. */
2585 		for (i = 0; i < num_found; i++) {
2586 
2587 			srv_resp = (sa_service_record_t *)
2588 			    ((uchar_t *)results_p +
2589 			    i * sizeof (sa_service_record_t));
2590 
2591 			/*
2592 			 * Found some matching records, but check out whether
2593 			 * this Record is really stale or just happens to match
2594 			 * the current session records. If yes, don't remove it.
2595 			 */
2596 			mutex_enter(&ibcm_svc_info_lock);
2597 			if (ibcm_find_svc_entry(srv_resp->ServiceID) != NULL) {
2598 				/* This record is NOT STALE. */
2599 				mutex_exit(&ibcm_svc_info_lock);
2600 				IBTF_DPRINTF_L3(cmlog, "ibcm_rem_stale_srec: "
2601 				    "This is not Stale, it's an active record");
2602 				continue;
2603 			}
2604 			mutex_exit(&ibcm_svc_info_lock);
2605 
2606 			IBTF_DPRINTF_L2(cmlog, "ibcm_rem_stale_srec: "
2607 			    "Removing Stale Rec: %s, %llX",
2608 			    srv_resp->ServiceName, srv_resp->ServiceID);
2609 
2610 			IBCM_DUMP_SERVICE_REC(srv_resp);
2611 
2612 			/*
2613 			 * Remove the Service Record Entry from SA.
2614 			 *
2615 			 * Get ServiceID info from Response Buf, other
2616 			 * attributes are already filled-in.
2617 			 */
2618 
2619 			 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(srec->ServiceID))
2620 
2621 			srec->ServiceID = srv_resp->ServiceID;
2622 
2623 			 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(srec->ServiceID))
2624 
2625 			(void) ibcm_write_service_record(saa_handle, srec,
2626 			    IBMF_SAA_DELETE);
2627 		}
2628 
2629 		/* Deallocate the memory for results_p. */
2630 		kmem_free(results_p, length);
2631 	}
2632 }
2633 
2634 
2635 
2636 /*
2637  * ibt_bind_service()
2638  *	Register a service with the IBCM
2639  *
2640  * INPUTS:
2641  *	srv_hdl		The service id handle returned to the client
2642  *			on an ibt_service_register() call.
2643  *
2644  *	gid		The GID to which to bind the service.
2645  *
2646  *	srv_bind	The address of a ibt_srv_bind_t that describes
2647  *			the service record.  This should be NULL if there
2648  *			is to be no service record.  This contains:
2649  *
2650  *		sb_lease	Lease period
2651  *		sb_pkey		Partition
2652  *		sb_name		pointer to ASCII string Service Name,
2653  *				NULL terminated.
2654  *		sb_key[]	Key to secure the service record.
2655  *		sb_data		Service Data structure (64-byte)
2656  *
2657  *	cm_private	First argument of Service handler.
2658  *
2659  *	sb_hdl_p	The address of a service bind handle, used
2660  *			to undo the service binding.
2661  *
2662  * ibt_bind_service() returns:
2663  *	IBT_SUCCESS		- added a service successfully.
2664  *	IBT_INVALID_PARAM	- invalid input parameter.
2665  *	IBT_CM_FAILURE		- failed to add the service.
2666  *	IBT_CM_SERVICE_EXISTS	- service already exists.
2667  */
2668 ibt_status_t
2669 ibt_bind_service(ibt_srv_hdl_t srv_hdl, ib_gid_t gid, ibt_srv_bind_t *srv_bind,
2670     void *cm_private, ibt_sbind_hdl_t *sb_hdl_p)
2671 {
2672 	ibt_status_t		status;
2673 	ibtl_cm_hca_port_t	port;
2674 	ibcm_svc_bind_t		*sbindp, *sbp;
2675 	ibcm_hca_info_t		*hcap;
2676 	ib_svc_id_t		sid, start_sid, end_sid;
2677 	ibmf_saa_handle_t	saa_handle;
2678 	sa_service_record_t	srv_rec;
2679 	uint16_t		pkey_ix;
2680 
2681 	if (sb_hdl_p != NULL)
2682 		*sb_hdl_p = NULL;	/* return value for error cases */
2683 
2684 	IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: srv_hdl %p, gid (%llX:%llX)",
2685 	    srv_hdl, (longlong_t)gid.gid_prefix, (longlong_t)gid.gid_guid);
2686 
2687 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sbindp))
2688 
2689 	/* Call ibtl_cm_get_hca_port to get the port number and the HCA GUID. */
2690 	if ((status = ibtl_cm_get_hca_port(gid, 0, &port)) != IBT_SUCCESS) {
2691 		IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2692 		    "ibtl_cm_get_hca_port failed: %d", status);
2693 		return (status);
2694 	}
2695 	IBTF_DPRINTF_L4(cmlog, "ibt_bind_service: Port:%d HCA GUID:%llX",
2696 	    port.hp_port, port.hp_hca_guid);
2697 
2698 	hcap = ibcm_find_hca_entry(port.hp_hca_guid);
2699 	if (hcap == NULL) {
2700 		IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: NO HCA found");
2701 		return (IBT_HCA_BUSY_DETACHING);
2702 	}
2703 	IBTF_DPRINTF_L4(cmlog, "ibt_bind_service: hcap = %p", hcap);
2704 
2705 	if (srv_bind != NULL) {
2706 		saa_handle = ibcm_get_saa_handle(hcap, port.hp_port);
2707 		if (saa_handle == NULL) {
2708 			IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2709 			    "saa_handle is NULL");
2710 			ibcm_dec_hca_acc_cnt(hcap);
2711 			return (IBT_HCA_PORT_NOT_ACTIVE);
2712 		}
2713 		if (srv_bind->sb_pkey == 0) {
2714 			IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2715 			    "P_Key must not be 0");
2716 			ibcm_dec_hca_acc_cnt(hcap);
2717 			return (IBT_INVALID_PARAM);
2718 		}
2719 		if (strlen(srv_bind->sb_name) >= IB_SVC_NAME_LEN) {
2720 			IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2721 			    "Service Name is too long");
2722 			ibcm_dec_hca_acc_cnt(hcap);
2723 			return (IBT_INVALID_PARAM);
2724 		} else
2725 			IBTF_DPRINTF_L3(cmlog, "ibt_bind_service: "
2726 			    "Service Name='%s'", srv_bind->sb_name);
2727 		status = ibt_pkey2index_byguid(port.hp_hca_guid,
2728 		    port.hp_port, srv_bind->sb_pkey, &pkey_ix);
2729 		if (status != IBT_SUCCESS) {
2730 			IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2731 			    "P_Key 0x%x not found in P_Key_Table",
2732 			    srv_bind->sb_pkey);
2733 			ibcm_dec_hca_acc_cnt(hcap);
2734 			return (status);
2735 		}
2736 	}
2737 
2738 	/* assume success - allocate before locking */
2739 	sbindp = kmem_zalloc(sizeof (*sbindp), KM_SLEEP);
2740 	sbindp->sbind_cm_private = cm_private;
2741 	sbindp->sbind_gid = gid;
2742 	sbindp->sbind_hcaguid = port.hp_hca_guid;
2743 	sbindp->sbind_port = port.hp_port;
2744 
2745 	mutex_enter(&ibcm_svc_info_lock);
2746 
2747 	sbp = srv_hdl->svc_bind_list;
2748 	while (sbp != NULL) {
2749 		if (sbp->sbind_gid.gid_guid == gid.gid_guid &&
2750 		    sbp->sbind_gid.gid_prefix == gid.gid_prefix) {
2751 			if (srv_bind == NULL ||
2752 			    srv_bind->sb_pkey == sbp->sbind_pkey) {
2753 				IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2754 				    "failed: GID %llX:%llX and PKEY %x is "
2755 				    "already bound", gid.gid_prefix,
2756 				    gid.gid_guid, sbp->sbind_pkey);
2757 				mutex_exit(&ibcm_svc_info_lock);
2758 				ibcm_dec_hca_acc_cnt(hcap);
2759 				kmem_free(sbindp, sizeof (*sbindp));
2760 				return (IBT_CM_SERVICE_EXISTS);
2761 			}
2762 		}
2763 		sbp = sbp->sbind_link;
2764 	}
2765 	/* no entry found */
2766 
2767 	sbindp->sbind_link = srv_hdl->svc_bind_list;
2768 	srv_hdl->svc_bind_list = sbindp;
2769 
2770 	mutex_exit(&ibcm_svc_info_lock);
2771 
2772 	if (srv_bind != NULL) {
2773 		bzero(&srv_rec, sizeof (srv_rec));
2774 
2775 		srv_rec.ServiceLease =
2776 		    sbindp->sbind_lease = srv_bind->sb_lease;
2777 		srv_rec.ServiceP_Key =
2778 		    sbindp->sbind_pkey = srv_bind->sb_pkey;
2779 		srv_rec.ServiceKey_hi =
2780 		    sbindp->sbind_key[0] = srv_bind->sb_key[0];
2781 		srv_rec.ServiceKey_lo =
2782 		    sbindp->sbind_key[1] = srv_bind->sb_key[1];
2783 		(void) strcpy(sbindp->sbind_name, srv_bind->sb_name);
2784 		(void) strcpy((char *)srv_rec.ServiceName, srv_bind->sb_name);
2785 		srv_rec.ServiceGID = gid;
2786 
2787 		/*
2788 		 * Find out whether we have any stale Local Service records
2789 		 * matching the current attributes.  If yes, we shall try to
2790 		 * remove them from SA using the current request's ServiceKey.
2791 		 *
2792 		 * We will perform this operation only for Local Services, as
2793 		 * it is handled by SA automatically for WellKnown Services.
2794 		 *
2795 		 * Ofcourse, clients can specify NOT to do this clean-up by
2796 		 * setting IBT_SBIND_NO_CLEANUP flag (srv_bind->sb_flag).
2797 		 */
2798 		if ((srv_hdl->svc_id & IB_SID_AGN_LOCAL) &&
2799 		    (!(srv_bind->sb_flag & IBT_SBIND_NO_CLEANUP))) {
2800 			ibcm_rem_stale_srec(saa_handle, &srv_rec);
2801 		}
2802 
2803 		/* Handle endianess for service data. */
2804 		ibcm_swizzle_from_srv(&srv_bind->sb_data, sbindp->sbind_data);
2805 
2806 		bcopy(sbindp->sbind_data, srv_rec.ServiceData, IB_SVC_DATA_LEN);
2807 
2808 		/* insert srv record into the SA */
2809 		start_sid = srv_hdl->svc_id;
2810 		end_sid = start_sid + srv_hdl->svc_num_sids - 1;
2811 		for (sid = start_sid; sid <= end_sid; sid++) {
2812 
2813 			srv_rec.ServiceID = sid;
2814 
2815 			IBCM_DUMP_SERVICE_REC(&srv_rec);
2816 
2817 			IBTF_DPRINTF_L4(cmlog, "ibt_bind_service: "
2818 			    "ibmf_saa_write_service_record, SvcId = %llX",
2819 			    (longlong_t)sid);
2820 
2821 			status = ibcm_write_service_record(saa_handle, &srv_rec,
2822 			    IBMF_SAA_UPDATE);
2823 			if (status != IBT_SUCCESS) {
2824 				IBTF_DPRINTF_L2(cmlog, "ibt_bind_service:"
2825 				    " ibcm_write_service_record fails %d, "
2826 				    "sid %llX", status, (longlong_t)sid);
2827 
2828 				if (sid != start_sid) {
2829 					/*
2830 					 * Bind failed while bind SID other than
2831 					 * first in the sid_range.  So we need
2832 					 * to unbind those, which are passed.
2833 					 *
2834 					 * Need to increment svc count to
2835 					 * compensate for ibt_unbind_service().
2836 					 */
2837 					ibcm_inc_hca_svc_cnt(hcap);
2838 					ibcm_dec_hca_acc_cnt(hcap);
2839 
2840 					(void) ibt_unbind_service(srv_hdl,
2841 					    sbindp);
2842 				} else {
2843 					ibcm_svc_bind_t		**sbpp;
2844 
2845 					/*
2846 					 * Bind failed for the first SID or the
2847 					 * only SID in question, then no need
2848 					 * to unbind, just free memory and
2849 					 * return error.
2850 					 */
2851 					mutex_enter(&ibcm_svc_info_lock);
2852 
2853 					sbpp = &srv_hdl->svc_bind_list;
2854 					sbp = *sbpp;
2855 					while (sbp != NULL) {
2856 						if (sbp == sbindp) {
2857 							*sbpp = sbp->sbind_link;
2858 							break;
2859 						}
2860 						sbpp = &sbp->sbind_link;
2861 						sbp = *sbpp;
2862 					}
2863 					mutex_exit(&ibcm_svc_info_lock);
2864 					ibcm_dec_hca_acc_cnt(hcap);
2865 
2866 					kmem_free(sbindp, sizeof (*sbindp));
2867 				}
2868 				return (status);
2869 			}
2870 		}
2871 	}
2872 	ibcm_inc_hca_svc_cnt(hcap);
2873 	ibcm_dec_hca_acc_cnt(hcap);
2874 
2875 	/* If this message isn't seen then ibt_bind_service failed */
2876 	IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: DONE (%p, %llX:%llX)",
2877 	    srv_hdl, gid.gid_prefix, gid.gid_guid);
2878 
2879 	if (sb_hdl_p != NULL)
2880 		*sb_hdl_p = sbindp;
2881 
2882 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*sbindp))
2883 
2884 	return (IBT_SUCCESS);
2885 }
2886 
2887 ibt_status_t
2888 ibt_unbind_service(ibt_srv_hdl_t srv_hdl, ibt_sbind_hdl_t sbindp)
2889 {
2890 	ib_svc_id_t	sid, end_sid;
2891 	ibt_status_t	rval;
2892 	ibcm_hca_info_t	*hcap;
2893 	ibcm_svc_bind_t	*sbp, **sbpp;
2894 
2895 	IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service(%p, %p)",
2896 	    srv_hdl, sbindp);
2897 
2898 	hcap = ibcm_find_hca_entry(sbindp->sbind_hcaguid);
2899 
2900 	/* If there is a service on hca, respective hcap cannot go away */
2901 	ASSERT(hcap != NULL);
2902 
2903 	mutex_enter(&ibcm_svc_info_lock);
2904 
2905 	sbpp = &srv_hdl->svc_bind_list;
2906 	sbp = *sbpp;
2907 	while (sbp != NULL) {
2908 		if (sbp == sbindp) {
2909 			*sbpp = sbp->sbind_link;
2910 			break;
2911 		}
2912 		sbpp = &sbp->sbind_link;
2913 		sbp = *sbpp;
2914 	}
2915 	sid = srv_hdl->svc_id;
2916 	end_sid = srv_hdl->svc_id + srv_hdl->svc_num_sids - 1;
2917 	if (sbp != NULL)
2918 		while (sbp->sbind_rewrite_state == IBCM_REWRITE_BUSY)
2919 			cv_wait(&ibcm_svc_info_cv, &ibcm_svc_info_lock);
2920 	mutex_exit(&ibcm_svc_info_lock);
2921 
2922 	if (sbp == NULL) {
2923 		IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: "
2924 		    "service binding not found: srv_hdl %p, srv_bind %p",
2925 		    srv_hdl, sbindp);
2926 		ibcm_dec_hca_acc_cnt(hcap);
2927 		return (IBT_INVALID_PARAM);
2928 	}
2929 
2930 	if (sbindp->sbind_pkey != 0) {	/* Are there service records? */
2931 		ibtl_cm_hca_port_t	port;
2932 		sa_service_record_t	srv_rec;
2933 		ibmf_saa_handle_t	saa_handle;
2934 		ibt_status_t		status;
2935 
2936 		/* get the default SGID of the port */
2937 		if ((status = ibtl_cm_get_hca_port(sbindp->sbind_gid, 0, &port))
2938 		    != IBT_SUCCESS) {
2939 			IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: "
2940 			    "ibtl_cm_get_hca_port failed: %d", status);
2941 			/* we're done, but there may be stale service records */
2942 			goto done;
2943 		}
2944 
2945 		saa_handle = ibcm_get_saa_handle(hcap, port.hp_port);
2946 		if (saa_handle == NULL) {
2947 			IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: "
2948 			    "saa_handle is NULL");
2949 			/* we're done, but there may be stale service records */
2950 			goto done;
2951 		}
2952 
2953 		/* Fill in fields of srv_rec */
2954 		bzero(&srv_rec, sizeof (srv_rec));
2955 
2956 		srv_rec.ServiceP_Key = sbindp->sbind_pkey;
2957 		srv_rec.ServiceKey_hi = sbindp->sbind_key[0];
2958 		srv_rec.ServiceKey_lo = sbindp->sbind_key[1];
2959 		srv_rec.ServiceGID = sbindp->sbind_gid;
2960 		(void) strcpy((char *)srv_rec.ServiceName, sbindp->sbind_name);
2961 
2962 		while (sid <= end_sid) {
2963 
2964 			srv_rec.ServiceID = sid;
2965 			IBCM_DUMP_SERVICE_REC(&srv_rec);
2966 
2967 			rval = ibcm_write_service_record(saa_handle, &srv_rec,
2968 			    IBMF_SAA_DELETE);
2969 
2970 			IBTF_DPRINTF_L4(cmlog, "ibt_unbind_service: "
2971 			    "ibcm_write_service_record rval = %d, SID %llx",
2972 			    rval, sid);
2973 			if (rval != IBT_SUCCESS) {
2974 				/* this is not considered a reason to fail */
2975 				IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: "
2976 				    "ibcm_write_service_record fails %d, "
2977 				    "sid %llx", rval, sid);
2978 			}
2979 			sid++;
2980 		}
2981 	}
2982 done:
2983 	ibcm_dec_hca_svc_cnt(hcap);
2984 	ibcm_dec_hca_acc_cnt(hcap);
2985 	kmem_free(sbindp, sizeof (*sbindp));
2986 
2987 	/* If this message isn't seen then ibt_unbind_service failed */
2988 	IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: done !!");
2989 
2990 	return (IBT_SUCCESS);
2991 }
2992 
2993 /*
2994  * Simply pull off each binding from the list and unbind it.
2995  * If any of the unbind calls fail, we fail.
2996  */
2997 ibt_status_t
2998 ibt_unbind_all_services(ibt_srv_hdl_t srv_hdl)
2999 {
3000 	ibt_status_t	status;
3001 	ibcm_svc_bind_t	*sbp;
3002 
3003 	mutex_enter(&ibcm_svc_info_lock);
3004 	sbp = NULL;
3005 
3006 	/* this compare keeps the loop from being infinite */
3007 	while (sbp != srv_hdl->svc_bind_list) {
3008 		sbp = srv_hdl->svc_bind_list;
3009 		mutex_exit(&ibcm_svc_info_lock);
3010 		status = ibt_unbind_service(srv_hdl, sbp);
3011 		if (status != IBT_SUCCESS)
3012 			return (status);
3013 		mutex_enter(&ibcm_svc_info_lock);
3014 		if (srv_hdl->svc_bind_list == NULL)
3015 			break;
3016 	}
3017 	mutex_exit(&ibcm_svc_info_lock);
3018 	return (IBT_SUCCESS);
3019 }
3020 
3021 /*
3022  * ibt_deregister_service()
3023  *	Deregister a service with the IBCM
3024  *
3025  * INPUTS:
3026  *	ibt_hdl		The IBT client handle returned to the client
3027  *			on an ibt_attach() call.
3028  *
3029  *	srv_hdl		The address of a service identification handle, used
3030  *			to de-register a service.
3031  * RETURN VALUES:
3032  *	IBT_SUCCESS	on success (or respective failure on error)
3033  */
3034 ibt_status_t
3035 ibt_deregister_service(ibt_clnt_hdl_t ibt_hdl, ibt_srv_hdl_t srv_hdl)
3036 {
3037 	ibcm_svc_info_t		*svcp;
3038 	ibcm_svc_lookup_t	svc;
3039 
3040 	IBTF_DPRINTF_L2(cmlog, "ibt_deregister_service(%p, %p)",
3041 	    ibt_hdl, srv_hdl);
3042 
3043 	mutex_enter(&ibcm_svc_info_lock);
3044 
3045 	if (srv_hdl->svc_bind_list != NULL) {
3046 		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_service:"
3047 		    " srv_hdl %p still has bindings", srv_hdl);
3048 		mutex_exit(&ibcm_svc_info_lock);
3049 		return (IBT_CM_SERVICE_BUSY);
3050 	}
3051 	svc.sid = srv_hdl->svc_id;
3052 	svc.num_sids = 1;
3053 	IBTF_DPRINTF_L3(cmlog, "ibt_deregister_service: SID 0x%llX, numsids %d",
3054 	    srv_hdl->svc_id, srv_hdl->svc_num_sids);
3055 
3056 #ifdef __lock_lint
3057 	ibcm_svc_compare(NULL, NULL);
3058 #endif
3059 	svcp = avl_find(&ibcm_svc_avl_tree, &svc, NULL);
3060 	if (svcp != srv_hdl) {
3061 		mutex_exit(&ibcm_svc_info_lock);
3062 		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_service(): "
3063 		    "srv_hdl %p not found", srv_hdl);
3064 		return (IBT_INVALID_PARAM);
3065 	}
3066 	avl_remove(&ibcm_svc_avl_tree, svcp);
3067 
3068 	/* wait for active REQ/SREQ handling to be done */
3069 	svcp->svc_to_delete = 1;
3070 	while (svcp->svc_ref_cnt != 0)
3071 		cv_wait(&ibcm_svc_info_cv, &ibcm_svc_info_lock);
3072 
3073 	mutex_exit(&ibcm_svc_info_lock);
3074 
3075 	if ((srv_hdl->svc_id & IB_SID_AGN_MASK) == IB_SID_AGN_LOCAL)
3076 		ibcm_free_local_sids(srv_hdl->svc_id, srv_hdl->svc_num_sids);
3077 
3078 	ibtl_cm_change_service_cnt(ibt_hdl, -srv_hdl->svc_num_sids);
3079 	kmem_free(srv_hdl, sizeof (*srv_hdl));
3080 
3081 	/* If this message isn't seen then ibt_deregister_service failed */
3082 	IBTF_DPRINTF_L2(cmlog, "ibt_deregister_service: done !!");
3083 
3084 	return (IBT_SUCCESS);
3085 }
3086 
3087 ibcm_status_t
3088 ibcm_ar_init(void)
3089 {
3090 	ib_svc_id_t	sid = IBCM_DAPL_ATS_SID;
3091 	ibcm_svc_info_t *tmp_svcp;
3092 
3093 	IBTF_DPRINTF_L3(cmlog, "ibcm_ar_init()");
3094 
3095 	/* remove this special SID from the pool of available SIDs */
3096 	if ((tmp_svcp = ibcm_create_svc_entry(sid, 1)) == NULL) {
3097 		IBTF_DPRINTF_L2(cmlog, "ibcm_ar_init: "
3098 		    "DAPL ATS SID 0x%llx already registered", (longlong_t)sid);
3099 		return (IBCM_FAILURE);
3100 	}
3101 	mutex_enter(&ibcm_svc_info_lock);
3102 	ibcm_ar_svcinfop = tmp_svcp;
3103 	ibcm_ar_list = NULL;	/* no address records registered yet */
3104 	mutex_exit(&ibcm_svc_info_lock);
3105 	return (IBCM_SUCCESS);
3106 }
3107 
3108 ibcm_status_t
3109 ibcm_ar_fini(void)
3110 {
3111 	ibcm_ar_t	*ar_list;
3112 	ibcm_svc_info_t	*tmp_svcp;
3113 
3114 	mutex_enter(&ibcm_svc_info_lock);
3115 	ar_list = ibcm_ar_list;
3116 
3117 	if (ar_list == NULL &&
3118 	    avl_numnodes(&ibcm_svc_avl_tree) == 1 &&
3119 	    avl_first(&ibcm_svc_avl_tree) == ibcm_ar_svcinfop) {
3120 		avl_remove(&ibcm_svc_avl_tree, ibcm_ar_svcinfop);
3121 		tmp_svcp = ibcm_ar_svcinfop;
3122 		mutex_exit(&ibcm_svc_info_lock);
3123 		kmem_free(tmp_svcp, sizeof (*ibcm_ar_svcinfop));
3124 		return (IBCM_SUCCESS);
3125 	}
3126 	mutex_exit(&ibcm_svc_info_lock);
3127 	return (IBCM_FAILURE);
3128 }
3129 
3130 
3131 /*
3132  * Return to the caller:
3133  *	IBT_SUCCESS		Found a perfect match.
3134  *				*arpp is set to the record.
3135  *	IBT_INCONSISTENT_AR	Found a record that's inconsistent.
3136  *	IBT_AR_NOT_REGISTERED	Found no record with same GID/pkey and
3137  *				found no record with same data.
3138  */
3139 static ibt_status_t
3140 ibcm_search_ar(ibt_ar_t *arp, ibcm_ar_t **arpp)
3141 {
3142 	ibcm_ar_t	*tmp;
3143 	int		i;
3144 
3145 	ASSERT(MUTEX_HELD(&ibcm_svc_info_lock));
3146 	tmp = ibcm_ar_list;
3147 	while (tmp != NULL) {
3148 		if (tmp->ar.ar_gid.gid_prefix == arp->ar_gid.gid_prefix &&
3149 		    tmp->ar.ar_gid.gid_guid == arp->ar_gid.gid_guid &&
3150 		    tmp->ar.ar_pkey == arp->ar_pkey) {
3151 			for (i = 0; i < IBCM_DAPL_ATS_NBYTES; i++)
3152 				if (tmp->ar.ar_data[i] != arp->ar_data[i])
3153 					return (IBT_INCONSISTENT_AR);
3154 			*arpp = tmp;
3155 			return (IBT_SUCCESS);
3156 		} else {
3157 			/* if all the data bytes match, we have inconsistency */
3158 			for (i = 0; i < IBCM_DAPL_ATS_NBYTES; i++)
3159 				if (tmp->ar.ar_data[i] != arp->ar_data[i])
3160 					break;
3161 			if (i == IBCM_DAPL_ATS_NBYTES)
3162 				return (IBT_INCONSISTENT_AR);
3163 			/* try next address record */
3164 		}
3165 		tmp = tmp->ar_link;
3166 	}
3167 	return (IBT_AR_NOT_REGISTERED);
3168 }
3169 
3170 ibt_status_t
3171 ibt_register_ar(ibt_clnt_hdl_t ibt_hdl, ibt_ar_t *arp)
3172 {
3173 	ibcm_ar_t		*found;
3174 	ibcm_ar_t		*tmp;
3175 	ibt_status_t		status;
3176 	ibt_status_t		s1, s2;
3177 	char			*s;
3178 	ibcm_ar_ref_t		*hdlp;
3179 	ibcm_ar_t		*new;
3180 	ibcm_ar_t		**linkp;
3181 	ibtl_cm_hca_port_t	cm_port;
3182 	uint16_t		pkey_ix;
3183 	ibcm_hca_info_t		*hcap;
3184 	ibmf_saa_handle_t	saa_handle;
3185 	sa_service_record_t	*srv_recp;
3186 	uint64_t		gid_ored;
3187 
3188 	IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: PKey 0x%X GID %llX:%llX",
3189 	    arp->ar_pkey, (longlong_t)arp->ar_gid.gid_prefix,
3190 	    (longlong_t)arp->ar_gid.gid_guid);
3191 
3192 	/*
3193 	 * If P_Key is 0, but GID is not, this query is invalid.
3194 	 * If GID is 0, but P_Key is not, this query is invalid.
3195 	 */
3196 	gid_ored = arp->ar_gid.gid_guid | arp->ar_gid.gid_prefix;
3197 	if ((arp->ar_pkey == 0 && gid_ored != 0ULL) ||
3198 	    (arp->ar_pkey != 0 && gid_ored == 0ULL)) {
3199 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: "
3200 		    "GID/P_Key is not valid");
3201 		return (IBT_INVALID_PARAM);
3202 	}
3203 
3204 	/* assume success, so these might be needed */
3205 	hdlp = kmem_alloc(sizeof (*hdlp), KM_SLEEP);
3206 	new = kmem_zalloc(sizeof (*new), KM_SLEEP);
3207 
3208 	mutex_enter(&ibcm_svc_info_lock);
3209 	/* search for existing GID/pkey (there can be at most 1) */
3210 	status = ibcm_search_ar(arp, &found);
3211 	if (status == IBT_INCONSISTENT_AR) {
3212 		mutex_exit(&ibcm_svc_info_lock);
3213 		kmem_free(new, sizeof (*new));
3214 		kmem_free(hdlp, sizeof (*hdlp));
3215 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: "
3216 		    "address record is inconsistent with a known one");
3217 		return (IBT_INCONSISTENT_AR);
3218 	} else if (status == IBT_SUCCESS) {
3219 		if (found->ar_flags == IBCM_AR_INITING) {
3220 			found->ar_waiters++;
3221 			cv_wait(&found->ar_cv, &ibcm_svc_info_lock);
3222 			found->ar_waiters--;
3223 		}
3224 		if (found->ar_flags == IBCM_AR_FAILED) {
3225 			if (found->ar_waiters == 0) {
3226 				cv_destroy(&found->ar_cv);
3227 				kmem_free(found, sizeof (*found));
3228 			}
3229 			mutex_exit(&ibcm_svc_info_lock);
3230 			kmem_free(new, sizeof (*new));
3231 			kmem_free(hdlp, sizeof (*hdlp));
3232 			return (ibt_get_module_failure(IBT_FAILURE_IBCM, 0));
3233 		}
3234 		hdlp->ar_ibt_hdl = ibt_hdl;
3235 		hdlp->ar_ref_link = found->ar_ibt_hdl_list;
3236 		found->ar_ibt_hdl_list = hdlp;
3237 		mutex_exit(&ibcm_svc_info_lock);
3238 		kmem_free(new, sizeof (*new));
3239 		ibtl_cm_change_service_cnt(ibt_hdl, 1);
3240 		return (IBT_SUCCESS);
3241 	} else {
3242 		ASSERT(status == IBT_AR_NOT_REGISTERED);
3243 	}
3244 	hdlp->ar_ref_link = NULL;
3245 	hdlp->ar_ibt_hdl = ibt_hdl;
3246 	new->ar_ibt_hdl_list = hdlp;
3247 	new->ar = *arp;
3248 	new->ar_flags = IBCM_AR_INITING;
3249 	new->ar_waiters = 0;
3250 	cv_init(&new->ar_cv, NULL, CV_DEFAULT, NULL);
3251 	new->ar_link = ibcm_ar_list;
3252 	ibcm_ar_list = new;
3253 
3254 	/* verify GID/pkey is valid for a local port, etc. */
3255 	hcap = NULL;
3256 	if ((s1 = ibtl_cm_get_hca_port(arp->ar_gid, 0, &cm_port))
3257 	    != IBT_SUCCESS ||
3258 	    (s2 = ibt_pkey2index_byguid(cm_port.hp_hca_guid, cm_port.hp_port,
3259 	    arp->ar_pkey, &pkey_ix)) != IBT_SUCCESS ||
3260 	    (hcap = ibcm_find_hca_entry(cm_port.hp_hca_guid)) == NULL) {
3261 		cv_destroy(&new->ar_cv);
3262 		ibcm_ar_list = new->ar_link;
3263 		mutex_exit(&ibcm_svc_info_lock);
3264 		kmem_free(new, sizeof (*new));
3265 		kmem_free(hdlp, sizeof (*hdlp));
3266 		status = IBT_INVALID_PARAM;
3267 		if (s1 == IBT_HCA_PORT_NOT_ACTIVE) {
3268 			s = "PORT DOWN";
3269 			status = IBT_HCA_PORT_NOT_ACTIVE;
3270 		} else if (s1 != IBT_SUCCESS)
3271 			s = "GID not found";
3272 		else if (s2 != IBT_SUCCESS)
3273 			s = "PKEY not found";
3274 		else
3275 			s = "CM could not find its HCA entry";
3276 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: %s, status = %d",
3277 		    s, status);
3278 		return (status);
3279 	}
3280 	mutex_exit(&ibcm_svc_info_lock);
3281 	saa_handle = ibcm_get_saa_handle(hcap, cm_port.hp_port);
3282 
3283 	/* create service record */
3284 	srv_recp = kmem_zalloc(sizeof (*srv_recp), KM_SLEEP);
3285 	srv_recp->ServiceLease = 0xFFFFFFFF;	/* infinite */
3286 	srv_recp->ServiceP_Key = arp->ar_pkey;
3287 	srv_recp->ServiceKey_hi = 0xDA410000ULL;	/* DAPL */
3288 	srv_recp->ServiceKey_lo = 0xA7500000ULL;	/* ATS */
3289 	(void) strcpy((char *)srv_recp->ServiceName, IBCM_DAPL_ATS_NAME);
3290 	srv_recp->ServiceGID = arp->ar_gid;
3291 	bcopy(arp->ar_data, srv_recp->ServiceData, IBCM_DAPL_ATS_NBYTES);
3292 	srv_recp->ServiceID = IBCM_DAPL_ATS_SID;
3293 
3294 	/* insert service record into the SA */
3295 
3296 	IBCM_DUMP_SERVICE_REC(srv_recp);
3297 
3298 	if (saa_handle != NULL)
3299 		status = ibcm_write_service_record(saa_handle, srv_recp,
3300 		    IBMF_SAA_UPDATE);
3301 	else
3302 		status = IBT_HCA_PORT_NOT_ACTIVE;
3303 
3304 	if (status != IBT_SUCCESS) {
3305 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: sa access fails %d, "
3306 		    "sid %llX", status, (longlong_t)srv_recp->ServiceID);
3307 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: FAILED for gid "
3308 		    "%llX:%llX pkey 0x%X", (longlong_t)arp->ar_gid.gid_prefix,
3309 		    (longlong_t)arp->ar_gid.gid_guid, arp->ar_pkey);
3310 
3311 		kmem_free(srv_recp, sizeof (*srv_recp));
3312 		kmem_free(hdlp, sizeof (*hdlp));
3313 
3314 		mutex_enter(&ibcm_svc_info_lock);
3315 		linkp = &ibcm_ar_list;
3316 		tmp = *linkp;
3317 		while (tmp != NULL) {
3318 			if (tmp == new) {
3319 				*linkp = new->ar_link;
3320 				break;
3321 			}
3322 			linkp = &tmp->ar_link;
3323 			tmp = *linkp;
3324 		}
3325 		if (new->ar_waiters > 0) {
3326 			new->ar_flags = IBCM_AR_FAILED;
3327 			cv_broadcast(&new->ar_cv);
3328 			mutex_exit(&ibcm_svc_info_lock);
3329 		} else {
3330 			cv_destroy(&new->ar_cv);
3331 			mutex_exit(&ibcm_svc_info_lock);
3332 			kmem_free(new, sizeof (*new));
3333 		}
3334 		ibcm_dec_hca_acc_cnt(hcap);
3335 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: "
3336 		    "IBMF_SAA failed to write address record");
3337 	} else {					/* SUCCESS */
3338 		uint8_t		*b;
3339 
3340 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: SUCCESS for gid "
3341 		    "%llx:%llx pkey %x", (longlong_t)arp->ar_gid.gid_prefix,
3342 		    (longlong_t)arp->ar_gid.gid_guid, arp->ar_pkey);
3343 		b = arp->ar_data;
3344 
3345 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar:"
3346 		    " data %d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d",
3347 		    b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8], b[9],
3348 		    b[10], b[11], b[12], b[13], b[14], b[15]);
3349 		mutex_enter(&ibcm_svc_info_lock);
3350 		new->ar_srv_recp = srv_recp;
3351 		new->ar_saa_handle = saa_handle;
3352 		new->ar_port = cm_port.hp_port;
3353 		new->ar_hcap = hcap;
3354 		new->ar_flags = IBCM_AR_SUCCESS;
3355 		if (new->ar_waiters > 0)
3356 			cv_broadcast(&new->ar_cv);
3357 		mutex_exit(&ibcm_svc_info_lock);
3358 		ibtl_cm_change_service_cnt(ibt_hdl, 1);
3359 		/* do not call ibcm_dec_hca_acc_cnt(hcap) until deregister */
3360 	}
3361 	return (status);
3362 }
3363 
3364 ibt_status_t
3365 ibt_deregister_ar(ibt_clnt_hdl_t ibt_hdl, ibt_ar_t *arp)
3366 {
3367 	ibcm_ar_t		*found;
3368 	ibcm_ar_t		*tmp;
3369 	ibcm_ar_t		**linkp;
3370 	ibcm_ar_ref_t		*hdlp;
3371 	ibcm_ar_ref_t		**hdlpp;
3372 	ibt_status_t		status;
3373 	ibmf_saa_handle_t	saa_handle;
3374 	sa_service_record_t	*srv_recp;
3375 	uint64_t		gid_ored;
3376 
3377 	IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: pkey %x", arp->ar_pkey);
3378 	IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: gid %llx:%llx",
3379 	    (longlong_t)arp->ar_gid.gid_prefix,
3380 	    (longlong_t)arp->ar_gid.gid_guid);
3381 
3382 	/*
3383 	 * If P_Key is 0, but GID is not, this query is invalid.
3384 	 * If GID is 0, but P_Key is not, this query is invalid.
3385 	 */
3386 	gid_ored = arp->ar_gid.gid_guid | arp->ar_gid.gid_prefix;
3387 	if ((arp->ar_pkey == 0 && gid_ored != 0ULL) ||
3388 	    (arp->ar_pkey != 0 && gid_ored == 0ULL)) {
3389 		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
3390 		    "GID/P_Key is not valid");
3391 		return (IBT_INVALID_PARAM);
3392 	}
3393 
3394 	mutex_enter(&ibcm_svc_info_lock);
3395 	/* search for existing GID/pkey (there can be at most 1) */
3396 	status = ibcm_search_ar(arp, &found);
3397 	if (status == IBT_INCONSISTENT_AR || status == IBT_AR_NOT_REGISTERED) {
3398 		mutex_exit(&ibcm_svc_info_lock);
3399 		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
3400 		    "address record not found");
3401 		return (IBT_AR_NOT_REGISTERED);
3402 	}
3403 	ASSERT(status == IBT_SUCCESS);
3404 
3405 	hdlpp = &found->ar_ibt_hdl_list;
3406 	hdlp = *hdlpp;
3407 	while (hdlp != NULL) {
3408 		if (hdlp->ar_ibt_hdl == ibt_hdl)
3409 			break;
3410 		hdlpp = &hdlp->ar_ref_link;
3411 		hdlp = *hdlpp;
3412 	}
3413 	if (hdlp == NULL) {	/* could not find ibt_hdl on list */
3414 		mutex_exit(&ibcm_svc_info_lock);
3415 		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
3416 		    "address record found, but not for this client");
3417 		return (IBT_AR_NOT_REGISTERED);
3418 	}
3419 	*hdlpp = hdlp->ar_ref_link;	/* remove ref for this client */
3420 	if (found->ar_ibt_hdl_list == NULL && found->ar_waiters == 0) {
3421 		/* last entry was removed */
3422 		found->ar_flags = IBCM_AR_INITING; /* hold off register_ar */
3423 		saa_handle = found->ar_saa_handle;
3424 		srv_recp = found->ar_srv_recp;
3425 
3426 		/* wait if this service record is being rewritten */
3427 		while (found->ar_rewrite_state == IBCM_REWRITE_BUSY)
3428 			cv_wait(&ibcm_svc_info_cv, &ibcm_svc_info_lock);
3429 		mutex_exit(&ibcm_svc_info_lock);
3430 
3431 		/* remove service record */
3432 		status = ibcm_write_service_record(saa_handle, srv_recp,
3433 		    IBMF_SAA_DELETE);
3434 		if (status != IBT_SUCCESS)
3435 			IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
3436 			    "IBMF_SAA failed to delete address record");
3437 		mutex_enter(&ibcm_svc_info_lock);
3438 		if (found->ar_waiters == 0) {	/* still no waiters */
3439 			linkp = &ibcm_ar_list;
3440 			tmp = *linkp;
3441 			while (tmp != found) {
3442 				linkp = &tmp->ar_link;
3443 				tmp = *linkp;
3444 			}
3445 			*linkp = tmp->ar_link;
3446 			ibcm_dec_hca_acc_cnt(found->ar_hcap);
3447 			kmem_free(srv_recp, sizeof (*srv_recp));
3448 			cv_destroy(&found->ar_cv);
3449 			kmem_free(found, sizeof (*found));
3450 		} else {
3451 			/* add service record back in for the waiters */
3452 			mutex_exit(&ibcm_svc_info_lock);
3453 			status = ibcm_write_service_record(saa_handle, srv_recp,
3454 			    IBMF_SAA_UPDATE);
3455 			mutex_enter(&ibcm_svc_info_lock);
3456 			if (status == IBT_SUCCESS)
3457 				found->ar_flags = IBCM_AR_SUCCESS;
3458 			else {
3459 				found->ar_flags = IBCM_AR_FAILED;
3460 				IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
3461 				    "IBMF_SAA failed to write address record");
3462 			}
3463 			cv_broadcast(&found->ar_cv);
3464 		}
3465 	}
3466 	mutex_exit(&ibcm_svc_info_lock);
3467 	kmem_free(hdlp, sizeof (*hdlp));
3468 	ibtl_cm_change_service_cnt(ibt_hdl, -1);
3469 	return (status);
3470 }
3471 
3472 ibt_status_t
3473 ibt_query_ar(ib_gid_t *sgid, ibt_ar_t *queryp, ibt_ar_t *resultp)
3474 {
3475 	sa_service_record_t	svcrec_req;
3476 	sa_service_record_t	*svcrec_resp;
3477 	void			*results_p;
3478 	uint64_t		component_mask = 0;
3479 	uint64_t		gid_ored;
3480 	size_t			length;
3481 	int			num_rec;
3482 	int			i;
3483 	ibmf_saa_access_args_t	access_args;
3484 	ibt_status_t		retval;
3485 	ibtl_cm_hca_port_t	cm_port;
3486 	ibcm_hca_info_t		*hcap;
3487 	ibmf_saa_handle_t	saa_handle;
3488 
3489 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar(%p, %p)", queryp, resultp);
3490 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: sgid %llx:%llx",
3491 	    (longlong_t)sgid->gid_prefix, (longlong_t)sgid->gid_guid);
3492 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: query_pkey %x", queryp->ar_pkey);
3493 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: query_gid %llx:%llx",
3494 	    (longlong_t)queryp->ar_gid.gid_prefix,
3495 	    (longlong_t)queryp->ar_gid.gid_guid);
3496 
3497 	/*
3498 	 * If P_Key is 0, but GID is not, this query is invalid.
3499 	 * If GID is 0, but P_Key is not, this query is invalid.
3500 	 */
3501 	gid_ored = queryp->ar_gid.gid_guid | queryp->ar_gid.gid_prefix;
3502 	if ((queryp->ar_pkey == 0 && gid_ored != 0ULL) ||
3503 	    (queryp->ar_pkey != 0 && gid_ored == 0ULL)) {
3504 		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar: GID/P_Key is not valid");
3505 		return (IBT_INVALID_PARAM);
3506 	}
3507 
3508 	hcap = NULL;
3509 	if (ibtl_cm_get_hca_port(*sgid, 0, &cm_port) != IBT_SUCCESS ||
3510 	    (hcap = ibcm_find_hca_entry(cm_port.hp_hca_guid)) == NULL ||
3511 	    (saa_handle = ibcm_get_saa_handle(hcap, cm_port.hp_port)) == NULL) {
3512 		if (hcap != NULL)
3513 			ibcm_dec_hca_acc_cnt(hcap);
3514 		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar: sgid is not valid");
3515 		return (IBT_INVALID_PARAM);
3516 	}
3517 
3518 	bzero(&svcrec_req, sizeof (svcrec_req));
3519 
3520 	/* Is GID/P_Key Specified. */
3521 	if (queryp->ar_pkey != 0) {	/* GID is non-zero from check above */
3522 		svcrec_req.ServiceP_Key = queryp->ar_pkey;
3523 		component_mask |= SA_SR_COMPMASK_PKEY;
3524 		IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: P_Key %X",
3525 		    queryp->ar_pkey);
3526 		svcrec_req.ServiceGID = queryp->ar_gid;
3527 		component_mask |= SA_SR_COMPMASK_GID;
3528 		IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: GID %llX:%llX",
3529 		    (longlong_t)queryp->ar_gid.gid_prefix,
3530 		    (longlong_t)queryp->ar_gid.gid_guid);
3531 	}
3532 
3533 	/* Is ServiceData Specified. */
3534 	for (i = 0; i < IBCM_DAPL_ATS_NBYTES; i++) {
3535 		if (queryp->ar_data[i] != 0) {
3536 			bcopy(queryp->ar_data, svcrec_req.ServiceData,
3537 			    IBCM_DAPL_ATS_NBYTES);
3538 			component_mask |= 0xFFFF << 7;	/* all 16 Data8 */
3539 							/* components */
3540 			break;
3541 		}
3542 	}
3543 
3544 	/* Service Name */
3545 	(void) strcpy((char *)svcrec_req.ServiceName, IBCM_DAPL_ATS_NAME);
3546 	component_mask |= SA_SR_COMPMASK_NAME;
3547 
3548 	svcrec_req.ServiceID = IBCM_DAPL_ATS_SID;
3549 	component_mask |= SA_SR_COMPMASK_ID;
3550 
3551 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: "
3552 	    "Perform SA Access: Mask: 0x%X", component_mask);
3553 
3554 	/*
3555 	 * Call in SA Access retrieve routine to get Service Records.
3556 	 *
3557 	 * SA Access framework allocated memory for the "results_p".
3558 	 * Make sure to deallocate once we are done with the results_p.
3559 	 * The size of the buffer allocated will be as returned in
3560 	 * "length" field.
3561 	 */
3562 	access_args.sq_attr_id = SA_SERVICERECORD_ATTRID;
3563 	access_args.sq_access_type = IBMF_SAA_RETRIEVE;
3564 	access_args.sq_component_mask = component_mask;
3565 	access_args.sq_template = &svcrec_req;
3566 	access_args.sq_template_length = sizeof (sa_service_record_t);
3567 	access_args.sq_callback = NULL;
3568 	access_args.sq_callback_arg = NULL;
3569 
3570 	retval = ibcm_contact_sa_access(saa_handle, &access_args, &length,
3571 	    &results_p);
3572 
3573 	ibcm_dec_hca_acc_cnt(hcap);
3574 	if (retval != IBT_SUCCESS) {
3575 		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar: SA Access Failed");
3576 		return (retval);
3577 	}
3578 
3579 	num_rec = length / sizeof (sa_service_record_t);
3580 
3581 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: "
3582 	    "Found %d Service Records.", num_rec);
3583 
3584 	/* Validate the returned number of records. */
3585 	if ((results_p != NULL) && (num_rec > 0)) {
3586 		uint8_t		*b;
3587 
3588 		/* Just return info from the first service record. */
3589 		svcrec_resp = (sa_service_record_t *)results_p;
3590 
3591 		/* The Service GID and Service ID */
3592 		resultp->ar_gid = svcrec_resp->ServiceGID;
3593 		resultp->ar_pkey = svcrec_resp->ServiceP_Key;
3594 		bcopy(svcrec_resp->ServiceData,
3595 		    resultp->ar_data, IBCM_DAPL_ATS_NBYTES);
3596 
3597 		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar: "
3598 		    "Found: pkey %x dgid %llX:%llX", resultp->ar_pkey,
3599 		    (longlong_t)resultp->ar_gid.gid_prefix,
3600 		    (longlong_t)resultp->ar_gid.gid_guid);
3601 		b = resultp->ar_data;
3602 		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar:"
3603 		    " data %d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d",
3604 		    b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8], b[9],
3605 		    b[10], b[11], b[12], b[13], b[14], b[15]);
3606 
3607 		/* Deallocate the memory for results_p. */
3608 		kmem_free(results_p, length);
3609 		if (num_rec > 1)
3610 			retval = IBT_MULTIPLE_AR;
3611 		else
3612 			retval = IBT_SUCCESS;
3613 	} else {
3614 		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar: "
3615 		    "ibmf_sa_access found 0 matching records");
3616 		retval = IBT_AR_NOT_REGISTERED;
3617 	}
3618 	return (retval);
3619 }
3620 
3621 /* mark all ATS service records associated with the port */
3622 static void
3623 ibcm_mark_ar(ib_guid_t hca_guid, uint8_t port)
3624 {
3625 	ibcm_ar_t	*tmp;
3626 
3627 	ASSERT(MUTEX_HELD(&ibcm_svc_info_lock));
3628 	for (tmp = ibcm_ar_list; tmp != NULL; tmp = tmp->ar_link) {
3629 		if (tmp->ar_hcap == NULL)
3630 			continue;
3631 		if (tmp->ar_hcap->hca_guid == hca_guid &&
3632 		    tmp->ar_port == port) {
3633 			/* even if it's busy, we mark it for rewrite */
3634 			tmp->ar_rewrite_state = IBCM_REWRITE_NEEDED;
3635 		}
3636 	}
3637 }
3638 
3639 /* rewrite all ATS service records */
3640 static int
3641 ibcm_rewrite_ar(void)
3642 {
3643 	ibcm_ar_t		*tmp;
3644 	ibmf_saa_handle_t	saa_handle;
3645 	sa_service_record_t	*srv_recp;
3646 	ibt_status_t		rval;
3647 	int			did_something = 0;
3648 
3649 	ASSERT(MUTEX_HELD(&ibcm_svc_info_lock));
3650 check_for_work:
3651 	for (tmp = ibcm_ar_list; tmp != NULL; tmp = tmp->ar_link) {
3652 		if (tmp->ar_rewrite_state == IBCM_REWRITE_NEEDED) {
3653 			tmp->ar_rewrite_state = IBCM_REWRITE_BUSY;
3654 			saa_handle = tmp->ar_saa_handle;
3655 			srv_recp = tmp->ar_srv_recp;
3656 			mutex_exit(&ibcm_svc_info_lock);
3657 			IBTF_DPRINTF_L3(cmlog, "ibcm_rewrite_ar: "
3658 			    "rewriting ar @ %p", tmp);
3659 			did_something = 1;
3660 			rval = ibcm_write_service_record(saa_handle, srv_recp,
3661 			    IBMF_SAA_UPDATE);
3662 			if (rval != IBT_SUCCESS)
3663 				IBTF_DPRINTF_L2(cmlog, "ibcm_rewrite_ar: "
3664 				    "ibcm_write_service_record failed: "
3665 				    "status = %d", rval);
3666 			mutex_enter(&ibcm_svc_info_lock);
3667 			/* if it got marked again, then we want to rewrite */
3668 			if (tmp->ar_rewrite_state == IBCM_REWRITE_BUSY)
3669 				tmp->ar_rewrite_state = IBCM_REWRITE_IDLE;
3670 			/* in case there was a waiter... */
3671 			cv_broadcast(&ibcm_svc_info_cv);
3672 			goto check_for_work;
3673 		}
3674 	}
3675 	return (did_something);
3676 }
3677 
3678 static void
3679 ibcm_rewrite_svc_record(ibcm_svc_info_t *srv_hdl, ibcm_svc_bind_t *sbindp)
3680 {
3681 	ibcm_hca_info_t		*hcap;
3682 	ib_svc_id_t		sid, start_sid, end_sid;
3683 	ibmf_saa_handle_t	saa_handle;
3684 	sa_service_record_t	srv_rec;
3685 	ibt_status_t		rval;
3686 
3687 	hcap = ibcm_find_hca_entry(sbindp->sbind_hcaguid);
3688 	if (hcap == NULL) {
3689 		IBTF_DPRINTF_L2(cmlog, "ibcm_rewrite_svc_record: "
3690 		    "NO HCA found for HCA GUID %llX", sbindp->sbind_hcaguid);
3691 		return;
3692 	}
3693 
3694 	saa_handle = ibcm_get_saa_handle(hcap, sbindp->sbind_port);
3695 	if (saa_handle == NULL) {
3696 		IBTF_DPRINTF_L2(cmlog, "ibcm_rewrite_svc_record: "
3697 		    "saa_handle is NULL");
3698 		ibcm_dec_hca_acc_cnt(hcap);
3699 		return;
3700 	}
3701 
3702 	IBTF_DPRINTF_L3(cmlog, "ibcm_rewrite_svc_record: "
3703 	    "rewriting svc '%s', port_guid = %llX", sbindp->sbind_name,
3704 	    sbindp->sbind_gid.gid_guid);
3705 
3706 	bzero(&srv_rec, sizeof (srv_rec));
3707 
3708 	srv_rec.ServiceLease = sbindp->sbind_lease;
3709 	srv_rec.ServiceP_Key = sbindp->sbind_pkey;
3710 	srv_rec.ServiceKey_hi = sbindp->sbind_key[0];
3711 	srv_rec.ServiceKey_lo = sbindp->sbind_key[1];
3712 	(void) strcpy((char *)srv_rec.ServiceName, sbindp->sbind_name);
3713 	srv_rec.ServiceGID = sbindp->sbind_gid;
3714 
3715 	bcopy(sbindp->sbind_data, srv_rec.ServiceData, IB_SVC_DATA_LEN);
3716 
3717 	/* insert srv record into the SA */
3718 	start_sid = srv_hdl->svc_id;
3719 	end_sid = start_sid + srv_hdl->svc_num_sids - 1;
3720 	for (sid = start_sid; sid <= end_sid; sid++) {
3721 		srv_rec.ServiceID = sid;
3722 
3723 		rval = ibcm_write_service_record(saa_handle, &srv_rec,
3724 		    IBMF_SAA_UPDATE);
3725 
3726 		IBTF_DPRINTF_L4(cmlog, "ibcm_rewrite_svc_record: "
3727 		    "ibcm_write_service_record, SvcId = %llX, "
3728 		    "rval = %d", (longlong_t)sid, rval);
3729 		if (rval != IBT_SUCCESS) {
3730 			IBTF_DPRINTF_L2(cmlog, "ibcm_rewrite_svc_record:"
3731 			    " ibcm_write_service_record fails %d sid %llX",
3732 			    rval, (longlong_t)sid);
3733 		}
3734 	}
3735 	ibcm_dec_hca_acc_cnt(hcap);
3736 }
3737 
3738 /*
3739  * Task to mark all service records as needing to be rewritten to the SM/SA.
3740  * This task does not return until all of them have been rewritten.
3741  */
3742 void
3743 ibcm_service_record_rewrite_task(void *arg)
3744 {
3745 	ibcm_port_up_t	*pup = (ibcm_port_up_t *)arg;
3746 	ib_guid_t	hca_guid = pup->pup_hca_guid;
3747 	uint8_t		port = pup->pup_port;
3748 	ibcm_svc_info_t	*svcp;
3749 	ibcm_svc_bind_t	*sbp;
3750 	avl_tree_t	*avl_tree = &ibcm_svc_avl_tree;
3751 	static int	task_is_running = 0;
3752 
3753 	IBTF_DPRINTF_L2(cmlog, "ibcm_service_record_rewrite_task STARTED "
3754 	    "for hca_guid %llX, port %d", hca_guid, port);
3755 
3756 	mutex_enter(&ibcm_svc_info_lock);
3757 	ibcm_mark_ar(hca_guid, port);
3758 	for (svcp = avl_first(avl_tree); svcp != NULL;
3759 	    svcp = avl_walk(avl_tree, svcp, AVL_AFTER)) {
3760 		sbp = svcp->svc_bind_list;
3761 		while (sbp != NULL) {
3762 			if (sbp->sbind_pkey != 0 &&
3763 			    sbp->sbind_port == port &&
3764 			    sbp->sbind_hcaguid == hca_guid) {
3765 				/* even if it's busy, we mark it for rewrite */
3766 				sbp->sbind_rewrite_state = IBCM_REWRITE_NEEDED;
3767 			}
3768 			sbp = sbp->sbind_link;
3769 		}
3770 	}
3771 	if (task_is_running) {
3772 		/* let the other task thread finish the work */
3773 		mutex_exit(&ibcm_svc_info_lock);
3774 		return;
3775 	}
3776 	task_is_running = 1;
3777 
3778 	(void) ibcm_rewrite_ar();
3779 
3780 check_for_work:
3781 	for (svcp = avl_first(avl_tree); svcp != NULL;
3782 	    svcp = avl_walk(avl_tree, svcp, AVL_AFTER)) {
3783 		sbp = svcp->svc_bind_list;
3784 		while (sbp != NULL) {
3785 			if (sbp->sbind_rewrite_state == IBCM_REWRITE_NEEDED) {
3786 				sbp->sbind_rewrite_state = IBCM_REWRITE_BUSY;
3787 				mutex_exit(&ibcm_svc_info_lock);
3788 				ibcm_rewrite_svc_record(svcp, sbp);
3789 				mutex_enter(&ibcm_svc_info_lock);
3790 				/* if it got marked again, we want to rewrite */
3791 				if (sbp->sbind_rewrite_state ==
3792 				    IBCM_REWRITE_BUSY)
3793 					sbp->sbind_rewrite_state =
3794 					    IBCM_REWRITE_IDLE;
3795 				/* in case there was a waiter... */
3796 				cv_broadcast(&ibcm_svc_info_cv);
3797 				goto check_for_work;
3798 			}
3799 			sbp = sbp->sbind_link;
3800 		}
3801 	}
3802 	/*
3803 	 * If there were no service records to write, and we failed to
3804 	 * have to rewrite any more ATS service records, then we're done.
3805 	 */
3806 	if (ibcm_rewrite_ar() != 0)
3807 		goto check_for_work;
3808 	task_is_running = 0;
3809 	mutex_exit(&ibcm_svc_info_lock);
3810 
3811 	IBTF_DPRINTF_L2(cmlog, "ibcm_service_record_rewrite_task DONE");
3812 	kmem_free(pup, sizeof (ibcm_port_up_t));
3813 }
3814 
3815 
3816 /*
3817  * Function:
3818  * 	ibt_cm_proceed
3819  *
3820  * Verifies the arguments and dispatches the cm state machine processing
3821  * via taskq
3822  */
3823 
3824 ibt_status_t
3825 ibt_cm_proceed(ibt_cm_event_type_t event, void *session_id,
3826     ibt_cm_status_t status, ibt_cm_proceed_reply_t *cm_event_data,
3827     void *priv_data, ibt_priv_data_len_t priv_data_len)
3828 {
3829 	ibcm_state_data_t *statep = (ibcm_state_data_t *)session_id;
3830 	ibcm_proceed_targs_t	*proceed_targs;
3831 	ibcm_proceed_error_t	proceed_error;
3832 
3833 	IBTF_DPRINTF_L3(cmlog, "ibt_cm_proceed chan 0x%p event %x status %x "
3834 	    "session_id %p", statep->channel, event, status, session_id);
3835 
3836 	IBTF_DPRINTF_L5(cmlog, "ibt_cm_proceed chan 0x%p cm_event_data %p, "
3837 	    "priv_data %p priv_data_len %x", statep->channel, cm_event_data,
3838 	    priv_data, priv_data_len);
3839 
3840 	/* validate session_id and status */
3841 	if ((statep == NULL) || (status == IBT_CM_DEFER)) {
3842 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : Invalid Args");
3843 		return (IBT_INVALID_PARAM);
3844 	}
3845 
3846 	/* If priv data len specified, then priv_data cannot be NULL */
3847 	if ((priv_data_len > 0) && (priv_data == NULL))
3848 		return (IBT_INVALID_PARAM);
3849 
3850 	proceed_error = IBCM_PROCEED_INVALID_NONE;
3851 
3852 	mutex_enter(&statep->state_mutex);
3853 	if (event == IBT_CM_EVENT_REQ_RCV) {
3854 
3855 		if ((statep->state != IBCM_STATE_REQ_RCVD) &&
3856 		    (statep->state != IBCM_STATE_MRA_SENT))
3857 			proceed_error = IBCM_PROCEED_INVALID_EVENT_STATE;
3858 		else if (priv_data_len > IBT_REP_PRIV_DATA_SZ)
3859 			proceed_error = IBCM_PROCEED_INVALID_PRIV_SZ;
3860 
3861 	} else if (event == IBT_CM_EVENT_REP_RCV) {
3862 		if ((statep->state != IBCM_STATE_REP_RCVD) &&
3863 		    (statep->state != IBCM_STATE_MRA_REP_SENT))
3864 			proceed_error = IBCM_PROCEED_INVALID_EVENT_STATE;
3865 		else if (priv_data_len > IBT_RTU_PRIV_DATA_SZ)
3866 			proceed_error = IBCM_PROCEED_INVALID_PRIV_SZ;
3867 	} else if (event == IBT_CM_EVENT_LAP_RCV) {
3868 		if ((statep->ap_state != IBCM_AP_STATE_LAP_RCVD) &&
3869 		    (statep->ap_state != IBCM_AP_STATE_MRA_LAP_SENT))
3870 			proceed_error = IBCM_PROCEED_INVALID_EVENT_STATE;
3871 		else if (priv_data_len > IBT_APR_PRIV_DATA_SZ)
3872 			proceed_error = IBCM_PROCEED_INVALID_PRIV_SZ;
3873 	} else if (event == IBT_CM_EVENT_CONN_CLOSED) {
3874 		if (statep->state != IBCM_STATE_DREQ_RCVD)
3875 			proceed_error = IBCM_PROCEED_INVALID_EVENT_STATE;
3876 		else if (priv_data_len > IBT_DREP_PRIV_DATA_SZ)
3877 			proceed_error = IBCM_PROCEED_INVALID_PRIV_SZ;
3878 	} else {
3879 			proceed_error = IBCM_PROCEED_INVALID_EVENT;
3880 	}
3881 
3882 	/* if there is an error, print an error message and return */
3883 	if (proceed_error != IBCM_PROCEED_INVALID_NONE) {
3884 		mutex_exit(&statep->state_mutex);
3885 		if (proceed_error == IBCM_PROCEED_INVALID_EVENT_STATE) {
3886 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p"
3887 			    "Invalid Event/State combination specified",
3888 			    statep->channel);
3889 			return (IBT_INVALID_PARAM);
3890 		} else if (proceed_error == IBCM_PROCEED_INVALID_PRIV_SZ) {
3891 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p"
3892 			    "Invalid Event/priv len combination specified",
3893 			    statep->channel);
3894 			return (IBT_INVALID_PARAM);
3895 		} else if (proceed_error == IBCM_PROCEED_INVALID_EVENT) {
3896 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p"
3897 			    "Invalid Event specified", statep->channel);
3898 			return (IBT_INVALID_PARAM);
3899 		} else {
3900 			ASSERT(proceed_error == IBCM_PROCEED_INVALID_LAP);
3901 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p"
3902 			    "IBT_CM_EVENT_LAP_RCV not supported",
3903 			    statep->channel);
3904 			/* UNTIL HCA DRIVER ENABLES AP SUPPORT, FAIL THE CALL */
3905 			return (IBT_APM_NOT_SUPPORTED);
3906 		}
3907 	}
3908 
3909 
3910 	/* wait until client's CM handler returns DEFER status back to CM */
3911 
3912 	while (statep->clnt_proceed == IBCM_BLOCK) {
3913 		IBTF_DPRINTF_L5(cmlog, "ibt_cm_proceed : chan 0x%p blocked for "
3914 		    "return of client's cm handler", statep->channel);
3915 		cv_wait(&statep->block_client_cv, &statep->state_mutex);
3916 	}
3917 
3918 	if (statep->clnt_proceed == IBCM_FAIL) {
3919 		mutex_exit(&statep->state_mutex);
3920 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p Failed as "
3921 		    "client returned non-DEFER status from cm handler",
3922 		    statep->channel);
3923 		return (IBT_CHAN_STATE_INVALID);
3924 	}
3925 
3926 	ASSERT(statep->clnt_proceed == IBCM_UNBLOCK);
3927 	statep->clnt_proceed = IBCM_FAIL;
3928 	mutex_exit(&statep->state_mutex);
3929 
3930 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*proceed_targs))
3931 
3932 	/* the state machine processing is done in a separate thread */
3933 
3934 	/* proceed_targs is freed in ibcm_proceed_via_taskq */
3935 	proceed_targs = kmem_alloc(sizeof (ibcm_proceed_targs_t),
3936 	    KM_SLEEP);
3937 
3938 	proceed_targs->event  = event;
3939 	proceed_targs->status = status;
3940 	proceed_targs->priv_data_len = priv_data_len;
3941 
3942 	bcopy(priv_data, proceed_targs->priv_data, priv_data_len);
3943 
3944 	proceed_targs->tst.rc.statep = statep;
3945 	bcopy(cm_event_data, &proceed_targs->tst.rc.rc_cm_event_data,
3946 	    sizeof (ibt_cm_proceed_reply_t));
3947 
3948 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*proceed_targs))
3949 
3950 	(void) taskq_dispatch(ibcm_taskq, ibcm_proceed_via_taskq,
3951 	    proceed_targs, TQ_SLEEP);
3952 
3953 	return (IBT_SUCCESS);
3954 }
3955 
3956 /*
3957  * Function:
3958  * 	ibcm_proceed_via_taskq
3959  *
3960  * Called from taskq, dispatched by ibt_cm_proceed
3961  * Completes the cm state processing for ibt_cm_proceed
3962  */
3963 void
3964 ibcm_proceed_via_taskq(void *targs)
3965 {
3966 	ibcm_proceed_targs_t	*proceed_targs = (ibcm_proceed_targs_t *)targs;
3967 	ibcm_state_data_t *statep = proceed_targs->tst.rc.statep;
3968 	ibt_cm_reason_t reject_reason;
3969 	uint8_t arej_len;
3970 	ibcm_status_t response;
3971 	ibcm_clnt_reply_info_t clnt_info;
3972 
3973 	clnt_info.reply_event = &proceed_targs->tst.rc.rc_cm_event_data;
3974 	clnt_info.priv_data = proceed_targs->priv_data;
3975 	clnt_info.priv_data_len = proceed_targs->priv_data_len;
3976 
3977 	IBTF_DPRINTF_L4(cmlog, "ibcm_proceed_via_taskq chan 0x%p targs %x",
3978 	    statep->channel, targs);
3979 
3980 	if (proceed_targs->event == IBT_CM_EVENT_REQ_RCV) {
3981 		response =
3982 		    ibcm_process_cep_req_cm_hdlr(statep, proceed_targs->status,
3983 		    &clnt_info, &reject_reason, &arej_len,
3984 		    (ibcm_req_msg_t *)statep->defer_cm_msg);
3985 
3986 		ibcm_handle_cep_req_response(statep, response, reject_reason,
3987 		    arej_len);
3988 
3989 	} else if (proceed_targs->event == IBT_CM_EVENT_REP_RCV) {
3990 		response =
3991 		    ibcm_process_cep_rep_cm_hdlr(statep, proceed_targs->status,
3992 		    &clnt_info, &reject_reason, &arej_len,
3993 		    (ibcm_rep_msg_t *)statep->defer_cm_msg);
3994 
3995 		ibcm_handle_cep_rep_response(statep, response, reject_reason,
3996 		    arej_len, (ibcm_rep_msg_t *)statep->defer_cm_msg);
3997 
3998 	} else if (proceed_targs->event == IBT_CM_EVENT_LAP_RCV) {
3999 		ibcm_process_cep_lap_cm_hdlr(statep, proceed_targs->status,
4000 		    &clnt_info, (ibcm_lap_msg_t *)statep->defer_cm_msg,
4001 		    (ibcm_apr_msg_t *)IBCM_OUT_MSGP(statep->lapr_msg));
4002 
4003 		ibcm_post_apr_mad(statep);
4004 
4005 	} else {
4006 		ASSERT(proceed_targs->event == IBT_CM_EVENT_CONN_CLOSED);
4007 		ibcm_handle_cep_dreq_response(statep, proceed_targs->priv_data,
4008 		    proceed_targs->priv_data_len);
4009 	}
4010 
4011 	kmem_free(targs, sizeof (ibcm_proceed_targs_t));
4012 }
4013 
4014 /*
4015  * Function:
4016  * 	ibt_cm_ud_proceed
4017  *
4018  * Verifies the arguments and dispatches the cm state machine processing
4019  * via taskq
4020  */
4021 ibt_status_t
4022 ibt_cm_ud_proceed(void *session_id, ibt_channel_hdl_t ud_channel,
4023     ibt_cm_status_t status, ibt_redirect_info_t *redirect_infop,
4024     void *priv_data, ibt_priv_data_len_t priv_data_len)
4025 {
4026 	ibcm_ud_state_data_t *ud_statep = (ibcm_ud_state_data_t *)session_id;
4027 	ibcm_proceed_targs_t	*proceed_targs;
4028 	ibt_qp_query_attr_t	qp_attr;
4029 	ibt_status_t		retval;
4030 
4031 	IBTF_DPRINTF_L3(cmlog, "ibt_cm_ud_proceed session_id %p "
4032 	    "ud_channel %p ", session_id, ud_channel);
4033 
4034 	IBTF_DPRINTF_L4(cmlog, "ibt_cm_ud_proceed status %x priv_data %p "
4035 	    "priv_data_len %x",  status, priv_data, priv_data_len);
4036 
4037 	/* validate session_id and status */
4038 	if ((ud_statep == NULL) || (status == IBT_CM_DEFER)) {
4039 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_ud_proceed : Invalid Args");
4040 		return (IBT_INVALID_PARAM);
4041 	}
4042 
4043 	/* If priv data len specified, then priv_data cannot be NULL */
4044 	if ((priv_data_len > 0) && (priv_data == NULL))
4045 		return (IBT_INVALID_PARAM);
4046 
4047 	if (priv_data_len > IBT_SIDR_REP_PRIV_DATA_SZ)
4048 		return (IBT_INVALID_PARAM);
4049 
4050 	/* retrieve qpn and qkey from ud channel */
4051 
4052 	/* validate event and statep's state */
4053 
4054 	if (status == IBT_CM_ACCEPT) {
4055 		retval = ibt_query_qp(ud_channel, &qp_attr);
4056 		if ((retval != IBT_SUCCESS) ||
4057 		    (qp_attr.qp_info.qp_trans != IBT_UD_SRV)) {
4058 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_ud_proceed: "
4059 			    "Failed to retrieve QPN from the channel: %d",
4060 			    retval);
4061 			return (IBT_INVALID_PARAM);
4062 		}
4063 	}
4064 
4065 
4066 	mutex_enter(&ud_statep->ud_state_mutex);
4067 
4068 	if (ud_statep->ud_state != IBCM_STATE_SIDR_REQ_RCVD) {
4069 		mutex_exit(&ud_statep->ud_state_mutex);
4070 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_ud_proceed : Invalid State "
4071 		    "specified");
4072 		return (IBT_INVALID_PARAM);
4073 	}
4074 
4075 	/* wait until client's CM handler returns DEFER status back to CM */
4076 
4077 	while (ud_statep->ud_clnt_proceed == IBCM_BLOCK) {
4078 		IBTF_DPRINTF_L5(cmlog, "ibt_cm_ud_proceed : Blocked for return"
4079 		    " of client's ud cm handler");
4080 		cv_wait(&ud_statep->ud_block_client_cv,
4081 		    &ud_statep->ud_state_mutex);
4082 	}
4083 
4084 	if (ud_statep->ud_clnt_proceed == IBCM_FAIL) {
4085 		mutex_exit(&ud_statep->ud_state_mutex);
4086 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_ud_proceed : Failed as client "
4087 		    "returned non-DEFER status from cm handler");
4088 		return (IBT_INVALID_PARAM);
4089 	}
4090 
4091 	ASSERT(ud_statep->ud_clnt_proceed == IBCM_UNBLOCK);
4092 	ud_statep->ud_clnt_proceed = IBCM_FAIL;
4093 	mutex_exit(&ud_statep->ud_state_mutex);
4094 
4095 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*proceed_targs))
4096 
4097 	/* the state machine processing is done in a separate thread */
4098 
4099 	/* proceed_targs is freed in ibcm_proceed_via_taskq */
4100 	proceed_targs = kmem_zalloc(sizeof (ibcm_proceed_targs_t),
4101 	    KM_SLEEP);
4102 
4103 	proceed_targs->status = status;
4104 	proceed_targs->priv_data_len = priv_data_len;
4105 
4106 	bcopy(priv_data, proceed_targs->priv_data, priv_data_len);
4107 
4108 	if (status == IBT_CM_ACCEPT) {
4109 		proceed_targs->tst.ud.ud_qkey =
4110 		    qp_attr.qp_info.qp_transport.ud.ud_qkey;
4111 		proceed_targs->tst.ud.ud_qpn = qp_attr.qp_qpn;
4112 	}
4113 
4114 	proceed_targs->tst.ud.ud_statep = ud_statep;
4115 
4116 	/* copy redirect info based on status */
4117 	if (status == IBT_CM_REDIRECT)
4118 		bcopy(redirect_infop, &proceed_targs->tst.ud.ud_redirect_info,
4119 		    sizeof (ibt_redirect_info_t));
4120 
4121 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*proceed_targs))
4122 
4123 	(void) taskq_dispatch(ibcm_taskq, ibcm_ud_proceed_via_taskq,
4124 	    proceed_targs, TQ_SLEEP);
4125 
4126 	return (IBT_SUCCESS);
4127 }
4128 
4129 /*
4130  * Function:
4131  * 	ibcm_ud_proceed_via_taskq
4132  *
4133  * Called from taskq, dispatched by ibt_cm_ud_proceed
4134  * Completes the cm state processing for ibt_cm_ud_proceed
4135  */
4136 void
4137 ibcm_ud_proceed_via_taskq(void *targs)
4138 {
4139 	ibcm_proceed_targs_t	*proceed_targs = (ibcm_proceed_targs_t *)targs;
4140 	ibcm_ud_state_data_t	*ud_statep = proceed_targs->tst.ud.ud_statep;
4141 	ibcm_ud_clnt_reply_info_t ud_clnt_info;
4142 	ibt_sidr_status_t	sidr_status;
4143 
4144 	IBTF_DPRINTF_L4(cmlog, "ibcm_ud_proceed_via_taskq(%p)", targs);
4145 
4146 	ud_clnt_info.ud_qpn  = proceed_targs->tst.ud.ud_qpn;
4147 	ud_clnt_info.ud_qkey  = proceed_targs->tst.ud.ud_qkey;
4148 	ud_clnt_info.priv_data = proceed_targs->priv_data;
4149 	ud_clnt_info.priv_data_len = proceed_targs->priv_data_len;
4150 	ud_clnt_info.redirect_infop = &proceed_targs->tst.ud.ud_redirect_info;
4151 
4152 	/* validate event and statep's state */
4153 	ibcm_process_sidr_req_cm_hdlr(ud_statep, proceed_targs->status,
4154 	    &ud_clnt_info, &sidr_status,
4155 	    (ibcm_sidr_rep_msg_t *)IBCM_OUT_MSGP(ud_statep->ud_stored_msg));
4156 
4157 	ibcm_post_sidr_rep_mad(ud_statep, sidr_status);
4158 
4159 	/* decr the statep ref cnt incremented in ibcm_process_sidr_req_msg */
4160 	mutex_enter(&ud_statep->ud_state_mutex);
4161 	IBCM_UD_REF_CNT_DECR(ud_statep);
4162 	mutex_exit(&ud_statep->ud_state_mutex);
4163 
4164 	kmem_free(targs, sizeof (ibcm_proceed_targs_t));
4165 }
4166 
4167 /*
4168  * Function:
4169  *	ibt_set_alt_path
4170  * Input:
4171  *	channel		Channel handle returned from ibt_alloc_rc_channel(9F).
4172  *
4173  *	mode		Execute in blocking or non blocking mode.
4174  *
4175  *	alt_path	A pointer to an ibt_alt_path_info_t as returned from an
4176  *			ibt_get_alt_path(9F) call that specifies the new
4177  *			alternate path.
4178  *
4179  *	priv_data       A pointer to a buffer specified by caller for the
4180  *			private data in the outgoing CM Load Alternate Path
4181  *			(LAP) message sent to the remote host. This can be NULL
4182  *			if no private data is available to communicate to the
4183  *			remote node.
4184  *
4185  *	priv_data_len   Length of valid data in priv_data, this should be less
4186  *			than or equal to IBT_LAP_PRIV_DATA_SZ.
4187  *
4188  * Output:
4189  *	ret_args	If called in blocking mode, points to a return argument
4190  *			structure of type ibt_ap_returns_t.
4191  *
4192  * Returns:
4193  *	IBT_SUCCESS on Success else appropriate error.
4194  * Description:
4195  *	Load the specified alternate path. Causes the CM to send an LAP message
4196  *	to the remote node.
4197  *	Can only be called on a previously opened RC channel.
4198  */
4199 ibt_status_t
4200 ibt_set_alt_path(ibt_channel_hdl_t channel, ibt_execution_mode_t mode,
4201     ibt_alt_path_info_t *alt_path, void *priv_data,
4202     ibt_priv_data_len_t priv_data_len, ibt_ap_returns_t *ret_args)
4203 {
4204 	ibmf_handle_t		ibmf_hdl;
4205 	ibt_status_t		status = IBT_SUCCESS;
4206 	ibcm_lap_msg_t		*lap_msgp;
4207 	ibcm_hca_info_t		*hcap;
4208 	ibcm_state_data_t	*statep;
4209 	uint8_t			port_no;
4210 	ib_lid_t		alternate_slid;
4211 	ibt_priv_data_len_t	len;
4212 	ib_lid_t		base_lid;
4213 	boolean_t		alt_grh;
4214 
4215 	IBTF_DPRINTF_L3(cmlog, "ibt_set_alt_path(%p, %x, %p, %p, %x, %p)",
4216 	    channel, mode, alt_path, priv_data, priv_data_len, ret_args);
4217 
4218 	/* validate channel */
4219 	if (IBCM_INVALID_CHANNEL(channel)) {
4220 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: invalid channel");
4221 		return (IBT_CHAN_HDL_INVALID);
4222 	}
4223 
4224 	if (ibtl_cm_get_chan_type(channel) != IBT_RC_SRV) {
4225 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4226 		    "Invalid Channel type: Applicable only to RC Channel");
4227 		return (IBT_CHAN_SRV_TYPE_INVALID);
4228 	}
4229 
4230 	if (mode == IBT_NONBLOCKING) {
4231 		if (ret_args != NULL) {
4232 			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4233 			    "ret_args should be NULL when called in "
4234 			    "non-blocking mode");
4235 			return (IBT_INVALID_PARAM);
4236 		}
4237 	} else if (mode == IBT_BLOCKING) {
4238 		if (ret_args == NULL) {
4239 			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4240 			    "ret_args should be Non-NULL when called in "
4241 			    "blocking mode");
4242 			return (IBT_INVALID_PARAM);
4243 		}
4244 		if (ret_args->ap_priv_data_len > IBT_APR_PRIV_DATA_SZ) {
4245 			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4246 			    "expected private data length is too large");
4247 			return (IBT_INVALID_PARAM);
4248 		}
4249 		if ((ret_args->ap_priv_data_len > 0) &&
4250 		    (ret_args->ap_priv_data == NULL)) {
4251 			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4252 			    "apr_priv_data_len > 0, but apr_priv_data NULL");
4253 			return (IBT_INVALID_PARAM);
4254 		}
4255 	} else { /* any other mode is not valid for ibt_set_alt_path */
4256 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4257 		    "invalid mode %x specified", mode);
4258 		return (IBT_INVALID_PARAM);
4259 	}
4260 
4261 	if ((port_no = alt_path->ap_alt_cep_path.cep_hca_port_num) == 0)
4262 		return (IBT_INVALID_PARAM);
4263 
4264 	/* get the statep */
4265 	IBCM_GET_CHAN_PRIVATE(channel, statep);
4266 	if (statep == NULL) {
4267 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: statep NULL");
4268 		return (IBT_CM_FAILURE);
4269 	}
4270 
4271 	mutex_enter(&statep->state_mutex);
4272 	IBCM_RELEASE_CHAN_PRIVATE(channel);
4273 	IBCM_REF_CNT_INCR(statep);
4274 	mutex_exit(&statep->state_mutex);
4275 
4276 	IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: statep %p", statep);
4277 
4278 	hcap = statep->hcap;
4279 
4280 	/* HCA must have been in active state. If not, it's a client bug */
4281 	if (!IBCM_ACCESS_HCA_OK(hcap))
4282 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: hca in error state");
4283 
4284 	ASSERT(statep->cm_handler != NULL);
4285 
4286 	/* Check Alternate port */
4287 	status = ibt_get_port_state_byguid(hcap->hca_guid, port_no, NULL,
4288 	    &base_lid);
4289 	if (status != IBT_SUCCESS) {
4290 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4291 		    "ibt_get_port_state_byguid status %d ", status);
4292 		mutex_enter(&statep->state_mutex);
4293 		IBCM_REF_CNT_DECR(statep);
4294 		mutex_exit(&statep->state_mutex);
4295 		return (status);
4296 	}
4297 
4298 	if ((hcap->hca_port_info[port_no - 1].port_ibmf_hdl == NULL) &&
4299 	    ((status = ibcm_hca_reinit_port(hcap, port_no - 1))
4300 	    != IBT_SUCCESS)) {
4301 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4302 		    "ibmf reg or callback setup failed during re-initialize");
4303 		mutex_enter(&statep->state_mutex);
4304 		IBCM_REF_CNT_DECR(statep);
4305 		mutex_exit(&statep->state_mutex);
4306 		return (status);
4307 	}
4308 
4309 	ibmf_hdl = statep->stored_reply_addr.ibmf_hdl;
4310 
4311 	alternate_slid = base_lid +
4312 	    alt_path->ap_alt_cep_path.cep_adds_vect.av_src_path;
4313 
4314 	IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: alternate SLID = %x",
4315 	    h2b16(alternate_slid));
4316 
4317 	ibcm_lapr_enter();	/* limit how many run simultaneously */
4318 
4319 	/* Allocate MAD for LAP */
4320 	if (statep->lapr_msg == NULL)
4321 		if ((status = ibcm_alloc_out_msg(ibmf_hdl, &statep->lapr_msg,
4322 		    MAD_METHOD_SEND)) != IBT_SUCCESS) {
4323 			ibcm_lapr_exit();
4324 			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4325 			    "chan 0x%p ibcm_alloc_out_msg failed", channel);
4326 			mutex_enter(&statep->state_mutex);
4327 			IBCM_REF_CNT_DECR(statep);
4328 			mutex_exit(&statep->state_mutex);
4329 			return (status);
4330 		}
4331 
4332 	mutex_enter(&statep->state_mutex);
4333 
4334 	IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: connection state is"
4335 	    " %x", statep->state);
4336 
4337 	/* Check state */
4338 	if ((statep->state != IBCM_STATE_ESTABLISHED) ||
4339 	    (statep->ap_state != IBCM_AP_STATE_IDLE)) {
4340 		IBCM_REF_CNT_DECR(statep);
4341 		mutex_exit(&statep->state_mutex);
4342 		(void) ibcm_free_out_msg(ibmf_hdl, &statep->lapr_msg);
4343 		ibcm_lapr_exit();
4344 		return (IBT_CHAN_STATE_INVALID);
4345 	} else {
4346 		/* Set to LAP Sent state */
4347 		statep->ap_state = IBCM_AP_STATE_LAP_SENT;
4348 		statep->ap_done = B_FALSE;
4349 		statep->remaining_retry_cnt = statep->max_cm_retries;
4350 		statep->timer_stored_state = statep->state;
4351 		statep->timer_stored_ap_state = statep->ap_state;
4352 		IBCM_REF_CNT_INCR(statep); /* for ibcm_post_lap_complete */
4353 	}
4354 
4355 	mutex_exit(&statep->state_mutex);
4356 
4357 	/* No more failure returns below */
4358 
4359 	/* Allocate MAD for LAP */
4360 	IBTF_DPRINTF_L5(cmlog, "ibt_set_alt_path:"
4361 	    " statep's mad addr = 0x%p", IBCM_OUT_HDRP(statep->lapr_msg));
4362 
4363 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*lap_msgp))
4364 
4365 	lap_msgp = (ibcm_lap_msg_t *)IBCM_OUT_MSGP(statep->lapr_msg);
4366 
4367 	lap_msgp->lap_alt_l_port_lid = h2b16(alternate_slid);
4368 	lap_msgp->lap_alt_r_port_lid =
4369 	    h2b16(alt_path->ap_alt_cep_path.cep_adds_vect.av_dlid);
4370 
4371 	/* Fill in remote port gid */
4372 	lap_msgp->lap_alt_r_port_gid.gid_prefix =
4373 	    h2b64(alt_path->ap_alt_cep_path.cep_adds_vect.av_dgid.gid_prefix);
4374 	lap_msgp->lap_alt_r_port_gid.gid_guid =
4375 	    h2b64(alt_path->ap_alt_cep_path.cep_adds_vect.av_dgid.gid_guid);
4376 
4377 	/* Fill in local port gid */
4378 	lap_msgp->lap_alt_l_port_gid.gid_prefix =
4379 	    h2b64(alt_path->ap_alt_cep_path.cep_adds_vect.av_sgid.gid_prefix);
4380 	lap_msgp->lap_alt_l_port_gid.gid_guid =
4381 	    h2b64(alt_path->ap_alt_cep_path.cep_adds_vect.av_sgid.gid_guid);
4382 
4383 	alt_grh = alt_path->ap_alt_cep_path.cep_adds_vect.av_send_grh;
4384 
4385 	/* alternate_flow_label, and alternate srate, alternate traffic class */
4386 	lap_msgp->lap_alt_srate_plus =
4387 	    alt_path->ap_alt_cep_path.cep_adds_vect.av_srate & 0x3f;
4388 	lap_msgp->lap_alt_flow_label_plus = h2b32(((alt_grh == B_TRUE) ?
4389 	    (alt_path->ap_alt_cep_path.cep_adds_vect.av_flow << 12) : 0) |
4390 	    alt_path->ap_alt_cep_path.cep_adds_vect.av_tclass);
4391 
4392 	/* Alternate hop limit, service level */
4393 	lap_msgp->lap_alt_hop_limit = (alt_grh == B_TRUE) ?
4394 	    alt_path->ap_alt_cep_path.cep_adds_vect.av_hop : 1;
4395 	lap_msgp->lap_alt_sl_plus =
4396 	    alt_path->ap_alt_cep_path.cep_adds_vect.av_srvl << 4 |
4397 	    ((alt_grh == B_FALSE) ? 0x8 : 0);
4398 
4399 	lap_msgp->lap_alt_local_acktime_plus = ibt_usec2ib(
4400 	    (2 * statep->rc_alt_pkt_lt) +
4401 	    ibt_ib2usec(hcap->hca_ack_delay)) << 3;
4402 
4403 	lap_msgp->lap_local_comm_id = h2b32(statep->local_comid);
4404 	lap_msgp->lap_remote_comm_id = h2b32(statep->remote_comid);
4405 
4406 	lap_msgp->lap_remote_qpn_eecn_plus =
4407 	    h2b32((statep->remote_qpn << 8) |
4408 	    ibt_usec2ib(ibcm_remote_response_time) << 3);
4409 
4410 	len = min(priv_data_len, IBT_LAP_PRIV_DATA_SZ);
4411 	if ((len > 0) && priv_data) {
4412 		bcopy(priv_data, lap_msgp->lap_private_data, len);
4413 	}
4414 
4415 	/* only rc_alt_pkt_lt and ap_return_data fields are initialized */
4416 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*statep))
4417 
4418 	statep->rc_alt_pkt_lt = ibt_ib2usec(alt_path->ap_alt_pkt_lt);
4419 
4420 	/* return_data is filled up in the state machine code */
4421 	statep->ap_return_data = ret_args;
4422 
4423 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*statep))
4424 
4425 	IBCM_OUT_HDRP(statep->lapr_msg)->AttributeID =
4426 	    h2b16(IBCM_INCOMING_LAP + IBCM_ATTR_BASE_ID);
4427 
4428 	IBCM_OUT_HDRP(statep->lapr_msg)->TransactionID =
4429 	    h2b64(ibcm_generate_tranid(IBCM_INCOMING_LAP, statep->local_comid,
4430 	    0));
4431 	IBTF_DPRINTF_L3(cmlog, "ibt_set_alt_path: statep %p, tid %llx",
4432 	    statep, IBCM_OUT_HDRP(statep->lapr_msg)->TransactionID);
4433 
4434 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*lap_msgp))
4435 
4436 	/* Send LAP */
4437 	ibcm_post_rc_mad(statep, statep->lapr_msg, ibcm_post_lap_complete,
4438 	    statep);
4439 
4440 	mutex_enter(&statep->state_mutex);
4441 
4442 	if (mode == IBT_BLOCKING) {
4443 		IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: blocking");
4444 
4445 		/* wait for APR */
4446 		while (statep->ap_done != B_TRUE) {
4447 			cv_wait(&statep->block_client_cv,
4448 			    &statep->state_mutex);
4449 		}
4450 
4451 		IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: done blocking");
4452 
4453 		/*
4454 		 * In the case that ibt_set_alt_path fails,
4455 		 * change retval to IBT_CM_FAILURE
4456 		 */
4457 		if (statep->ap_return_data->ap_status != IBT_CM_AP_LOADED)
4458 			status = IBT_CM_FAILURE;
4459 
4460 	}
4461 
4462 	/* decrement the ref-count before leaving here */
4463 	IBCM_REF_CNT_DECR(statep);
4464 
4465 	mutex_exit(&statep->state_mutex);
4466 
4467 	ibcm_lapr_exit();
4468 
4469 	/* If this message isn't seen then ibt_set_alt_path failed */
4470 	IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: done");
4471 
4472 	return (status);
4473 }
4474 
4475 
4476 #ifdef DEBUG
4477 
4478 /*
4479  * ibcm_query_classport_info:
4480  *	Query classportinfo
4481  *
4482  * INPUTS:
4483  *	channel		- Channel that is associated with a statep
4484  *
4485  * RETURN VALUE: NONE
4486  * This function is currently used to generate a valid get method classport
4487  * info, and test CM functionality. There is no ibtl client interface to
4488  * generate a classportinfo. It is possible that CM may use classportinfo
4489  * from other nodes in the future, and most of the code below could be re-used.
4490  */
4491 void
4492 ibcm_query_classport_info(ibt_channel_hdl_t channel)
4493 {
4494 	ibcm_state_data_t	*statep;
4495 	ibmf_msg_t		*msgp;
4496 
4497 	IBTF_DPRINTF_L2(cmlog, "ibcm_query_classport_info(%p)", channel);
4498 
4499 	/* validate channel, first */
4500 	if (IBCM_INVALID_CHANNEL(channel)) {
4501 		IBTF_DPRINTF_L2(cmlog, "ibcm_query_classport_info: "
4502 		    "invalid channel (%p)", channel);
4503 		return;
4504 	}
4505 
4506 	/* get the statep */
4507 	IBCM_GET_CHAN_PRIVATE(channel, statep);
4508 
4509 	/*
4510 	 * This can happen, if the statep is already gone by a DREQ from
4511 	 * the remote side
4512 	 */
4513 	if (statep == NULL) {
4514 		IBTF_DPRINTF_L2(cmlog, "ibcm_query_classport_info: "
4515 		    "statep NULL");
4516 		return;
4517 	}
4518 
4519 	mutex_enter(&statep->state_mutex);
4520 	IBCM_RELEASE_CHAN_PRIVATE(channel);
4521 	IBCM_REF_CNT_INCR(statep);
4522 	mutex_exit(&statep->state_mutex);
4523 
4524 	/* Debug/test code, so don't care about return status */
4525 	(void) ibcm_alloc_out_msg(statep->stored_reply_addr.ibmf_hdl, &msgp,
4526 	    MAD_METHOD_GET);
4527 
4528 	IBCM_OUT_HDRP(msgp)->TransactionID = h2b64(ibcm_generate_tranid(
4529 	    MAD_ATTR_ID_CLASSPORTINFO, statep->local_comid, 0));
4530 	IBCM_OUT_HDRP(msgp)->AttributeID = h2b16(MAD_ATTR_ID_CLASSPORTINFO);
4531 
4532 	(void) ibcm_post_mad(msgp, &statep->stored_reply_addr, NULL, NULL);
4533 
4534 	IBTF_DPRINTF_L2(cmlog, "ibcm_query_classport_info(%p) "
4535 	    "Get method MAD posted ", channel);
4536 
4537 	(void) ibcm_free_out_msg(statep->stored_reply_addr.ibmf_hdl, &msgp);
4538 
4539 	mutex_enter(&statep->state_mutex);
4540 	IBCM_REF_CNT_DECR(statep);
4541 	mutex_exit(&statep->state_mutex);
4542 }
4543 
4544 static void
4545 ibcm_print_reply_addr(ibt_channel_hdl_t channel, ibcm_mad_addr_t *cm_reply_addr)
4546 {
4547 	IBTF_DPRINTF_L4(cmlog, "ibcm_print_reply_addr: chan 0x%p, SLID %x, "
4548 	    "DLID %x", channel, cm_reply_addr->rcvd_addr.ia_local_lid,
4549 	    cm_reply_addr->rcvd_addr.ia_remote_lid);
4550 
4551 	IBTF_DPRINTF_L4(cmlog, "ibcm_print_reply_addr: QKEY %x, PKEY %x, "
4552 	    "RQPN %x SL %x", cm_reply_addr->rcvd_addr.ia_q_key,
4553 	    cm_reply_addr->rcvd_addr.ia_p_key,
4554 	    cm_reply_addr->rcvd_addr.ia_remote_qno,
4555 	    cm_reply_addr->rcvd_addr.ia_service_level);
4556 
4557 	IBTF_DPRINTF_L4(cmlog, "ibcm_print_reply_addr: CM SGID %llX:%llX ",
4558 	    cm_reply_addr->grh_hdr.ig_sender_gid.gid_prefix,
4559 	    cm_reply_addr->grh_hdr.ig_sender_gid.gid_guid);
4560 
4561 	IBTF_DPRINTF_L4(cmlog, "ibcm_print_reply_addr: CM DGID %llX:%llX",
4562 	    cm_reply_addr->grh_hdr.ig_recver_gid.gid_prefix,
4563 	    cm_reply_addr->grh_hdr.ig_recver_gid.gid_guid);
4564 
4565 	IBTF_DPRINTF_L4(cmlog, "ibcm_print_reply_addr: CM FL %x TC %x HL %x",
4566 	    cm_reply_addr->grh_hdr.ig_flow_label,
4567 	    cm_reply_addr->grh_hdr.ig_tclass,
4568 	    cm_reply_addr->grh_hdr.ig_hop_limit);
4569 }
4570 
4571 #endif
4572 
4573 typedef struct ibcm_join_mcg_tqarg_s {
4574 	ib_gid_t		rgid;
4575 	ibt_mcg_attr_t		mcg_attr;
4576 	ibt_mcg_info_t		*mcg_infop;
4577 	ibt_mcg_handler_t	func;
4578 	void			*arg;
4579 } ibcm_join_mcg_tqarg_t;
4580 
4581 _NOTE(READ_ONLY_DATA(ibcm_join_mcg_tqarg_s))
4582 
4583 /*
4584  * Function:
4585  *	ibt_join_mcg
4586  * Input:
4587  *	rgid		The request GID that defines the HCA port from which a
4588  *			contact to SA Access is performed to add the specified
4589  *			endport GID ((mcg_attr->mc_pgid) to a multicast group.
4590  *			If mcg_attr->mc_pgid is null, then this (rgid) will be
4591  *			treated as endport GID that is to be added to the
4592  *			multicast group.
4593  *
4594  *	mcg_attr	A pointer to an ibt_mcg_attr_t structure that defines
4595  *			the attributes of the desired multicast group to be
4596  *			created or joined.
4597  *
4598  *	func		NULL or a pointer to a function to call when
4599  *			ibt_join_mcg() completes. If 'func' is not NULL then
4600  *			ibt_join_mcg() will return as soon as possible after
4601  *			initiating the multicast group join/create process.
4602  *			'func' is then called when the process completes.
4603  *
4604  *	arg		Argument to the 'func'.
4605  *
4606  * Output:
4607  *	mcg_info_p	A pointer to the ibt_mcg_info_t structure, allocated
4608  *			by the caller, where the attributes of the created or
4609  *			joined multicast group are copied.
4610  * Returns:
4611  *	IBT_SUCCESS
4612  *	IBT_INVALID_PARAM
4613  *	IBT_MCG_RECORDS_NOT_FOUND
4614  *	IBT_INSUFF_RESOURCE
4615  * Description:
4616  *	Join a multicast group.  The first full member "join" causes the MCG
4617  *	to be created.
4618  */
4619 ibt_status_t
4620 ibt_join_mcg(ib_gid_t rgid, ibt_mcg_attr_t *mcg_attr,
4621     ibt_mcg_info_t *mcg_info_p, ibt_mcg_handler_t func, void  *arg)
4622 {
4623 	ibcm_join_mcg_tqarg_t	*mcg_tq;
4624 	int			flag = ((func == NULL) ? KM_SLEEP : KM_NOSLEEP);
4625 
4626 	IBTF_DPRINTF_L3(cmlog, "ibt_join_mcg(%llX:%llX, %p)", rgid.gid_prefix,
4627 	    rgid.gid_guid, mcg_attr);
4628 
4629 	if ((rgid.gid_prefix == 0) || (rgid.gid_guid == 0)) {
4630 		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: Request GID is required");
4631 		return (IBT_INVALID_PARAM);
4632 	}
4633 
4634 	if ((mcg_attr->mc_pkey == IB_PKEY_INVALID_LIMITED) ||
4635 	    (mcg_attr->mc_pkey == IB_PKEY_INVALID_FULL)) {
4636 		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: Invalid P_Key specified");
4637 		return (IBT_INVALID_PARAM);
4638 	}
4639 
4640 	if (mcg_attr->mc_join_state == 0) {
4641 		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: JoinState not specified");
4642 		return (IBT_INVALID_PARAM);
4643 	}
4644 
4645 	if (mcg_info_p == NULL) {
4646 		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: mcg_info_p is NULL");
4647 		return (IBT_INVALID_PARAM);
4648 	}
4649 
4650 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mcg_tq))
4651 
4652 	mcg_tq = kmem_alloc(sizeof (ibcm_join_mcg_tqarg_t), flag);
4653 	if (mcg_tq == NULL) {
4654 		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: "
4655 		    "Unable to allocate memory for local usage.");
4656 		return (IBT_INSUFF_KERNEL_RESOURCE);
4657 	}
4658 
4659 	mcg_tq->rgid = rgid;
4660 	bcopy(mcg_attr, &mcg_tq->mcg_attr, sizeof (ibt_mcg_attr_t));
4661 	mcg_tq->mcg_infop = mcg_info_p;
4662 	mcg_tq->func = func;
4663 	mcg_tq->arg = arg;
4664 
4665 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mcg_tq))
4666 
4667 	if (func != NULL) {	/* Non-Blocking */
4668 		IBTF_DPRINTF_L3(cmlog, "ibt_join_mcg: Non-Blocking Call");
4669 		if (taskq_dispatch(ibcm_taskq, ibcm_process_async_join_mcg,
4670 		    mcg_tq, TQ_NOSLEEP) == 0) {
4671 			IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: Failed to "
4672 			    "Dispatch the TaskQ");
4673 			kmem_free(mcg_tq, sizeof (ibcm_join_mcg_tqarg_t));
4674 			return (IBT_INSUFF_KERNEL_RESOURCE);
4675 		} else
4676 			return (IBT_SUCCESS);
4677 	} else {		/* Blocking */
4678 		return (ibcm_process_join_mcg(mcg_tq));
4679 	}
4680 }
4681 
/*
 * ibcm_process_async_join_mcg:
 *	Taskq entry point used by non-blocking ibt_join_mcg() calls.  It only
 *	adapts ibcm_process_join_mcg() to the void taskq signature; the join
 *	status is dropped here.
 */
static void
ibcm_process_async_join_mcg(void *tq_arg)
{
	(void) ibcm_process_join_mcg(tq_arg);
}
4687 
4688 static ibt_status_t
4689 ibcm_process_join_mcg(void *taskq_arg)
4690 {
4691 	sa_mcmember_record_t	mcg_req;
4692 	sa_mcmember_record_t	*mcg_resp;
4693 	ibmf_saa_access_args_t	access_args;
4694 	ibmf_saa_handle_t	saa_handle;
4695 	uint64_t		component_mask = 0;
4696 	ibt_status_t		retval;
4697 	ibtl_cm_hca_port_t	hca_port;
4698 	uint_t			num_records;
4699 	size_t			length;
4700 	ibcm_hca_info_t		*hcap;
4701 	ibcm_join_mcg_tqarg_t	*mcg_arg = (ibcm_join_mcg_tqarg_t *)taskq_arg;
4702 	ibt_mcg_info_t		*mcg_info_p = mcg_arg->mcg_infop;
4703 
4704 	IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg(%p)", mcg_arg);
4705 
4706 	retval = ibtl_cm_get_hca_port(mcg_arg->rgid, 0, &hca_port);
4707 	if (retval != IBT_SUCCESS) {
4708 		IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: Failed to get "
4709 		    "port info from specified RGID: status = %d", retval);
4710 		goto ibcm_join_mcg_exit1;
4711 	}
4712 
4713 	bzero(&mcg_req, sizeof (sa_mcmember_record_t));
4714 
4715 	if ((mcg_arg->mcg_attr.mc_pgid.gid_prefix == 0) ||
4716 	    (mcg_arg->mcg_attr.mc_pgid.gid_guid == 0)) {
4717 		IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg: "
4718 		    "Request GID is Port GID");
4719 		mcg_req.PortGID = mcg_arg->rgid;
4720 	} else {
4721 		mcg_req.PortGID = mcg_arg->mcg_attr.mc_pgid;
4722 	}
4723 	component_mask |= SA_MC_COMPMASK_PORTGID;
4724 
4725 	mcg_req.Q_Key = mcg_arg->mcg_attr.mc_qkey;
4726 	mcg_req.P_Key = mcg_arg->mcg_attr.mc_pkey;
4727 	mcg_req.JoinState = mcg_arg->mcg_attr.mc_join_state;
4728 	mcg_req.TClass = mcg_arg->mcg_attr.mc_tclass;
4729 	mcg_req.FlowLabel = mcg_arg->mcg_attr.mc_flow;
4730 	mcg_req.SL = mcg_arg->mcg_attr.mc_sl;
4731 
4732 	component_mask |= SA_MC_COMPMASK_QKEY | SA_MC_COMPMASK_PKEY |
4733 	    SA_MC_COMPMASK_JOINSTATE | SA_MC_COMPMASK_TCLASS |
4734 	    SA_MC_COMPMASK_FLOWLABEL | SA_MC_COMPMASK_SL;
4735 
4736 	/* If client has specified MGID, use it else SA will assign one. */
4737 	if ((mcg_arg->mcg_attr.mc_mgid.gid_prefix >> 56ULL & 0xFF) == 0xFF) {
4738 		mcg_req.MGID = mcg_arg->mcg_attr.mc_mgid;
4739 		component_mask |= SA_MC_COMPMASK_MGID;
4740 	}
4741 
4742 	IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg: PGID=%llX:%llX, "
4743 	    "MGID=%llX:%llX", mcg_req.PortGID.gid_prefix,
4744 	    mcg_req.PortGID.gid_guid, mcg_req.MGID.gid_prefix,
4745 	    mcg_req.MGID.gid_guid);
4746 
4747 	/* Is MTU specified. */
4748 	if (mcg_arg->mcg_attr.mc_mtu_req.r_mtu) {
4749 		mcg_req.MTU = mcg_arg->mcg_attr.mc_mtu_req.r_mtu;
4750 		mcg_req.MTUSelector = mcg_arg->mcg_attr.mc_mtu_req.r_selector;
4751 
4752 		component_mask |= SA_MC_COMPMASK_MTUSELECTOR |
4753 		    SA_MC_COMPMASK_MTU;
4754 	}
4755 
4756 	/* Is RATE specified. */
4757 	if (mcg_arg->mcg_attr.mc_rate_req.r_srate) {
4758 		mcg_req.Rate = mcg_arg->mcg_attr.mc_rate_req.r_srate;
4759 		mcg_req.RateSelector =
4760 		    mcg_arg->mcg_attr.mc_rate_req.r_selector;
4761 
4762 		component_mask |= SA_MC_COMPMASK_RATESELECTOR |
4763 		    SA_MC_COMPMASK_RATE;
4764 	}
4765 
4766 	/* Is Packet Life Time specified. */
4767 	if (mcg_arg->mcg_attr.mc_pkt_lt_req.p_pkt_lt) {
4768 		mcg_req.Rate = mcg_arg->mcg_attr.mc_pkt_lt_req.p_pkt_lt;
4769 		mcg_req.RateSelector =
4770 		    mcg_arg->mcg_attr.mc_pkt_lt_req.p_selector;
4771 
4772 		component_mask |= SA_MC_COMPMASK_PKTLTSELECTOR |
4773 		    SA_MC_COMPMASK_PKTLT;
4774 	}
4775 
4776 	if (mcg_arg->mcg_attr.mc_hop) {
4777 		mcg_req.HopLimit = mcg_arg->mcg_attr.mc_hop;
4778 		component_mask |= SA_MC_COMPMASK_HOPLIMIT;
4779 	}
4780 
4781 	if (mcg_arg->mcg_attr.mc_scope) {
4782 		mcg_req.Scope = mcg_arg->mcg_attr.mc_scope;
4783 		component_mask |= SA_MC_COMPMASK_SCOPE;
4784 	}
4785 
4786 	if (mcg_arg->mcg_attr.mc_mlid) {
4787 		mcg_req.MLID = mcg_arg->mcg_attr.mc_mlid;
4788 		component_mask |= SA_MC_COMPMASK_MLID;
4789 	}
4790 
4791 	/* Get SA Access Handle. */
4792 	hcap = ibcm_find_hca_entry(hca_port.hp_hca_guid);
4793 	if (hcap == NULL) {
4794 		IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: NO HCA found");
4795 
4796 		retval = IBT_HCA_BUSY_DETACHING;
4797 		goto ibcm_join_mcg_exit1;
4798 	}
4799 
4800 	saa_handle = ibcm_get_saa_handle(hcap, hca_port.hp_port);
4801 	if (saa_handle == NULL) {
4802 		IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: SA Handle NULL");
4803 
4804 		retval = IBT_HCA_PORT_NOT_ACTIVE;
4805 		goto ibcm_join_mcg_exit;
4806 	}
4807 
4808 	if ((mcg_arg->mcg_attr.mc_pgid.gid_prefix != 0) &&
4809 	    (mcg_arg->mcg_attr.mc_pgid.gid_guid != 0)) {
4810 		retval = ibtl_cm_get_hca_port(mcg_arg->mcg_attr.mc_pgid, 0,
4811 		    &hca_port);
4812 		if (retval != IBT_SUCCESS) {
4813 			IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: Failed "
4814 			    "to get PortInfo of specified PGID: status = %d",
4815 			    retval);
4816 			goto ibcm_join_mcg_exit1;
4817 		}
4818 	}
4819 
4820 	/* Contact SA Access */
4821 	access_args.sq_attr_id = SA_MCMEMBERRECORD_ATTRID;
4822 	access_args.sq_access_type = IBMF_SAA_UPDATE;
4823 	access_args.sq_component_mask = component_mask;
4824 	access_args.sq_template = &mcg_req;
4825 	access_args.sq_template_length = sizeof (sa_mcmember_record_t);
4826 	access_args.sq_callback = NULL;
4827 	access_args.sq_callback_arg = NULL;
4828 
4829 	retval = ibcm_contact_sa_access(saa_handle, &access_args, &length,
4830 	    (void **)&mcg_resp);
4831 	if (retval != IBT_SUCCESS) {
4832 		IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: "
4833 		    "SA Access Failed");
4834 		goto ibcm_join_mcg_exit;
4835 	}
4836 
4837 	num_records = length/sizeof (sa_mcmember_record_t);
4838 
4839 	IBTF_DPRINTF_L4(cmlog, "ibcm_process_join_mcg: "
4840 	    "Found %d MCMember Records", num_records);
4841 
4842 	/* Validate the returned number of records. */
4843 	if ((mcg_resp != NULL) && (num_records > 0)) {
4844 		/* Update the return values. */
4845 		mcg_info_p->mc_adds_vect.av_dgid = mcg_resp->MGID;
4846 		mcg_info_p->mc_adds_vect.av_sgid = mcg_resp->PortGID;
4847 		mcg_info_p->mc_adds_vect.av_srate = mcg_resp->Rate;
4848 		mcg_info_p->mc_adds_vect.av_srvl = mcg_resp->SL;
4849 		mcg_info_p->mc_adds_vect.av_flow = mcg_resp->FlowLabel;
4850 		mcg_info_p->mc_adds_vect.av_tclass = mcg_resp->TClass;
4851 		mcg_info_p->mc_adds_vect.av_hop = mcg_resp->HopLimit;
4852 		mcg_info_p->mc_adds_vect.av_send_grh = B_TRUE;
4853 		mcg_info_p->mc_adds_vect.av_dlid = mcg_resp->MLID;
4854 		mcg_info_p->mc_mtu = mcg_resp->MTU;
4855 		mcg_info_p->mc_qkey = mcg_resp->Q_Key;
4856 
4857 		retval = ibt_pkey2index_byguid(hca_port.hp_hca_guid,
4858 		    hca_port.hp_port, mcg_resp->P_Key, &mcg_info_p->mc_pkey_ix);
4859 		if (retval != IBT_SUCCESS) {
4860 			IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg: "
4861 			    "Pkey2Index Conversion failed<%d>", retval);
4862 			mcg_info_p->mc_pkey_ix = 0;
4863 		}
4864 
4865 		mcg_info_p->mc_scope = mcg_resp->Scope;
4866 		mcg_info_p->mc_pkt_lt = mcg_resp->PacketLifeTime;
4867 
4868 		mcg_info_p->mc_adds_vect.av_port_num = hca_port.hp_port;
4869 		mcg_info_p->mc_adds_vect.av_sgid_ix = hca_port.hp_sgid_ix;
4870 		mcg_info_p->mc_adds_vect.av_src_path = 0;
4871 
4872 		/* Deallocate the memory allocated by SA for mcg_resp. */
4873 		kmem_free(mcg_resp, length);
4874 		retval = IBT_SUCCESS;
4875 	} else {
4876 		retval = IBT_MCG_RECORDS_NOT_FOUND;
4877 		IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg: "
4878 		    "MCG RECORDS NOT FOUND");
4879 	}
4880 
4881 ibcm_join_mcg_exit:
4882 	ibcm_dec_hca_acc_cnt(hcap);
4883 
4884 ibcm_join_mcg_exit1:
4885 	if (mcg_arg->func)
4886 		(*(mcg_arg->func))(mcg_arg->arg, retval, mcg_info_p);
4887 
4888 	kmem_free(mcg_arg, sizeof (ibcm_join_mcg_tqarg_t));
4889 
4890 	return (retval);
4891 }
4892 
4893 
4894 /*
4895  * Function:
4896  *	ibt_leave_mcg
4897  * Input:
4898  *	rgid		The request GID that defines the HCA port upon which
4899  *			to send the request to the Subnet Administrator, to
4900  *			remove the specified port (port_gid) from the multicast
4901  *			group.  If 'port_gid' is the Reserved GID (i.e.
4902  *			port_gid.gid_prefix = 0 and port_gid.gid_guid = 0),
4903  *			then the end-port associated with 'rgid' is removed
4904  *			from the multicast group.
4905  *
4906  *	mc_gid		A multicast group GID as returned from ibt_join_mcg()
4907  *			call.  This is optional, if not specified (i.e.
4908  *			mc_gid.gid_prefix has 0xFF in its upper 8 bits to
4909  *			identify this as being a multicast GID), then the
4910  *			port is removed from all the multicast groups of
4911  *			which it is a member.
4912  *
4913  *	port_gid	This is optional, if not the Reserved GID (gid_prefix
4914  *			and gid_guid not equal to 0), then this specifies the
4915  *			endport GID of the multicast group member being deleted
4916  *			from the group. If it is the Reserved GID (gid_prefix
4917  *			and gid_guid equal to 0) then the member endport GID is
4918  *			determined from 'rgid'.
4919  *
4920  *	mc_join_state	The Join State attribute used when the group was joined
4921  *			using ibt_join_mcg(). This Join State component must
4922  *			contains at least one bit set to 1 in the same position
4923  *			as that used during ibt_join_mcg(). i.e. the logical
4924  *			AND of the two JoinState components is not all zeros.
4925  *			This Join State component must not have some bits set
4926  *			which are not set using ibt_join_mcg().
4927  * Output:
4928  *	None.
4929  * Returns:
4930  *	IBT_SUCCESS
4931  *	IBT_INVALID_PARAM
4932  *	IBT_MC_GROUP_INVALID
4933  *	IBT_INSUFF_RESOURCE
4934  * Description:
4935  *	The port associated with the port GID shall be removed from the
4936  *	multicast group specified by MGID (mc_gid) or from all the multicast
4937  *	groups of which it is a member if the MGID (mc_gid) is not specified.
4938  *
4939  *	The last full member to leave causes the destruction of the Multicast
4940  *	Group.
4941  */
4942 ibt_status_t
4943 ibt_leave_mcg(ib_gid_t rgid, ib_gid_t mc_gid, ib_gid_t port_gid,
4944     uint8_t mc_join_state)
4945 {
4946 	sa_mcmember_record_t	mcg_req;
4947 	ibmf_saa_access_args_t	access_args;
4948 	ibmf_saa_handle_t	saa_handle;
4949 	uint64_t		component_mask = 0;
4950 	int			sa_retval;
4951 	ibt_status_t		retval;
4952 	ibtl_cm_hca_port_t	hca_port;
4953 	size_t			length;
4954 	void			*results_p;
4955 	ibcm_hca_info_t		*hcap;
4956 
4957 	IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg(%llX:%llX, %llX:%llX)",
4958 	    rgid.gid_prefix, rgid.gid_guid, mc_gid.gid_prefix, mc_gid.gid_guid);
4959 
4960 	IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg(%llX:%llX, 0x%X)",
4961 	    port_gid.gid_prefix, port_gid.gid_guid, mc_join_state);
4962 
4963 	if ((rgid.gid_prefix == 0) || (rgid.gid_guid == 0)) {
4964 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: RequestGID is required");
4965 		return (IBT_INVALID_PARAM);
4966 	}
4967 
4968 	bzero(&mcg_req, sizeof (sa_mcmember_record_t));
4969 
4970 	IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: MGID: %llX%llX",
4971 	    mc_gid.gid_prefix, mc_gid.gid_guid);
4972 
4973 	/* Validate MGID */
4974 	if ((mc_gid.gid_prefix >> 56ULL & 0xFF) == 0xFF) {
4975 		mcg_req.MGID = mc_gid;
4976 		component_mask |= SA_MC_COMPMASK_MGID;
4977 	} else if ((mc_gid.gid_prefix != 0) || (mc_gid.gid_guid != 0)) {
4978 		IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: Invalid MGID specified");
4979 		return (IBT_MC_MGID_INVALID);
4980 	}
4981 
4982 	if ((port_gid.gid_prefix == 0) || (port_gid.gid_guid == 0)) {
4983 		mcg_req.PortGID = rgid;
4984 	} else {
4985 		IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: Performing PROXY Leave");
4986 		mcg_req.PortGID = port_gid;
4987 	}
4988 	component_mask |= SA_MC_COMPMASK_PORTGID;
4989 
4990 	IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: Port GID <%llX:%llX>",
4991 	    mcg_req.PortGID.gid_prefix, mcg_req.PortGID.gid_guid);
4992 
4993 	/* Join State */
4994 	mcg_req.JoinState = mc_join_state;
4995 	component_mask |= SA_MC_COMPMASK_JOINSTATE;
4996 
4997 	retval = ibtl_cm_get_hca_port(rgid, 0, &hca_port);
4998 	if (retval != IBT_SUCCESS) {
4999 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: Failed to get port info "
5000 		    "from specified RGID : status = %d", retval);
5001 		return (retval);
5002 	}
5003 
5004 	/* Get SA Access Handle. */
5005 	hcap = ibcm_find_hca_entry(hca_port.hp_hca_guid);
5006 	if (hcap == NULL) {
5007 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: "
5008 		    "NO HCA found");
5009 		return (IBT_HCA_BUSY_DETACHING);
5010 	}
5011 
5012 	saa_handle = ibcm_get_saa_handle(hcap, hca_port.hp_port);
5013 	if (saa_handle == NULL) {
5014 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: saa_handle is NULL");
5015 		ibcm_dec_hca_acc_cnt(hcap);
5016 		return (IBT_HCA_PORT_NOT_ACTIVE);
5017 	}
5018 
5019 	/* Contact SA Access */
5020 	access_args.sq_attr_id = SA_MCMEMBERRECORD_ATTRID;
5021 	access_args.sq_access_type = IBMF_SAA_DELETE;
5022 	access_args.sq_component_mask = component_mask;
5023 	access_args.sq_template = &mcg_req;
5024 	access_args.sq_template_length = sizeof (sa_mcmember_record_t);
5025 	access_args.sq_callback = NULL;
5026 	access_args.sq_callback_arg = NULL;
5027 
5028 	ibcm_sa_access_enter();
5029 
5030 	sa_retval = ibmf_sa_access(saa_handle, &access_args, 0, &length,
5031 	    &results_p);
5032 	if (sa_retval != IBMF_SUCCESS) {
5033 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: SA access Failed: %d",
5034 		    sa_retval);
5035 		(void) ibcm_ibmf_analyze_error(sa_retval);
5036 		retval = IBT_MC_GROUP_INVALID;
5037 	}
5038 
5039 	ibcm_sa_access_exit();
5040 
5041 	ibcm_dec_hca_acc_cnt(hcap);
5042 
5043 	return (retval);
5044 }
5045 
5046 
5047 /*
5048  * Function:
5049  *	ibt_query_mcg
5050  * Input:
5051  *	rgid		The request GID that defines the HCA port upon which
5052  *			to send the request to the Subnet Administrator, to
5053  *			retrieve Multicast Records matching attributes as
5054  *			specified through 'mcg_attr' argument.
5055  *
5056  *	mcg_attr	NULL or a pointer to an ibt_mcg_attr_t structure that
5057  *			specifies MCG attributes that are to be matched.
5058  *			Attributes that are not required can be wild carded
5059  *			by specifying as '0'.
5060  *
5061  *	mcgs_max_num	The maximum number of matching multicast groups to
5062  *			return.  If zero, then all available matching multicast
5063  *			groups are returned.
5064  * Output:
5065  *	mcgs_info_p	The address of an ibt_mcg_info_t pointer, where
5066  *			multicast group information is returned. The actual
5067  *			number of entries filled in the array is returned in
5068  *			entries_p.
5069  *
5070  *	entries_p	The number of ibt_mcg_attr_t entries returned.
5071  * Returns:
5072  *	IBT_SUCCESS
5073  *	IBT_INVALID_PARAM
5074  *	IBT_MCG_RECORDS_NOT_FOUND
5075  * Description:
5076  *	Request information on multicast groups that match the parameters
5077  *	specified in mcg_attr. Information on each multicast group is returned
5078  *	to the caller in the form of an array of ibt_mcg_info_t.
5079  *	ibt_query_mcg() allocates the memory for this array and returns a
5080  *	pointer to the array (mcgs_p) and the number of entries in the array
5081  *	(entries_p). This memory should be freed by the client using
5082  *	ibt_free_mcg_info().
5083  */
5084 ibt_status_t
5085 ibt_query_mcg(ib_gid_t rgid, ibt_mcg_attr_t *mcg_attr, uint_t mcgs_max_num,
5086     ibt_mcg_info_t **mcgs_info_p, uint_t *entries_p)
5087 {
5088 	sa_mcmember_record_t	mcg_req;
5089 	sa_mcmember_record_t	*mcg_resp;
5090 	ibt_mcg_info_t		*mcg_infop;
5091 	ibmf_saa_access_args_t	access_args;
5092 	ibmf_saa_handle_t	saa_handle;
5093 	uint64_t		component_mask = 0;
5094 	ibt_status_t		retval;
5095 	ibtl_cm_hca_port_t	hport;
5096 	uint_t			num_records;
5097 	size_t			length;
5098 	void			*results_p;
5099 	ib_gid_t		port_gid;
5100 	ibcm_hca_info_t		*hcap;
5101 
5102 	IBTF_DPRINTF_L3(cmlog, "ibt_query_mcg(%p, %d)", mcg_attr, mcgs_max_num);
5103 
5104 	if ((entries_p == NULL) || (mcgs_info_p == NULL)) {
5105 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: "
5106 		    "entries_p or mcgs_info_p is NULL");
5107 		return (IBT_INVALID_PARAM);
5108 	}
5109 
5110 	if ((rgid.gid_prefix == 0) || (rgid.gid_guid == 0)) {
5111 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: RequestGID is required");
5112 		return (IBT_INVALID_PARAM);
5113 	}
5114 	IBTF_DPRINTF_L4(cmlog, "ibt_query_mcg: Request GID <%llX:%llX>",
5115 	    rgid.gid_prefix, rgid.gid_guid);
5116 
5117 	bzero(&mcg_req, sizeof (sa_mcmember_record_t));
5118 	port_gid.gid_prefix = port_gid.gid_guid = 0;
5119 
5120 	if (mcg_attr != NULL) {
5121 		port_gid = mcg_attr->mc_pgid;
5122 
5123 		if ((port_gid.gid_prefix != 0) && (port_gid.gid_guid != 0)) {
5124 			mcg_req.PortGID = mcg_attr->mc_pgid;
5125 			component_mask |= SA_MC_COMPMASK_PORTGID;
5126 
5127 			IBTF_DPRINTF_L4(cmlog, "ibt_query_mcg: PGID %llX:%llX",
5128 			    port_gid.gid_prefix, port_gid.gid_guid);
5129 		}
5130 
5131 		/* Is Q_Key specified. */
5132 		if (mcg_attr->mc_qkey != 0) {
5133 			mcg_req.Q_Key = mcg_attr->mc_qkey;
5134 			component_mask |= SA_MC_COMPMASK_QKEY;
5135 		}
5136 
5137 		/* Is P_Key specified. */
5138 		if (mcg_attr->mc_pkey != 0) {
5139 			mcg_req.P_Key = mcg_attr->mc_pkey;
5140 			component_mask |= SA_MC_COMPMASK_PKEY;
5141 		}
5142 
5143 		/* Is MGID specified. */
5144 		if ((mcg_attr->mc_mgid.gid_prefix >> 56ULL & 0xFF) == 0xFF) {
5145 			mcg_req.MGID = mcg_attr->mc_mgid;
5146 			component_mask |= SA_MC_COMPMASK_MGID;
5147 		}
5148 
5149 		/* Is MTU specified. */
5150 		if (mcg_attr->mc_mtu_req.r_mtu) {
5151 			mcg_req.MTU = mcg_attr->mc_mtu_req.r_mtu;
5152 			mcg_req.MTUSelector = mcg_attr->mc_mtu_req.r_selector;
5153 
5154 			component_mask |= SA_MC_COMPMASK_MTUSELECTOR |
5155 			    SA_MC_COMPMASK_MTU;
5156 		}
5157 
5158 		if (mcg_attr->mc_tclass) {
5159 			mcg_req.TClass = mcg_attr->mc_tclass;
5160 			component_mask |= SA_MC_COMPMASK_TCLASS;
5161 		}
5162 
5163 		/* Is RATE specified. */
5164 		if (mcg_attr->mc_rate_req.r_srate) {
5165 			mcg_req.Rate = mcg_attr->mc_rate_req.r_srate;
5166 			mcg_req.RateSelector = mcg_attr->mc_rate_req.r_selector;
5167 
5168 			component_mask |= SA_MC_COMPMASK_RATESELECTOR |
5169 			    SA_MC_COMPMASK_RATE;
5170 		}
5171 
5172 		/* Is Packet Life Time specified. */
5173 		if (mcg_attr->mc_pkt_lt_req.p_pkt_lt) {
5174 			mcg_req.Rate = mcg_attr->mc_pkt_lt_req.p_pkt_lt;
5175 			mcg_req.RateSelector =
5176 			    mcg_attr->mc_pkt_lt_req.p_selector;
5177 
5178 			component_mask |= SA_MC_COMPMASK_PKTLTSELECTOR |
5179 			    SA_MC_COMPMASK_PKTLT;
5180 		}
5181 
5182 		if (mcg_attr->mc_hop) {
5183 			mcg_req.HopLimit = mcg_attr->mc_hop;
5184 			component_mask |= SA_MC_COMPMASK_HOPLIMIT;
5185 		}
5186 
5187 		if (mcg_attr->mc_flow) {
5188 			mcg_req.FlowLabel = mcg_attr->mc_flow;
5189 			component_mask |= SA_MC_COMPMASK_FLOWLABEL;
5190 		}
5191 
5192 		if (mcg_attr->mc_sl) {
5193 			mcg_req.SL = mcg_attr->mc_sl;
5194 			component_mask |= SA_MC_COMPMASK_SL;
5195 		}
5196 
5197 		if (mcg_attr->mc_scope) {
5198 			mcg_req.Scope = mcg_attr->mc_scope;
5199 			component_mask |= SA_MC_COMPMASK_SCOPE;
5200 		}
5201 
5202 		if (mcg_attr->mc_join_state) {
5203 			mcg_req.JoinState = mcg_attr->mc_join_state;
5204 			component_mask |= SA_MC_COMPMASK_JOINSTATE;
5205 		}
5206 
5207 		if (mcg_attr->mc_mlid) {
5208 			mcg_req.MLID = mcg_attr->mc_mlid;
5209 			component_mask |= SA_MC_COMPMASK_MLID;
5210 		}
5211 	}
5212 
5213 	retval = ibtl_cm_get_hca_port(rgid, 0, &hport);
5214 	if (retval != IBT_SUCCESS) {
5215 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: Failed to get port info "
5216 		    "from specified RGID : status = %d", retval);
5217 		return (retval);
5218 	}
5219 
5220 	/* Get SA Access Handle. */
5221 	hcap = ibcm_find_hca_entry(hport.hp_hca_guid);
5222 	if (hcap == NULL) {
5223 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: NO HCA found");
5224 		return (IBT_HCA_BUSY_DETACHING);
5225 	}
5226 
5227 	saa_handle = ibcm_get_saa_handle(hcap, hport.hp_port);
5228 	if (saa_handle == NULL) {
5229 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: saa_handle is NULL");
5230 		ibcm_dec_hca_acc_cnt(hcap);
5231 		return (IBT_HCA_PORT_NOT_ACTIVE);
5232 	}
5233 
5234 	/* Contact SA Access */
5235 	access_args.sq_attr_id = SA_MCMEMBERRECORD_ATTRID;
5236 	access_args.sq_access_type = IBMF_SAA_RETRIEVE;
5237 	access_args.sq_component_mask = component_mask;
5238 	access_args.sq_template = &mcg_req;
5239 	access_args.sq_template_length = sizeof (sa_mcmember_record_t);
5240 	access_args.sq_callback = NULL;
5241 	access_args.sq_callback_arg = NULL;
5242 
5243 	retval = ibcm_contact_sa_access(saa_handle, &access_args, &length,
5244 	    &results_p);
5245 	if (retval != IBT_SUCCESS) {
5246 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: SA access Failed");
5247 		ibcm_dec_hca_acc_cnt(hcap);
5248 		return (retval);
5249 	}
5250 
5251 	num_records = length/sizeof (sa_mcmember_record_t);
5252 
5253 	IBTF_DPRINTF_L4(cmlog, "ibt_query_mcg: Found %d MCMember Records",
5254 	    num_records);
5255 
5256 	/* Validate the returned number of records. */
5257 	if ((results_p != NULL) && (num_records > 0)) {
5258 		uint_t	i;
5259 
5260 		/*
5261 		 * If mcgs_max_num is zero, then return all records else
5262 		 * return only requested number of records
5263 		 */
5264 		if ((mcgs_max_num != 0) && (num_records > mcgs_max_num)) {
5265 			/* we are interested in only mcgs_max_num records */
5266 			num_records = mcgs_max_num;
5267 		}
5268 
5269 		/*
5270 		 * The SGID returned in "mcg_info_p" buffer should be PortGID,
5271 		 * (mcg_attr->mc_pgid), if 'mcg_attr->mc_pgid' was specified,
5272 		 * else RequestGID (rgid) should be returned.
5273 		 */
5274 		if ((port_gid.gid_prefix != 0) && (port_gid.gid_guid != 0)) {
5275 
5276 			/* Get sgid_ix and port number of 'port_gid' */
5277 			retval = ibtl_cm_get_hca_port(port_gid, 0, &hport);
5278 			if (retval != IBT_SUCCESS) {
5279 				IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: "
5280 				    "Failed to Get Portinfo for PortGID :"
5281 				    "status = %d", retval);
5282 				return (retval);
5283 			}
5284 		} else {
5285 			/*
5286 			 * The sgid_ix and port number related to RequestGID
5287 			 * are already obtained at the beginning.
5288 			 */
5289 			port_gid = rgid;
5290 		}
5291 
5292 		/*
5293 		 * Allocate memory for return buffer, to be freed in
5294 		 * ibt_free_mcg_info().
5295 		 */
5296 		mcg_infop = kmem_alloc((num_records * sizeof (ibt_mcg_info_t)),
5297 		    KM_SLEEP);
5298 
5299 		*mcgs_info_p = mcg_infop;
5300 		*entries_p = num_records;
5301 
5302 		/* Update the return values. */
5303 		for (i = 0; i < num_records; i++) {
5304 
5305 			mcg_resp = (sa_mcmember_record_t *)((uchar_t *)
5306 			    results_p + i * sizeof (sa_mcmember_record_t));
5307 
5308 			mcg_infop[i].mc_adds_vect.av_dgid = mcg_resp->MGID;
5309 			mcg_infop[i].mc_adds_vect.av_sgid = port_gid;
5310 			mcg_infop[i].mc_adds_vect.av_srate = mcg_resp->Rate;
5311 			mcg_infop[i].mc_adds_vect.av_srvl = mcg_resp->SL;
5312 			mcg_infop[i].mc_adds_vect.av_flow = mcg_resp->FlowLabel;
5313 			mcg_infop[i].mc_adds_vect.av_tclass = mcg_resp->TClass;
5314 			mcg_infop[i].mc_adds_vect.av_hop = mcg_resp->HopLimit;
5315 			mcg_infop[i].mc_adds_vect.av_port_num = hport.hp_port;
5316 			mcg_infop[i].mc_adds_vect.av_send_grh = B_TRUE;
5317 			mcg_infop[i].mc_adds_vect.av_dlid = mcg_resp->MLID;
5318 			mcg_infop[i].mc_adds_vect.av_sgid_ix = hport.hp_sgid_ix;
5319 			mcg_infop[i].mc_adds_vect.av_src_path = 0;
5320 			mcg_infop[i].mc_mtu = mcg_resp->MTU;
5321 			mcg_infop[i].mc_qkey = mcg_resp->Q_Key;
5322 			mcg_infop[i].mc_scope = mcg_resp->Scope;
5323 			mcg_infop[i].mc_pkt_lt = mcg_resp->PacketLifeTime;
5324 
5325 			if (ibt_pkey2index_byguid(hport.hp_hca_guid,
5326 			    hport.hp_port, mcg_resp->P_Key,
5327 			    &mcg_infop[i].mc_pkey_ix) != IBT_SUCCESS) {
5328 				IBTF_DPRINTF_L3(cmlog, "ibt_query_mcg: "
5329 				    "Pkey2Index Conversion failed");
5330 				mcg_infop[i].mc_pkey_ix = 0;
5331 			}
5332 		}
5333 
5334 		/*
5335 		 * Deallocate the memory allocated by SA for results_p.
5336 		 */
5337 		kmem_free(results_p, length);
5338 		retval = IBT_SUCCESS;
5339 
5340 		IBTF_DPRINTF_L3(cmlog, "ibt_query_mcg: returning %d MCGRecords",
5341 		    num_records);
5342 
5343 	} else {
5344 		retval = IBT_MCG_RECORDS_NOT_FOUND;
5345 		*entries_p = 0;
5346 
5347 		IBTF_DPRINTF_L3(cmlog, "ibt_query_mcg: MCG RECORDS NOT FOUND");
5348 	}
5349 
5350 	ibcm_dec_hca_acc_cnt(hcap);
5351 
5352 	return (retval);
5353 }
5354 
5355 
5356 /*
5357  * ibt_free_mcg_info()
5358  *	Free the memory allocated by successful ibt_query_mcg()
5359  *
5360  *	mcgs_info	Pointer returned by ibt_query_mcg().
5361  *
5362  *	entries		The number of ibt_mcg_info_t entries to free.
5363  */
5364 void
5365 ibt_free_mcg_info(ibt_mcg_info_t *mcgs_info, uint_t entries)
5366 {
5367 	IBTF_DPRINTF_L3(cmlog, "ibt_free_mcg_info: "
5368 	    "Free <%d> entries from 0x%p", entries, mcgs_info);
5369 
5370 	if ((mcgs_info != NULL) && (entries > 0))
5371 		kmem_free(mcgs_info, entries * sizeof (ibt_mcg_info_t));
5372 	else
5373 		IBTF_DPRINTF_L2(cmlog, "ibt_free_mcg_info: "
5374 		    "ERROR: NULL buf pointer or length specified.");
5375 }
5376 
5377 
5378 /*
5379  * Function:
5380  *	ibt_gid_to_node_info()
5381  * Input:
5382  *	gid		Identifies the IB Node and port for which to obtain
5383  *			Node information.
5384  * Output:
5385  *	node_info_p	A pointer to an ibt_node_info_t structure (allocated
5386  *			by the caller) in which to return the node information.
5387  * Returns:
5388  *	IBT_SUCCESS
5389  *	IBT_INVALID_PARAM
5390  *	IBT_NODE_RECORDS_NOT_FOUND
5391  *	IBT_NO_HCAS_AVAILABLE
5392  * Description:
5393  *	Retrieve Node Information for the specified GID.
5394  */
5395 ibt_status_t
5396 ibt_gid_to_node_info(ib_gid_t gid, ibt_node_info_t *node_info_p)
5397 {
5398 	sa_node_record_t	nr_req, *nr_resp;
5399 	ibmf_saa_handle_t	saa_handle;
5400 	ibt_status_t		retval;
5401 	ibcm_hca_info_t		*hcap;
5402 	ibtl_cm_hca_port_t	hport;
5403 	int			i, j;
5404 	uint_t			num_rec;
5405 	ib_guid_t		*guid_array = NULL;
5406 	sa_path_record_t	*path;
5407 	size_t			len;
5408 	uint8_t			npaths;
5409 	uint32_t		num_hcas = 0;
5410 	ib_lid_t		node_lid;
5411 	boolean_t		local_node = B_FALSE;
5412 	void			*res_p;
5413 	uint8_t			num_ports = 0;
5414 
5415 
5416 	IBTF_DPRINTF_L4(cmlog, "ibt_gid_to_node_info(%llX:%llX, %p)",
5417 	    gid.gid_prefix, gid.gid_guid, node_info_p);
5418 
5419 	if ((gid.gid_prefix == 0) || (gid.gid_guid == 0)) {
5420 		IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: GID is required");
5421 		return (IBT_INVALID_PARAM);
5422 	}
5423 
5424 	if (node_info_p == NULL) {
5425 		IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: "
5426 		    "Return Buf (node_info_p) is NULL.");
5427 		return (IBT_INVALID_PARAM);
5428 	}
5429 
5430 	/*
5431 	 * If 'gid' is on local node, then get node lid (i.e. base lid of the
5432 	 * associated port) info via ibtl_cm_get_hca_port() call.
5433 	 */
5434 	bzero(&hport, sizeof (ibtl_cm_hca_port_t));
5435 	if (ibtl_cm_get_hca_port(gid, 0, &hport) == IBT_SUCCESS) {
5436 
5437 		hcap = ibcm_find_hca_entry(hport.hp_hca_guid);
5438 		if (hcap == NULL) {
5439 			IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
5440 			    "HCA(%llX) info not found", hport.hp_hca_guid);
5441 			return (IBT_NO_HCAS_AVAILABLE);
5442 		}
5443 		num_ports = 1;
5444 		num_hcas = 1;
5445 		node_lid = hport.hp_base_lid;
5446 		local_node = B_TRUE;
5447 		IBTF_DPRINTF_L4(cmlog, "ibt_gid_to_node_info: Local Node: "
5448 		    "LID = 0x%X", node_lid);
5449 	} else {
5450 		/* Get the number of HCAs and their GUIDs */
5451 		num_hcas = ibt_get_hca_list(&guid_array);
5452 		IBTF_DPRINTF_L4(cmlog, "ibt_gid_to_node_info: ibt_get_hca_list "
5453 		    "returned %d hcas", num_hcas);
5454 
5455 		if (num_hcas == 0) {
5456 			IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: "
5457 			    "NO HCA's Found on this system");
5458 			return (IBT_NO_HCAS_AVAILABLE);
5459 		}
5460 	}
5461 
5462 	for (i = 0; i < num_hcas; i++) {
5463 		if (local_node == B_FALSE) {
5464 			hcap = ibcm_find_hca_entry(guid_array[i]);
5465 			if (hcap == NULL) {
5466 				IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
5467 				    "HCA(%llX) info not found", guid_array[i]);
5468 				retval = IBT_NO_HCAS_AVAILABLE;
5469 				continue;
5470 			}
5471 			num_ports = hcap->hca_num_ports;
5472 		}
5473 
5474 		for (j = 0; j < num_ports; j++) {
5475 			uint8_t		port = 0;
5476 
5477 			if (local_node == B_TRUE)
5478 				port = hport.hp_port;
5479 			else
5480 				port = j + 1;
5481 
5482 			/* Get SA Access Handle. */
5483 			saa_handle = ibcm_get_saa_handle(hcap, port);
5484 			if (saa_handle == NULL) {
5485 				IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
5486 				    "Port %d of HCA (%llX) is NOT ACTIVE",
5487 				    port, hport.hp_hca_guid);
5488 				retval = IBT_NODE_RECORDS_NOT_FOUND;
5489 				continue;
5490 			}
5491 
5492 			if (local_node == B_FALSE) {
5493 				ib_gid_t	sgid;
5494 				int		sa_ret;
5495 
5496 				/*
5497 				 * Check whether 'gid' and this port has same
5498 				 * subnet prefix. If not, then there is no use
5499 				 * in searching from this port.
5500 				 */
5501 				sgid = hcap->hca_port_info[j].port_sgid0;
5502 				if (gid.gid_prefix != sgid.gid_prefix) {
5503 					IBTF_DPRINTF_L3(cmlog,
5504 					    "ibt_gid_to_node_info:Sn_Prefix of "
5505 					    "GID(%llX) and Port's(%llX) differ",
5506 					    gid.gid_prefix, sgid.gid_prefix);
5507 					retval = IBT_NODE_RECORDS_NOT_FOUND;
5508 					continue;
5509 				}
5510 
5511 				/*
5512 				 * First Get Path Records for the specified DGID
5513 				 * from this port (SGID). From Path Records,
5514 				 * note down DLID, then use this DLID as Input
5515 				 * attribute to get NodeRecords from SA Access.
5516 				 */
5517 				npaths = 1;
5518 				path = NULL;
5519 
5520 				sa_ret = ibmf_saa_gid_to_pathrecords(saa_handle,
5521 				    sgid, gid, 0, 0, B_TRUE, &npaths, 0, &len,
5522 				    &path);
5523 				if (sa_ret != IBMF_SUCCESS) {
5524 					IBTF_DPRINTF_L2(cmlog,
5525 					    "ibt_gid_to_node_info: "
5526 					    "ibmf_saa_gid_to_pathrecords() "
5527 					    "returned error: %d ", sa_ret);
5528 					retval =
5529 					    ibcm_ibmf_analyze_error(sa_ret);
5530 					continue;
5531 				} else if ((npaths == 0) || (path == NULL)) {
5532 					IBTF_DPRINTF_L3(cmlog,
5533 					    "ibt_gid_to_node_info: failed (%d) "
5534 					    "to get path records for the DGID "
5535 					    "0x%llX from SGID 0x%llX", sa_ret,
5536 					    gid.gid_guid, sgid.gid_guid);
5537 					retval = IBT_NODE_RECORDS_NOT_FOUND;
5538 					continue;
5539 				}
5540 				node_lid = path->DLID;	/* LID */
5541 
5542 				IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
5543 				    "Remote Node: LID = 0x%X", node_lid);
5544 
5545 				/* Free SA_Access memory for path record. */
5546 				kmem_free(path, len);
5547 			}
5548 
5549 			/* Retrieve Node Records from SA Access. */
5550 			bzero(&nr_req, sizeof (sa_node_record_t));
5551 
5552 			nr_req.LID = node_lid;	/* LID */
5553 
5554 			retval = ibcm_get_node_rec(saa_handle, &nr_req,
5555 			    SA_NODEINFO_COMPMASK_NODELID, &res_p, &len);
5556 			if (retval == IBT_NODE_RECORDS_NOT_FOUND) {
5557 				IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: "
5558 				    "failed (%d) to get Node records", retval);
5559 				continue;
5560 			} else if (retval != IBT_SUCCESS) {
5561 				IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: "
5562 				    "failed (%d) to get Node records", retval);
5563 				ibcm_dec_hca_acc_cnt(hcap);
5564 				goto gid_to_ni_exit;
5565 			}
5566 
5567 			num_rec = len/sizeof (sa_node_record_t);
5568 			nr_resp = (sa_node_record_t *)(uchar_t *)res_p;
5569 
5570 			/* Validate the returned number of records. */
5571 			if ((nr_resp != NULL) && (num_rec > 0)) {
5572 
5573 				IBCM_DUMP_NODE_REC(nr_resp);
5574 
5575 				_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(
5576 				    *node_info_p))
5577 
5578 				node_info_p->n_sys_img_guid =
5579 				    nr_resp->NodeInfo.SystemImageGUID;
5580 				node_info_p->n_node_guid =
5581 				    nr_resp->NodeInfo.NodeGUID;
5582 				node_info_p->n_port_guid =
5583 				    nr_resp->NodeInfo.PortGUID;
5584 				node_info_p->n_dev_id =
5585 				    nr_resp->NodeInfo.DeviceID;
5586 				node_info_p->n_revision =
5587 				    nr_resp->NodeInfo.Revision;
5588 				node_info_p->n_vendor_id =
5589 				    nr_resp->NodeInfo.VendorID;
5590 				node_info_p->n_num_ports =
5591 				    nr_resp->NodeInfo.NumPorts;
5592 				node_info_p->n_port_num =
5593 				    nr_resp->NodeInfo.LocalPortNum;
5594 				node_info_p->n_node_type =
5595 				    nr_resp->NodeInfo.NodeType;
5596 				(void) strncpy(node_info_p->n_description,
5597 				    (char *)&nr_resp->NodeDescription, 64);
5598 
5599 				_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(
5600 				    *node_info_p))
5601 
5602 				/*
5603 				 * Deallocate the memory allocated by SA for
5604 				 * 'nr_resp'.
5605 				 */
5606 				ibcm_dec_hca_acc_cnt(hcap);
5607 				kmem_free(nr_resp, len);
5608 				retval = IBT_SUCCESS;
5609 
5610 				goto gid_to_ni_exit;
5611 			} else {
5612 				retval = IBT_NODE_RECORDS_NOT_FOUND;
5613 				IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
5614 				    "Node Records NOT found - PortGUID %016llX",
5615 				    gid.gid_guid);
5616 			}
5617 		}
5618 		ibcm_dec_hca_acc_cnt(hcap);
5619 
5620 		if (local_node == B_TRUE)
5621 			break;
5622 	}
5623 
5624 gid_to_ni_exit:
5625 	if (guid_array)
5626 		ibt_free_hca_list(guid_array, num_hcas);
5627 
5628 	IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: done. Status %d", retval);
5629 
5630 	return (retval);
5631 }
5632 
5633 
5634 static ibt_status_t
5635 ibcm_get_node_rec(ibmf_saa_handle_t saa_handle, sa_node_record_t *nr_req,
5636     uint64_t component_mask, void *result_p, size_t *len)
5637 {
5638 	ibmf_saa_access_args_t  args;
5639 	size_t			length;
5640 	ibt_status_t		retval;
5641 
5642 	args.sq_attr_id = SA_NODERECORD_ATTRID;
5643 	args.sq_template = nr_req;
5644 	args.sq_access_type = IBMF_SAA_RETRIEVE;
5645 	args.sq_template_length = sizeof (sa_node_record_t);
5646 	args.sq_component_mask = component_mask;
5647 	args.sq_callback = NULL;
5648 	args.sq_callback_arg = NULL;
5649 
5650 	retval = ibcm_contact_sa_access(saa_handle, &args, &length, result_p);
5651 	if (retval != IBT_SUCCESS) {
5652 		IBTF_DPRINTF_L2(cmlog, "ibcm_get_node_rec: SA Call Failed");
5653 		return (retval);
5654 	}
5655 
5656 	*len = length;
5657 
5658 	/* Validate the returned number of records. */
5659 	if ((result_p != NULL) && (length > 0)) {
5660 		IBTF_DPRINTF_L3(cmlog, "ibcm_get_node_rec: Node Records FOUND");
5661 
5662 		/* Got it, done!. */
5663 		return (IBT_SUCCESS);
5664 	} else {
5665 		IBTF_DPRINTF_L2(cmlog, "ibcm_get_node_rec: Node Rec NOT found");
5666 		return (IBT_NODE_RECORDS_NOT_FOUND);
5667 	}
5668 }
5669 
5670 
5671 /*
5672  * Function:
5673  *	ibt_get_companion_port_gids()
5674  * Description:
5675  *	Get list of GID's available on a companion port(s) of the specified
5676  *	GID or list of GIDs available on a specified Node GUID/SystemImage GUID.
5677  */
5678 ibt_status_t
5679 ibt_get_companion_port_gids(ib_gid_t gid, ib_guid_t hca_guid,
5680     ib_guid_t sysimg_guid, ib_gid_t **gids_p, uint_t *num_gids_p)
5681 {
5682 	sa_node_record_t	nr_req, *nr_resp;
5683 	void			*res_p;
5684 	ibmf_saa_handle_t	saa_handle;
5685 	int			sa_ret;
5686 	ibt_status_t		retval = IBT_SUCCESS;
5687 	ibcm_hca_info_t		*hcap;
5688 	ibtl_cm_hca_port_t	hport;
5689 	int			i, j;
5690 	uint_t			num_rec;
5691 	ib_guid_t		*guid_array = NULL;
5692 	sa_path_record_t	*path;
5693 	size_t			len;
5694 	uint8_t			npaths;
5695 	uint32_t		num_hcas = 0;
5696 	boolean_t		local_node = B_FALSE;
5697 	boolean_t		local_hca = B_FALSE;
5698 	ib_guid_t		h_guid = hca_guid;
5699 	ib_gid_t		*gidp = NULL, *t_gidp = NULL;
5700 	int			multi_hca_loop = 0;
5701 
5702 	IBTF_DPRINTF_L4(cmlog, "ibt_get_companion_port_gids(%llX:%llX, %llX, "
5703 	    "%llX)", gid.gid_prefix, gid.gid_guid, hca_guid, sysimg_guid);
5704 
5705 	if (((gid.gid_prefix == 0) || (gid.gid_guid == 0)) && (hca_guid == 0) &&
5706 	    (sysimg_guid == 0)) {
5707 		IBTF_DPRINTF_L2(cmlog, "ibt_get_companion_port_gids: "
5708 		    "Null Input attribute specified.");
5709 		return (IBT_INVALID_PARAM);
5710 	}
5711 
5712 	if ((num_gids_p == NULL) || (gids_p == NULL)) {
5713 		IBTF_DPRINTF_L2(cmlog, "ibt_get_companion_port_gids: "
5714 		    "num_gids_p or gids_p is NULL");
5715 		return (IBT_INVALID_PARAM);
5716 	}
5717 
5718 	*num_gids_p = 0;
5719 
5720 	/* Get the number of HCAs and their GUIDs */
5721 	if ((num_hcas = ibt_get_hca_list(&guid_array)) == 0) {
5722 		IBTF_DPRINTF_L2(cmlog, "ibt_get_companion_port_gids: "
5723 		    "NO HCA's Found on this system");
5724 		return (IBT_NO_HCAS_AVAILABLE);
5725 	}
5726 
5727 	IBTF_DPRINTF_L4(cmlog, "ibt_get_companion_port_gids: "
5728 	    "ibt_get_hca_list() returned %d hcas", num_hcas);
5729 
5730 	/*
5731 	 * If 'gid' is on local node, then get node lid (i.e. base lid of the
5732 	 * associated port) info via ibtl_cm_get_hca_port() call.
5733 	 */
5734 	bzero(&hport, sizeof (ibtl_cm_hca_port_t));
5735 	if ((gid.gid_prefix != 0) && (gid.gid_guid != 0) &&
5736 	    (ibtl_cm_get_hca_port(gid, 0, &hport) == IBT_SUCCESS)) {
5737 
5738 		if ((hca_guid != 0) && (hca_guid != hport.hp_hca_guid)) {
5739 			IBTF_DPRINTF_L2(cmlog, "ibt_get_companion_port_gids: "
5740 			    "Invalid GID<->HCAGUID combination specified.");
5741 			retval = IBT_INVALID_PARAM;
5742 			goto get_comp_pgid_exit;
5743 		}
5744 		h_guid = hport.hp_hca_guid;
5745 		local_node = B_TRUE;
5746 
5747 		IBTF_DPRINTF_L4(cmlog, "ibt_get_companion_port_gids: "
5748 		    "Local Node: HCA (0x%llX)", h_guid);
5749 	} else if (h_guid) {	/* Is specified HCA GUID - local? */
5750 		for (i = 0; i < num_hcas; i++) {
5751 			if (h_guid == guid_array[i]) {
5752 				local_hca = B_TRUE;
5753 				break;
5754 			}
5755 		}
5756 	} else if (sysimg_guid) { /* Is specified SystemImage GUID - local? */
5757 		for (i = 0; i < num_hcas; i++) {
5758 			ibt_status_t	ret;
5759 			ibt_hca_attr_t	hca_attr;
5760 
5761 			ret = ibt_query_hca_byguid(guid_array[i], &hca_attr);
5762 			if (ret != IBT_SUCCESS) {
5763 				IBTF_DPRINTF_L2(cmlog,
5764 				    "ibt_get_companion_port_gids: HCA(%llX) "
5765 				    "info not found", guid_array[i]);
5766 				retval = IBT_NO_HCAS_AVAILABLE;
5767 				continue;
5768 			}
5769 			if (hca_attr.hca_si_guid == sysimg_guid) {
5770 				if ((hca_guid != 0) &&
5771 				    (hca_guid != hca_attr.hca_node_guid)) {
5772 					IBTF_DPRINTF_L2(cmlog,
5773 					    "ibt_get_companion_port_gids: "
5774 					    "Invalid SysImg<->HCA GUID "
5775 					    "combination specified.");
5776 					retval = IBT_INVALID_PARAM;
5777 					goto get_comp_pgid_exit;
5778 				}
5779 				local_hca = B_TRUE;
5780 				h_guid = hca_attr.hca_node_guid;
5781 				break;
5782 			}
5783 		}
5784 	}
5785 
5786 	if ((local_node == B_TRUE) || (local_hca == B_TRUE)) {
5787 		retval = ibtl_cm_get_local_comp_gids(h_guid, gid, gids_p,
5788 		    num_gids_p);
5789 		goto get_comp_pgid_exit;
5790 	}
5791 
5792 get_comp_for_multihca:
5793 	/* We will be here, if request is for remote node */
5794 	for (i = 0; i < num_hcas; i++) {
5795 		int		multism;
5796 		uint_t		count = 0;
5797 		int		multi_sm_loop = 0;
5798 		uint_t		k = 0, l;
5799 
5800 		hcap = ibcm_find_hca_entry(guid_array[i]);
5801 		if (hcap == NULL) {
5802 			IBTF_DPRINTF_L3(cmlog, "ibt_get_companion_port_gids: "
5803 			    "HCA(%llX) info not found", guid_array[i]);
5804 			retval = IBT_NO_HCAS_AVAILABLE;
5805 			continue;
5806 		}
5807 
5808 		/* 1 - MultiSM, 0 - Single SM */
5809 		multism = ibtl_cm_is_multi_sm(guid_array[i]);
5810 
5811 		for (j = 0; j < hcap->hca_num_ports; j++) {
5812 			ib_gid_t	sgid;
5813 			uint64_t	c_mask = 0;
5814 			ib_guid_t	pg;
5815 			uint_t		port = j;
5816 
5817 get_comp_for_multism:
5818 			IBTF_DPRINTF_L3(cmlog, "ibt_get_companion_port_gids: "
5819 			    "Port %d, HCA %llX, MultiSM= %d, Loop=%d",
5820 			    port + 1, h_guid, multism, multi_sm_loop);
5821 
5822 			/* Get SA Access Handle. */
5823 			saa_handle = ibcm_get_saa_handle(hcap, port + 1);
5824 			if (saa_handle == NULL) {
5825 				IBTF_DPRINTF_L2(cmlog,
5826 				    "ibt_get_companion_port_gids: "
5827 				    "Port (%d)  - NOT ACTIVE", port + 1);
5828 				retval = IBT_GIDS_NOT_FOUND;
5829 				continue;
5830 			}
5831 
5832 			/*
5833 			 * Check whether 'gid' and this port has same subnet
5834 			 * prefix. If not, then there is no use in searching
5835 			 * from this port.
5836 			 */
5837 			sgid = hcap->hca_port_info[port].port_sgid0;
5838 			if ((h_guid == 0) && (gid.gid_prefix != 0) &&
5839 			    (multi_sm_loop == 0) &&
5840 			    (gid.gid_prefix != sgid.gid_prefix)) {
5841 				IBTF_DPRINTF_L2(cmlog,
5842 				    "ibt_get_companion_port_gids: SnPrefix of "
5843 				    "GID(%llX) and Port SN_Pfx(%llX) differ",
5844 				    gid.gid_prefix, sgid.gid_prefix);
5845 				retval = IBT_GIDS_NOT_FOUND;
5846 				continue;
5847 			}
5848 
5849 			/*
5850 			 * If HCA GUID or System Image GUID is specified, then
5851 			 * we can achieve our goal sooner!.
5852 			 */
5853 			if ((h_guid == 0) && (sysimg_guid == 0)) {
5854 				/* So only GID info is provided. */
5855 
5856 				/*
5857 				 * First Get Path Records for the specified DGID
5858 				 * from this port (SGID). From Path Records,
5859 				 * note down DLID, then use this DLID as Input
5860 				 * attribute to get NodeRecords.
5861 				 */
5862 				npaths = 1;
5863 				path = NULL;
5864 
5865 				sa_ret = ibmf_saa_gid_to_pathrecords(saa_handle,
5866 				    sgid, gid, 0, 0, B_TRUE, &npaths, 0, &len,
5867 				    &path);
5868 				if (sa_ret != IBMF_SUCCESS) {
5869 					IBTF_DPRINTF_L2(cmlog,
5870 					    "ibt_get_companion_port_gids: "
5871 					    "ibmf_saa_gid_to_pathrecords() "
5872 					    "returned error: %d ", sa_ret);
5873 					retval =
5874 					    ibcm_ibmf_analyze_error(sa_ret);
5875 					ibcm_dec_hca_acc_cnt(hcap);
5876 					goto get_comp_pgid_exit;
5877 				} else if ((npaths == 0) || (path == NULL)) {
5878 					IBTF_DPRINTF_L2(cmlog,
5879 					    "ibt_get_companion_port_gids: "
5880 					    "failed (%d) to get path records "
5881 					    "for the DGID (0x%llX) from SGID "
5882 					    "(0x%llX)", sa_ret, gid.gid_guid,
5883 					    sgid.gid_guid);
5884 					retval = IBT_GIDS_NOT_FOUND;
5885 					continue;
5886 				}
5887 
5888 				bzero(&nr_req, sizeof (sa_node_record_t));
5889 				nr_req.LID = path->DLID;	/* LID */
5890 
5891 				IBTF_DPRINTF_L3(cmlog,
5892 				    "ibt_get_companion_port_gids: "
5893 				    "Remote Node: LID = 0x%X", nr_req.LID);
5894 
5895 				/* Free SA_Access memory for path record. */
5896 				kmem_free(path, len);
5897 
5898 				IBTF_DPRINTF_L3(cmlog,
5899 				    "ibt_get_companion_port_gids: SAA Call: "
5900 				    "based on LID ");
5901 
5902 				retval = ibcm_get_node_rec(saa_handle, &nr_req,
5903 				    SA_NODEINFO_COMPMASK_NODELID, &res_p, &len);
5904 				if (retval == IBT_NODE_RECORDS_NOT_FOUND) {
5905 					IBTF_DPRINTF_L2(cmlog,
5906 					    "ibt_get_companion_port_gids: "
5907 					    "failed (%d) to get Node records",
5908 					    retval);
5909 					continue;
5910 				} else if (retval != IBT_SUCCESS) {
5911 					IBTF_DPRINTF_L2(cmlog,
5912 					    "ibt_get_companion_port_gids: "
5913 					    "failed (%d) to get Node records",
5914 					    retval);
5915 					ibcm_dec_hca_acc_cnt(hcap);
5916 					goto get_comp_pgid_exit;
5917 				}
5918 
5919 				nr_resp = (sa_node_record_t *)(uchar_t *)res_p;
5920 				/* Note down HCA GUID info. */
5921 				h_guid = nr_resp->NodeInfo.NodeGUID;
5922 
5923 				IBTF_DPRINTF_L3(cmlog,
5924 				    "ibt_get_companion_port_gids: "
5925 				    "Remote HCA GUID: 0x%llX", h_guid);
5926 
5927 				IBCM_DUMP_NODE_REC(nr_resp);
5928 
5929 				kmem_free(res_p, len);
5930 			}
5931 
5932 			bzero(&nr_req, sizeof (sa_node_record_t));
5933 			if (h_guid != 0) {
5934 				nr_req.NodeInfo.NodeGUID = h_guid;
5935 				c_mask = SA_NODEINFO_COMPMASK_NODEGUID;
5936 			}
5937 
5938 			if (sysimg_guid != 0) {
5939 				nr_req.NodeInfo.SystemImageGUID = sysimg_guid;
5940 				c_mask |= SA_NODEINFO_COMPMASK_SYSIMAGEGUID;
5941 			}
5942 
5943 			IBTF_DPRINTF_L3(cmlog, "ibt_get_companion_port_gids: "
5944 			    "SAA Call: CMASK= 0x%llX", c_mask);
5945 
5946 			retval = ibcm_get_node_rec(saa_handle, &nr_req, c_mask,
5947 			    &res_p, &len);
5948 			if (retval == IBT_NODE_RECORDS_NOT_FOUND) {
5949 				IBTF_DPRINTF_L3(cmlog,
5950 				    "ibt_get_companion_port_gids: "
5951 				    "failed (%d) to get Node records", retval);
5952 				continue;
5953 			} else if (retval != IBT_SUCCESS) {
5954 				IBTF_DPRINTF_L2(cmlog,
5955 				    "ibt_get_companion_port_gids: Error: (%d) "
5956 				    "while getting Node records", retval);
5957 				ibcm_dec_hca_acc_cnt(hcap);
5958 				goto get_comp_pgid_exit;
5959 			}
5960 
5961 			num_rec = len/sizeof (sa_node_record_t);
5962 
5963 			/* We will be here, only if we found some NodeRec */
5964 			if (gid.gid_prefix && gid.gid_guid) {
5965 				nr_resp = (sa_node_record_t *)res_p;
5966 				for (l = 0; l < num_rec; l++, nr_resp++) {
5967 					pg = nr_resp->NodeInfo.PortGUID;
5968 					if (gid.gid_guid != pg)
5969 						count++;
5970 				}
5971 			} else {
5972 				count = num_rec;
5973 			}
5974 
5975 			if (count != 0) {
5976 				if (multi_sm_loop == 1) {
5977 					count += k;
5978 					t_gidp = kmem_zalloc(count *
5979 					    sizeof (ib_gid_t), KM_SLEEP);
5980 
5981 					if ((k != 0) && (gidp != NULL)) {
5982 						bcopy(gidp, t_gidp,
5983 						    k * sizeof (ib_gid_t));
5984 						kmem_free(gidp,
5985 						    k * sizeof (ib_gid_t));
5986 					}
5987 					gidp = t_gidp;
5988 				} else {
5989 					gidp = kmem_zalloc(count *
5990 					    sizeof (ib_gid_t), KM_SLEEP);
5991 				}
5992 				*num_gids_p = count;
5993 				*gids_p = gidp;
5994 
5995 				nr_resp = (sa_node_record_t *)res_p;
5996 				for (l = 0; l < num_rec; l++, nr_resp++) {
5997 					IBCM_DUMP_NODE_REC(nr_resp);
5998 
5999 					pg = nr_resp->NodeInfo.PortGUID;
6000 					IBTF_DPRINTF_L4(cmlog,
6001 					    "ibt_get_companion_port_gids: "
6002 					    "PortGID %llX", pg);
6003 
6004 					if (pg != gid.gid_guid) {
6005 						gidp[k].gid_prefix =
6006 						    sgid.gid_prefix;
6007 						gidp[k].gid_guid = pg;
6008 
6009 						IBTF_DPRINTF_L3(cmlog,
6010 						    "ibt_get_companion_pgids: "
6011 						    "GID[%d] = %llX:%llX", k,
6012 						    gidp[k].gid_prefix,
6013 						    gidp[k].gid_guid);
6014 
6015 						k++;
6016 						if (k == count)
6017 							break;
6018 					}
6019 				}
6020 				retval = IBT_SUCCESS;	/* done!. */
6021 				kmem_free(res_p, len);
6022 				ibcm_dec_hca_acc_cnt(hcap);
6023 				goto get_comp_pgid_exit;
6024 			} else {
6025 				IBTF_DPRINTF_L2(cmlog,
6026 				    "ibt_get_companion_port_gids: "
6027 				    "Companion PortGIDs not available");
6028 				retval = IBT_GIDS_NOT_FOUND;
6029 			}
6030 			/* Deallocate the memory for 'res_p'. */
6031 			kmem_free(res_p, len);
6032 
6033 			/*
6034 			 * If we are on MultiSM setup, then we need to lookout
6035 			 * from that subnet port too.
6036 			 */
6037 			if (multism) {
6038 				/* break if already searched both the subnet */
6039 				if (multi_sm_loop == 1)
6040 					break;
6041 
6042 				port = (j == 0) ? 1 : 0;
6043 				multi_sm_loop = 1;
6044 				goto get_comp_for_multism;
6045 			} else {
6046 				break;
6047 			}
6048 		}
6049 		ibcm_dec_hca_acc_cnt(hcap);
6050 
6051 		/*
6052 		 * We may be on dual HCA with dual SM configured system.  And
6053 		 * the input attr GID was visible from second HCA. So in order
6054 		 * to get the companion portgid we need to re-look from the
6055 		 * first HCA ports.
6056 		 */
6057 		if ((num_hcas > 1) && (i > 0) && (h_guid != 0) &&
6058 		    (multi_hca_loop != 1)) {
6059 			multi_hca_loop = 1;
6060 			goto get_comp_for_multihca;
6061 		}
6062 	}
6063 	if (*num_gids_p == 0)
6064 		retval = IBT_GIDS_NOT_FOUND;
6065 
6066 get_comp_pgid_exit:
6067 	if (guid_array)
6068 		ibt_free_hca_list(guid_array, num_hcas);
6069 
6070 	if ((retval != IBT_SUCCESS) && (*num_gids_p != 0)) {
6071 		retval = IBT_SUCCESS;
6072 	}
6073 
6074 	IBTF_DPRINTF_L3(cmlog, "ibt_get_companion_port_gids: done. Status %d, "
6075 	    "Found %d GIDs", retval, *num_gids_p);
6076 
6077 	return (retval);
6078 }
6079 
6080 /* RDMA IP CM Support routines */
6081 ibt_status_t
6082 ibt_get_src_ip(ib_gid_t gid, ib_pkey_t pkey, ibt_ip_addr_t *src_ip)
6083 {
6084 	ibcm_arp_ip_t		*ipp;
6085 	ibcm_arp_ibd_insts_t	ibds;
6086 	int			i;
6087 	boolean_t		found = B_FALSE;
6088 	ibt_status_t		retval = IBT_SUCCESS;
6089 
6090 	IBTF_DPRINTF_L4(cmlog, "ibt_get_src_ip(%llX:%llX, %X, %p)",
6091 	    gid.gid_prefix, gid.gid_guid, pkey, src_ip);
6092 
6093 	if (gid.gid_prefix == 0 || gid.gid_guid == 0) {
6094 		IBTF_DPRINTF_L3(cmlog, "ibt_get_src_ip: Invalid GID.");
6095 		return (IBT_INVALID_PARAM);
6096 	}
6097 
6098 	if (src_ip == NULL) {
6099 		IBTF_DPRINTF_L3(cmlog, "ibt_get_src_ip: ERROR: src_ip NULL");
6100 		return (IBT_INVALID_PARAM);
6101 	}
6102 
6103 	bzero(&ibds, sizeof (ibcm_arp_ibd_insts_t));
6104 	ibds.ibcm_arp_ibd_alloc = IBCM_ARP_IBD_INSTANCES;
6105 	ibds.ibcm_arp_ibd_cnt = 0;
6106 	ibds.ibcm_arp_ip = (ibcm_arp_ip_t *)kmem_zalloc(
6107 	    ibds.ibcm_arp_ibd_alloc * sizeof (ibcm_arp_ip_t), KM_SLEEP);
6108 
6109 	retval = ibcm_arp_get_ibds(&ibds);
6110 	if (retval != IBT_SUCCESS) {
6111 		IBTF_DPRINTF_L2(cmlog, "ibt_get_src_ip: ibcm_arp_get_ibds "
6112 		    "failed to get IBD Instances: ret 0x%x", retval);
6113 		goto get_src_ip_end;
6114 	}
6115 
6116 	for (i = 0, ipp = ibds.ibcm_arp_ip; i < ibds.ibcm_arp_ibd_cnt;
6117 	    i++, ipp++) {
6118 		if (ipp->ip_port_gid.gid_prefix == gid.gid_prefix &&
6119 		    ipp->ip_port_gid.gid_guid == gid.gid_guid) {
6120 			if (pkey) {
6121 				if (ipp->ip_pkey == pkey) {
6122 					found = B_TRUE;
6123 					break;
6124 				} else
6125 					continue;
6126 			}
6127 			found = B_TRUE;
6128 			break;
6129 		}
6130 	}
6131 
6132 	if (found == B_FALSE) {
6133 		retval = IBT_SRC_IP_NOT_FOUND;
6134 	} else {
6135 		src_ip->family = ipp->ip_inet_family;
6136 		if (src_ip->family == AF_INET) {
6137 			bcopy(&ipp->ip_cm_sin.sin_addr, &src_ip->un.ip4addr,
6138 			    sizeof (in_addr_t));
6139 			IBCM_PRINT_IP("ibt_get_src_ip", src_ip);
6140 		} else if (src_ip->family == AF_INET6) {
6141 			bcopy(&ipp->ip_cm_sin6.sin6_addr, &src_ip->un.ip6addr,
6142 			    sizeof (in6_addr_t));
6143 		}
6144 	}
6145 
6146 get_src_ip_end:
6147 	if (ibds.ibcm_arp_ip)
6148 		kmem_free(ibds.ibcm_arp_ip,
6149 		    ibds.ibcm_arp_ibd_alloc * sizeof (ibcm_arp_ip_t));
6150 
6151 	return (retval);
6152 }
6153 
6154 ib_svc_id_t
6155 ibt_get_ip_sid(uint8_t protocol_num, in_port_t dst_port)
6156 {
6157 	ib_svc_id_t	sid;
6158 
6159 	IBTF_DPRINTF_L4(cmlog, "ibt_get_ip_sid(%X, %lX)", protocol_num,
6160 	    dst_port);
6161 
6162 	/*
6163 	 * If protocol_num is non-zero, then formulate the SID and return it.
6164 	 * If protocol_num is zero, then we need to assign a locally generated
6165 	 * IP SID with IB_SID_IPADDR_PREFIX.
6166 	 */
6167 	if (protocol_num) {
6168 		sid = IB_SID_IPADDR_PREFIX | protocol_num << 16 | dst_port;
6169 	} else {
6170 		sid = ibcm_alloc_ip_sid();
6171 	}
6172 
6173 	IBTF_DPRINTF_L3(cmlog, "ibt_get_ip_sid: SID: 0x%016llX", sid);
6174 	return (sid);
6175 }
6176 
6177 ibt_status_t
6178 ibt_release_ip_sid(ib_svc_id_t ip_sid)
6179 {
6180 	IBTF_DPRINTF_L4(cmlog, "ibt_release_ip_sid(%llX)", ip_sid);
6181 
6182 	if (((ip_sid & IB_SID_IPADDR_PREFIX_MASK) != 0) ||
6183 	    (!(ip_sid & IB_SID_IPADDR_PREFIX))) {
6184 		IBTF_DPRINTF_L2(cmlog, "ibt_release_ip_sid(0x%016llX): ERROR: "
6185 		    "Called for Non-RDMA IP SID", ip_sid);
6186 		return (IBT_INVALID_PARAM);
6187 	}
6188 
6189 	/*
6190 	 * If protocol_num in ip_sid are all ZEROs, then this SID is allocated
6191 	 * by IBTF. If not, then the specified ip_sid is invalid.
6192 	 */
6193 	if (ip_sid & IB_SID_IPADDR_IPNUM_MASK) {
6194 		IBTF_DPRINTF_L2(cmlog, "ibt_release_ip_sid(0x%016llX): ERROR: "
6195 		    "Called for Non-IBTF assigned RDMA IP SID", ip_sid);
6196 		return (IBT_INVALID_PARAM);
6197 	}
6198 
6199 	ibcm_free_ip_sid(ip_sid);
6200 
6201 	return (IBT_SUCCESS);
6202 }
6203 
6204 
6205 uint8_t
6206 ibt_get_ip_protocol_num(ib_svc_id_t sid)
6207 {
6208 	return ((sid & IB_SID_IPADDR_IPNUM_MASK) >> 16);
6209 }
6210 
6211 in_port_t
6212 ibt_get_ip_dst_port(ib_svc_id_t sid)
6213 {
6214 	return (sid & IB_SID_IPADDR_PORTNUM_MASK);
6215 }
6216 
6217 _NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibt_ip_cm_info_t))
6218 _NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibcm_ip_pvtdata_t))
6219 
6220 ibt_status_t
6221 ibt_format_ip_private_data(ibt_ip_cm_info_t *ip_cm_info,
6222     ibt_priv_data_len_t priv_data_len, void *priv_data_p)
6223 {
6224 	ibcm_ip_pvtdata_t	ip_data;
6225 
6226 	IBTF_DPRINTF_L4(cmlog, "ibt_format_ip_private_data(%p, %d, %p)",
6227 	    ip_cm_info, priv_data_len, priv_data_p);
6228 
6229 	if ((ip_cm_info == NULL) || (priv_data_p == NULL) ||
6230 	    (priv_data_len < IBT_IP_HDR_PRIV_DATA_SZ)) {
6231 		IBTF_DPRINTF_L2(cmlog, "ibt_format_ip_private_data: ERROR "
6232 		    "Invalid Inputs.");
6233 		return (IBT_INVALID_PARAM);
6234 	}
6235 
6236 	bzero(&ip_data, sizeof (ibcm_ip_pvtdata_t));
6237 	ip_data.ip_srcport = ip_cm_info->src_port; /* Source Port */
6238 
6239 	/* IPV = 0x4, if IP-Addr are IPv4 format, else 0x6 for IPv6 */
6240 	if (ip_cm_info->src_addr.family == AF_INET) {
6241 		ip_data.ip_ipv = IBT_CM_IP_IPV_V4;
6242 		ip_data.ip_srcv4 = ip_cm_info->src_addr.un.ip4addr;
6243 		ip_data.ip_dstv4 = ip_cm_info->dst_addr.un.ip4addr;
6244 		IBCM_PRINT_IP("format: src", &ip_cm_info->src_addr);
6245 		IBCM_PRINT_IP("format: dst", &ip_cm_info->dst_addr);
6246 	} else if (ip_cm_info->src_addr.family == AF_INET6) {
6247 		ip_data.ip_ipv = IBT_CM_IP_IPV_V6;
6248 		bcopy(&ip_cm_info->src_addr.un.ip6addr,
6249 		    &ip_data.ip_srcv6, sizeof (in6_addr_t));
6250 		bcopy(&ip_cm_info->dst_addr.un.ip6addr,
6251 		    &ip_data.ip_dstv6, sizeof (in6_addr_t));
6252 	} else {
6253 		IBTF_DPRINTF_L2(cmlog, "ibt_format_ip_private_data: ERROR "
6254 		    "IP Addr needs to be either AF_INET or AF_INET6 family.");
6255 		return (IBT_INVALID_PARAM);
6256 	}
6257 
6258 	ip_data.ip_MajV = IBT_CM_IP_MAJ_VER;
6259 	ip_data.ip_MinV = IBT_CM_IP_MIN_VER;
6260 
6261 	bcopy(&ip_data, priv_data_p, IBT_IP_HDR_PRIV_DATA_SZ);
6262 
6263 	return (IBT_SUCCESS);
6264 }
6265 
6266 
6267 ibt_status_t
6268 ibt_get_ip_data(ibt_priv_data_len_t priv_data_len, void *priv_data,
6269     ibt_ip_cm_info_t *ip_cm_infop)
6270 {
6271 	ibcm_ip_pvtdata_t	ip_data;
6272 
6273 	IBTF_DPRINTF_L4(cmlog, "ibt_get_ip_data(%d, %p, %p)",
6274 	    priv_data_len, priv_data, ip_cm_infop);
6275 
6276 	if ((ip_cm_infop == NULL) || (priv_data == NULL) ||
6277 	    (priv_data_len < IBT_IP_HDR_PRIV_DATA_SZ)) {
6278 		IBTF_DPRINTF_L2(cmlog, "ibt_get_ip_data: ERROR Invalid Inputs");
6279 		return (IBT_INVALID_PARAM);
6280 	}
6281 
6282 	bcopy(priv_data, &ip_data, IBT_IP_HDR_PRIV_DATA_SZ);
6283 	ip_cm_infop->src_port = ip_data.ip_srcport; /* Source Port */
6284 
6285 	/* IPV = 0x4, if IP Address are IPv4 format, else 0x6 for IPv6 */
6286 	if (ip_data.ip_ipv == IBT_CM_IP_IPV_V4) {
6287 		/* Copy IPv4 Addr */
6288 		ip_cm_infop->src_addr.family = AF_INET;
6289 		ip_cm_infop->src_addr.un.ip4addr = ip_data.ip_srcv4;
6290 		ip_cm_infop->dst_addr.family = AF_INET;
6291 		ip_cm_infop->dst_addr.un.ip4addr = ip_data.ip_dstv4;
6292 		IBCM_PRINT_IP("get_ip_data: src", &ip_cm_infop->src_addr);
6293 		IBCM_PRINT_IP("get_ip_data: dst", &ip_cm_infop->dst_addr);
6294 	} else if (ip_data.ip_ipv == IBT_CM_IP_IPV_V6) {
6295 		/* Copy IPv6 Addr */
6296 		ip_cm_infop->src_addr.family = AF_INET6;
6297 		bcopy(&ip_data.ip_srcv6, &ip_cm_infop->src_addr.un.ip6addr,
6298 		    sizeof (in6_addr_t));
6299 		ip_cm_infop->dst_addr.family = AF_INET6;
6300 		bcopy(&ip_data.ip_dstv6, &ip_cm_infop->dst_addr.un.ip6addr,
6301 		    sizeof (in6_addr_t));
6302 	} else {
6303 		IBTF_DPRINTF_L2(cmlog, "ibt_get_ip_data: ERROR: IP Addr needs"
6304 		    " to be either AF_INET or AF_INET6 family.");
6305 		return (IBT_INVALID_PARAM);
6306 	}
6307 
6308 	return (IBT_SUCCESS);
6309 }
6310 
6311 
6312 /* Routines for warlock */
6313 
6314 /* ARGSUSED */
6315 static void
6316 ibcm_dummy_mcg_handler(void *arg, ibt_status_t retval, ibt_mcg_info_t *minfo)
6317 {
6318 	ibcm_join_mcg_tqarg_t	dummy_mcg;
6319 
6320 	dummy_mcg.func = ibcm_dummy_mcg_handler;
6321 
6322 	IBTF_DPRINTF_L5(cmlog, "ibcm_dummy_mcg_handler: "
6323 	    "dummy_mcg.func %p", dummy_mcg.func);
6324 }
6325 
6326 
6327 /* ARGSUSED */
6328 static void
6329 ibcm_dummy_recycle_rc_handler(ibt_status_t retval, void *arg)
6330 {
6331 	ibcm_taskq_recycle_arg_t	dummy_rc_recycle;
6332 
6333 	dummy_rc_recycle.func = ibcm_dummy_recycle_rc_handler;
6334 
6335 	IBTF_DPRINTF_L5(cmlog, "ibcm_dummy_recycle_rc_handler: "
6336 	    "dummy_rc_recycle.func %p", dummy_rc_recycle.func);
6337 }
6338 
6339 
6340 /* ARGSUSED */
6341 static ibt_cm_status_t
6342 ibcm_dummy_ud_handler(void *priv, ibt_cm_ud_event_t *event,
6343     ibt_cm_ud_return_args_t *ret_args,
6344     void *priv_data, ibt_priv_data_len_t len)
6345 {
6346 	/*
6347 	 * Let warlock see that ibcm_local_handler_s::actual_cm_handler
6348 	 * points to this routine.
6349 	 */
6350 	ibcm_local_handler_t	p;
6351 	ibcm_ud_state_data_t	dummy_ud;
6352 
6353 	p.actual_cm_handler = ibcm_dummy_ud_handler;
6354 	dummy_ud.ud_cm_handler = ibcm_dummy_ud_handler;
6355 
6356 	IBTF_DPRINTF_L5(cmlog, "ibcm_dummy_ud_handler: p.actual_cm_handler %p"
6357 	    "dummy_ud.ud_cm_handler %p", p.actual_cm_handler,
6358 	    dummy_ud.ud_cm_handler);
6359 	/*
6360 	 * Call all routines that the client's callback routine could call.
6361 	 */
6362 
6363 	return (IBT_CM_ACCEPT);
6364 }
6365 
6366 /* ARGSUSED */
6367 static ibt_cm_status_t
6368 ibcm_dummy_rc_handler(void *priv, ibt_cm_event_t *event,
6369     ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
6370 {
6371 	ibcm_state_data_t	dummy_rc;
6372 
6373 	dummy_rc.cm_handler = ibcm_dummy_rc_handler;
6374 
6375 	IBTF_DPRINTF_L5(cmlog, "ibcm_dummy_rc_handler: "
6376 	    "dummy_ud.ud_cm_handler %p", dummy_rc.cm_handler);
6377 	/*
6378 	 * Call all routines that the client's callback routine could call.
6379 	 */
6380 
6381 	return (IBT_CM_ACCEPT);
6382 }
6383