xref: /illumos-gate/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_ti.c (revision 9c468ea9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/ib/mgt/ibcm/ibcm_impl.h>
27 #include <sys/ib/ibtl/ibti.h>
28 #include <sys/ib/mgt/ibcm/ibcm_arp.h>
29 
30 /*
31  * ibcm_ti.c
32  *	These routines implement the Communication Manager's interfaces to IBTL.
33  */
34 
35 /*
 * CM rc recycle task args structure definition.
 *
 * NOTE(review): the code that fills in and consumes this structure is not
 * visible in this part of the file; the per-field notes below follow only
 * from the field names and types and should be confirmed against the
 * recycle code.
 */
36 typedef struct ibcm_taskq_recycle_arg_s {
37 	ibt_channel_hdl_t	rc_chan;	/* RC channel handle */
38 	ibt_cep_flags_t		control;	/* CEP flags */
39 	uint8_t			hca_port_num;	/* HCA port number */
40 	ibt_recycle_handler_t	func;		/* recycle handler */
41 	void			*arg;		/* func's arg? TODO confirm */
42 } ibcm_taskq_recycle_arg_t;
43 
44 _NOTE(READ_ONLY_DATA(ibcm_taskq_recycle_arg_s))
45 
46 static ibt_status_t	ibcm_init_reply_addr(ibcm_hca_info_t *hcap,
47     ibcm_mad_addr_t *reply_addr, ibt_chan_open_args_t *chan_args,
48     ibt_chan_open_flags_t flags, ib_time_t *cm_pkt_lt, ib_lid_t prim_slid);
49 static void		ibcm_process_abort_via_taskq(void *args);
50 static ibt_status_t	ibcm_process_rc_recycle_ret(void *recycle_arg);
51 static ibt_status_t	ibcm_process_join_mcg(void *taskq_arg);
52 static void		ibcm_process_async_join_mcg(void *tq_arg);
53 
54 ibt_status_t ibcm_get_node_rec(ibmf_saa_handle_t, sa_node_record_t *,
55     uint64_t c_mask, void *, size_t *);
56 
57 static ibt_status_t ibcm_close_rc_channel(ibt_channel_hdl_t channel,
58     ibcm_state_data_t *statep, ibt_execution_mode_t mode);
59 
60 /* Address Record management definitions */
61 #define	IBCM_DAPL_ATS_NAME	"DAPL Address Translation Service"
62 #define	IBCM_DAPL_ATS_SID	0x10000CE100415453ULL
63 #define	IBCM_DAPL_ATS_NBYTES	16
64 ibcm_svc_info_t *ibcm_ar_svcinfop;
65 ibcm_ar_t	*ibcm_ar_list;
66 
67 /*
68  * Tunable parameter to turn off the overriding of the pi_path_mtu value.
69  *	1 	By default, override the path record's pi_path_mtu value to
70  *		IB_MTU_1K for all RC channels. This is done only for
71  *		channels established on a Tavor HCA whose path's pi_path_mtu
72  *		is greater than IB_MTU_1K.
73  *	0	Do not override; use the path's pi_path_mtu as is.
74  */
75 int	ibcm_override_path_mtu = 1;
76 
77 #ifdef DEBUG
78 static void	ibcm_print_reply_addr(ibt_channel_hdl_t channel,
79 		    ibcm_mad_addr_t *cm_reply_addr);
80 #endif
81 
82 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_port_info_s::{port_ibmf_hdl}))
83 
84 /* access is controlled between ibcm_sm.c and ibcm_ti.c by CVs */
85 _NOTE(SCHEME_PROTECTS_DATA("Serialized access by CV", {ibt_rc_returns_t
86     ibt_ud_returns_t ibt_ap_returns_t ibt_ar_t}))
87 
88 /*
89  * Typically, clients initialize these args in one API call and use them
90  * in another API call.
91  */
92 _NOTE(SCHEME_PROTECTS_DATA("Expected usage of ibtl api by client",
93     {ibt_path_info_s ibt_cep_path_s ibt_adds_vect_s ibt_mcg_info_s ib_gid_s
94     ibt_ud_dest_attr_s ibt_ud_dest_s ibt_srv_data_s ibt_redirect_info_s}))
95 
96 /*
97  * ibt_open_rc_channel()
98  *	ibt_open_rc_channel opens a communication channel on the specified
99  *	channel to the specified service. For connection service type qp's
100  *	the CM initiates the CEP to establish the connection and transitions
101  *	the QP/EEC to the "Ready to send" State modifying the QP/EEC's
102  *	attributes as necessary.
103  *	The implementation of this function assumes that alt path is different
104  *	from primary path. It is assumed that the Path functions ensure that.
105  *
106  * RETURN VALUES:
107  *	IBT_SUCCESS	on success (or respective failure on error)
108  */
109 ibt_status_t
110 ibt_open_rc_channel(ibt_channel_hdl_t channel, ibt_chan_open_flags_t flags,
111     ibt_execution_mode_t mode, ibt_chan_open_args_t *chan_args,
112     ibt_rc_returns_t *ret_args)
113 {
114 	/* all fields that are related to REQ MAD formation */
115 
116 	ib_pkey_t		prim_pkey;
117 	ib_lid_t		primary_slid, alternate_slid;
118 	ib_qpn_t		local_qpn = 0;
119 	ib_guid_t		hca_guid;
120 	ib_qkey_t		local_qkey = 0;
121 	ib_eecn_t		local_eecn = 0;
122 	ib_eecn_t		remote_eecn = 0;
123 	boolean_t		primary_grh;
124 	boolean_t		alternate_grh = B_FALSE;
125 	ib_lid_t		base_lid;
126 	ib_com_id_t		local_comid;
127 	ibmf_msg_t		*ibmf_msg, *ibmf_msg_dreq;
128 	ibcm_req_msg_t		*req_msgp;
129 
130 	uint8_t			rdma_in, rdma_out;
131 	uint8_t			cm_retries;
132 	uint64_t		local_cm_proc_time;	/* In usec */
133 	uint8_t			local_cm_resp_time;	/* IB time */
134 	uint64_t		remote_cm_resp_time;	/* In usec */
135 	uint32_t		starting_psn = 0;
136 
137 	/* CM path related fields */
138 	ibmf_handle_t		ibmf_hdl;
139 	ibcm_qp_list_t		*cm_qp_entry;
140 	ibcm_mad_addr_t		cm_reply_addr;
141 
142 	uint8_t			cm_pkt_lt;
143 
144 	/* Local args for ibtl/internal CM functions called within */
145 	ibt_status_t		status;
146 	ibcm_status_t		lkup_status;
147 	ibt_qp_query_attr_t	qp_query_attr;
148 
149 	/* Other misc local args */
150 	ibt_priv_data_len_t	len;
151 	ibcm_hca_info_t		*hcap;
152 	ibcm_state_data_t	*statep;
153 	uint8_t			port_no;
154 
155 	IBTF_DPRINTF_L3(cmlog, "ibt_open_rc_channel(chan %p, %X, %x, %p, %p)",
156 	    channel, flags, mode, chan_args, ret_args);
157 
158 	if (IBCM_INVALID_CHANNEL(channel)) {
159 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: invalid channel");
160 		return (IBT_CHAN_HDL_INVALID);
161 	}
162 
163 	/* cm handler should always be specified */
164 	if (chan_args->oc_cm_handler == NULL) {
165 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
166 		    "CM handler is not be specified", channel);
167 		return (IBT_INVALID_PARAM);
168 	}
169 
170 	if (mode == IBT_NONBLOCKING) {
171 		if (ret_args != NULL) {
172 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
173 			    " ret_args should be NULL when called in "
174 			    "non-blocking mode", channel);
175 			return (IBT_INVALID_PARAM);
176 		}
177 	} else if (mode == IBT_BLOCKING) {
178 		if (ret_args == NULL) {
179 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
180 			    " ret_args should be Non-NULL when called in "
181 			    "blocking mode", channel);
182 			return (IBT_INVALID_PARAM);
183 		}
184 		if (ret_args->rc_priv_data_len > IBT_REP_PRIV_DATA_SZ) {
185 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
186 			    " private data length is too large", channel);
187 			return (IBT_INVALID_PARAM);
188 		}
189 		if ((ret_args->rc_priv_data_len > 0) &&
190 		    (ret_args->rc_priv_data == NULL)) {
191 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
192 			    " rc_priv_data_len > 0, but rc_priv_data NULL",
193 			    channel);
194 			return (IBT_INVALID_PARAM);
195 		}
196 	} else { /* any other mode is not valid for ibt_open_rc_channel */
197 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
198 		    "invalid mode %x specified", channel, mode);
199 		return (IBT_INVALID_PARAM);
200 	}
201 
202 	/*
203 	 * XXX: no support yet for ibt_chan_open_flags_t - IBT_OCHAN_DUP
204 	 */
205 	if (flags & IBT_OCHAN_DUP) {
206 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
207 		    "Unsupported Flags specified: 0x%X", channel, flags);
208 		return (IBT_INVALID_PARAM);
209 	}
210 
211 	if ((flags & IBT_OCHAN_REDIRECTED) &&
212 	    (flags & IBT_OCHAN_PORT_REDIRECTED)) {
213 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
214 		    "Illegal to specify IBT_OCHAN_REDIRECTED and "
215 		    "IBT_OCHAN_PORT_REDIRECTED flags together", channel);
216 		return (IBT_INVALID_PARAM);
217 	}
218 
219 	if (((flags & IBT_OCHAN_REDIRECTED) &&
220 	    (chan_args->oc_cm_redirect_info == NULL)) ||
221 	    ((flags & IBT_OCHAN_PORT_REDIRECTED) &&
222 	    (chan_args->oc_cm_cep_path == NULL))) {
223 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
224 		    "Redirect flag specified, but respective arg is NULL",
225 		    channel);
226 		return (IBT_INVALID_PARAM);
227 	}
228 
229 	if ((flags & IBT_OCHAN_REDIRECTED) &&
230 	    (chan_args->oc_cm_redirect_info->rdi_dlid == 0) &&
231 	    (chan_args->oc_cm_redirect_info->rdi_gid.gid_guid == 0)) {
232 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
233 		    "Either rdi_dlid or rdi_gid must be specified for"
234 		    " IBT_OCHAN_REDIRECTED", channel);
235 		return (IBT_INVALID_PARAM);
236 	}
237 
238 	/* primary dlid and hca_port_num should never be zero */
239 	port_no = IBCM_PRIM_CEP_PATH(chan_args).cep_hca_port_num;
240 
241 	if ((IBCM_PRIM_ADDS_VECT(chan_args).av_dlid == 0) && (port_no == 0)) {
242 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
243 		    "Primary Path's information is not valid", channel);
244 		return (IBT_INVALID_PARAM);
245 	}
246 
247 	/* validate SID */
248 	if (chan_args->oc_path->pi_sid == 0) {
249 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
250 		    "ERROR: Service ID in path information is 0", channel);
251 		return (IBT_INVALID_PARAM);
252 	}
253 	IBTF_DPRINTF_L3(cmlog, "ibt_open_rc_channel: chan 0x%p  SID %llX",
254 	    channel, chan_args->oc_path->pi_sid);
255 
256 	/* validate rnr_retry_cnt (enum has more than 3 bits) */
257 	if ((uint_t)chan_args->oc_path_rnr_retry_cnt > IBT_RNR_INFINITE_RETRY) {
258 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
259 		    "ERROR: oc_path_rnr_retry_cnt(%d) is out of range",
260 		    channel, chan_args->oc_path_rnr_retry_cnt);
261 		return (IBT_INVALID_PARAM);
262 	}
263 
264 	/*
265 	 * Ensure that client is not re-using a QP that is still associated
266 	 * with a statep
267 	 */
268 	IBCM_GET_CHAN_PRIVATE(channel, statep);
269 	if (statep != NULL) {
270 		IBCM_RELEASE_CHAN_PRIVATE(channel);
271 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
272 		    "Channel being re-used on active side", channel);
273 		return (IBT_CHAN_IN_USE);
274 	}
275 
276 	/* Get GUID from Channel */
277 	hca_guid = ibt_channel_to_hca_guid(channel);
278 
279 	/* validate QP's hca guid with that from primary path  */
280 	if (hca_guid != chan_args->oc_path->pi_hca_guid) {
281 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
282 		    "GUID from Channel and primary path don't match", channel);
283 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
284 		    "Channel GUID %llX primary path GUID %llX", channel,
285 		    hca_guid, chan_args->oc_path->pi_hca_guid);
286 		return (IBT_CHAN_HDL_INVALID);
287 	}
288 
289 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
290 	    "Local HCA GUID %llX", channel, hca_guid);
291 
292 	status = ibt_query_qp(channel, &qp_query_attr);
293 	if (status != IBT_SUCCESS) {
294 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
295 		    "ibt_query_qp failed %d", channel, status);
296 		return (status);
297 	}
298 
299 	/* If client specified "no port change on QP" */
300 	if ((qp_query_attr.qp_info.qp_transport.rc.rc_path.cep_hca_port_num !=
301 	    port_no) && (flags & IBT_OCHAN_PORT_FIXED)) {
302 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
303 		    "chan port %d and path port %d does not match", channel,
304 		    qp_query_attr.qp_info.qp_transport.rc.rc_path. \
305 		    cep_hca_port_num, port_no);
306 		return (IBT_INVALID_PARAM);
307 	}
308 
309 	if (qp_query_attr.qp_info.qp_trans != IBT_RC_SRV) {
310 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
311 		    "Invalid Channel type: Applicable only to RC Channel",
312 		    channel);
313 		return (IBT_CHAN_SRV_TYPE_INVALID);
314 	}
315 
316 	/* Check if QP is in INIT state or not */
317 	if (qp_query_attr.qp_info.qp_state != IBT_STATE_INIT) {
318 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
319 		    "QP is not in INIT state %x", channel,
320 		    qp_query_attr.qp_info.qp_state);
321 		return (IBT_CHAN_STATE_INVALID);
322 	}
323 
324 	local_qpn = qp_query_attr.qp_qpn;
325 
326 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p Active QPN 0x%x",
327 	    channel, local_qpn);
328 
329 #ifdef	NO_EEC_SUPPORT_YET
330 
331 	if (flags & IBT_OCHAN_RDC_EXISTS) {
332 		ibt_eec_query_attr_t	eec_query_attr;
333 
334 		local_qkey = qp_query_attr.qp_info.qp_transport.rd_qkey;
335 
336 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: RD");
337 
338 		status = ibt_query_eec(channel, &eec_query_attr);
339 		if (status != IBT_SUCCESS) {
340 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
341 			    " ibt_query_eec failed %d", channel, status);
342 			return (status);
343 		}
344 		local_eecn = eec_query_attr.eec_eecn;
345 	}
346 
347 #endif
348 	if (chan_args->oc_path->pi_prim_pkt_lt > ibcm_max_ib_pkt_lt) {
349 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
350 		    "Huge PktLifeTime %d, Max is %d", channel,
351 		    chan_args->oc_path->pi_prim_pkt_lt, ibcm_max_ib_pkt_lt);
352 		return (IBT_PATH_PKT_LT_TOO_HIGH);
353 	}
354 
355 	/* If no HCA found return failure */
356 	if ((hcap = ibcm_find_hca_entry(hca_guid)) == NULL) {
357 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
358 		    "hcap is NULL. Probably hca is not in active state",
359 		    channel);
360 		return (IBT_CHAN_HDL_INVALID);
361 	}
362 
363 	rdma_out = chan_args->oc_rdma_ra_out;
364 	rdma_in = chan_args->oc_rdma_ra_in;
365 
366 	if ((rdma_in > hcap->hca_max_rdma_in_qp) ||
367 	    (rdma_out > hcap->hca_max_rdma_out_qp)) {
368 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
369 		    "rdma in %d/out %d values exceed hca limits(%d/%d)",
370 		    channel, rdma_in, rdma_out, hcap->hca_max_rdma_in_qp,
371 		    hcap->hca_max_rdma_out_qp);
372 		ibcm_dec_hca_acc_cnt(hcap);
373 		return (IBT_INVALID_PARAM);
374 	}
375 
376 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
377 	    "rdma_in %d rdma_out %d", channel, rdma_in, rdma_out);
378 
379 	status = ibt_get_port_state_byguid(hcap->hca_guid, port_no,
380 	    NULL, &base_lid);
381 	if (status != IBT_SUCCESS) {
382 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
383 		    "primary port_num %d not active", channel, port_no);
384 		ibcm_dec_hca_acc_cnt(hcap);
385 		return (status);
386 	}
387 
388 	/* Validate P_KEY Index */
389 	status = ibt_index2pkey_byguid(hcap->hca_guid, port_no,
390 	    IBCM_PRIM_CEP_PATH(chan_args).cep_pkey_ix, &prim_pkey);
391 	if (status != IBT_SUCCESS) {
392 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
393 		    "Invalid Primary PKeyIx %x", channel,
394 		    IBCM_PRIM_CEP_PATH(chan_args).cep_pkey_ix);
395 		ibcm_dec_hca_acc_cnt(hcap);
396 		return (status);
397 	}
398 
399 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
400 	    "primary_port_num %d primary_pkey 0x%x", channel, port_no,
401 	    prim_pkey);
402 
403 	if ((hcap->hca_port_info[port_no - 1].port_ibmf_hdl == NULL) &&
404 	    ((status = ibcm_hca_reinit_port(hcap, port_no - 1))
405 	    != IBT_SUCCESS)) {
406 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
407 		    "ibmf reg or callback setup failed during re-initialize",
408 		    channel);
409 		ibcm_dec_hca_acc_cnt(hcap);
410 		return (status);
411 	}
412 
413 	ibmf_hdl = hcap->hca_port_info[port_no - 1].port_ibmf_hdl;
414 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
415 	    "primary ibmf_hdl = 0x%p", channel, ibmf_hdl);
416 
417 	primary_slid = base_lid + IBCM_PRIM_ADDS_VECT(chan_args).av_src_path;
418 
419 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: channel 0x%p "
420 	    "primary SLID = %x", channel, primary_slid);
421 
422 	/* check first if alternate path exists or not as it is OPTIONAL */
423 	if (IBCM_ALT_CEP_PATH(chan_args).cep_hca_port_num != 0) {
424 		uint8_t	alt_port_no;
425 
426 		alt_port_no = IBCM_ALT_CEP_PATH(chan_args).cep_hca_port_num;
427 
428 		if (chan_args->oc_path->pi_alt_pkt_lt > ibcm_max_ib_pkt_lt) {
429 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
430 			    "Huge Alt Pkt lt %d", channel,
431 			    chan_args->oc_path->pi_alt_pkt_lt);
432 			ibcm_dec_hca_acc_cnt(hcap);
433 			return (IBT_PATH_PKT_LT_TOO_HIGH);
434 		}
435 
436 		if (port_no != alt_port_no) {
437 
438 			status = ibt_get_port_state_byguid(hcap->hca_guid,
439 			    alt_port_no, NULL, &base_lid);
440 			if (status != IBT_SUCCESS) {
441 
442 				IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: "
443 				    "chan 0x%p alt_port_num %d inactive %d",
444 				    channel, alt_port_no, status);
445 				ibcm_dec_hca_acc_cnt(hcap);
446 				return (status);
447 			}
448 
449 		}
450 		alternate_slid =
451 		    base_lid + IBCM_ALT_ADDS_VECT(chan_args).av_src_path;
452 
453 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
454 		    "alternate SLID = %x", channel, alternate_slid);
455 	}
456 
457 	/*
458 	 * Only pkey needs to be zeroed, because all other fields are set in
459 	 * ibcm_init_reply_addr. But, let's bzero the complete struct for
460 	 * any future modifications.
461 	 */
462 	bzero(&cm_reply_addr, sizeof (cm_reply_addr));
463 
464 	/* Initialize the MAD destination address in stored_reply_addr */
465 	if ((status = ibcm_init_reply_addr(hcap, &cm_reply_addr, chan_args,
466 	    flags, &cm_pkt_lt, primary_slid)) != IBT_SUCCESS) {
467 
468 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
469 		    "ibcm_init_reply_addr failed status %d ", channel, status);
470 		ibcm_dec_hca_acc_cnt(hcap);
471 		return (status);
472 	}
473 
474 
475 	/* Initialize the pkey for CM MAD communication */
476 	if (cm_reply_addr.rcvd_addr.ia_p_key == 0)
477 		cm_reply_addr.rcvd_addr.ia_p_key = prim_pkey;
478 
479 #ifdef DEBUG
480 	ibcm_print_reply_addr(channel, &cm_reply_addr);
481 #endif
482 
483 	/* Retrieve an ibmf qp for sending CM MADs */
484 	if ((cm_qp_entry = ibcm_find_qp(hcap, port_no,
485 	    cm_reply_addr.rcvd_addr.ia_p_key)) == NULL) {
486 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
487 		    "unable to allocate ibmf qp for CM MADs", channel);
488 		ibcm_dec_hca_acc_cnt(hcap);
489 		return (IBT_INSUFF_RESOURCE);
490 	}
491 
492 
493 	if (ibcm_alloc_comid(hcap, &local_comid) != IBCM_SUCCESS) {
494 		ibcm_release_qp(cm_qp_entry);
495 		ibcm_dec_hca_acc_cnt(hcap);
496 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
497 		    " Unable to allocate comid", channel);
498 		return (IBT_INSUFF_KERNEL_RESOURCE);
499 	}
500 
501 	/* allocate an IBMF mad buffer (REQ) */
502 	if ((status = ibcm_alloc_out_msg(ibmf_hdl, &ibmf_msg,
503 	    MAD_METHOD_SEND)) != IBT_SUCCESS) {
504 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: "
505 		    "chan 0x%p ibcm_alloc_out_msg failed", channel);
506 		ibcm_release_qp(cm_qp_entry);
507 		ibcm_free_comid(hcap, local_comid);
508 		ibcm_dec_hca_acc_cnt(hcap);
509 		return (status);
510 	}
511 
512 	/* allocate an IBMF mad buffer (DREQ) */
513 	if ((status = ibcm_alloc_out_msg(ibmf_hdl, &ibmf_msg_dreq,
514 	    MAD_METHOD_SEND)) != IBT_SUCCESS) {
515 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: "
516 		    "chan 0x%p ibcm_alloc_out_msg failed", channel);
517 		(void) ibcm_free_out_msg(ibmf_hdl, &ibmf_msg);
518 		ibcm_release_qp(cm_qp_entry);
519 		ibcm_free_comid(hcap, local_comid);
520 		ibcm_dec_hca_acc_cnt(hcap);
521 		return (status);
522 	}
523 
524 	/* Init to Init, if QP's port does not match with path information */
525 	if (qp_query_attr.qp_info.qp_transport.rc.rc_path.cep_hca_port_num !=
526 	    IBCM_PRIM_CEP_PATH(chan_args).cep_hca_port_num) {
527 
528 		ibt_qp_info_t		qp_info;
529 		ibt_cep_modify_flags_t	cep_flags;
530 
531 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: "
532 		    "chan 0x%p chan port %d", channel,
533 		    qp_query_attr.qp_info.qp_transport.rc.rc_path.\
534 		    cep_hca_port_num);
535 
536 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: "
537 		    "chan 0x%p path port %d", channel, port_no);
538 
539 		bzero(&qp_info, sizeof (qp_info));
540 		/* For now, set it to RC type */
541 
542 		qp_info.qp_trans = IBT_RC_SRV;
543 		qp_info.qp_state = IBT_STATE_INIT;
544 		qp_info.qp_transport.rc.rc_path.cep_hca_port_num = port_no;
545 
546 		cep_flags = IBT_CEP_SET_STATE | IBT_CEP_SET_PORT;
547 
548 		status = ibt_modify_qp(channel, cep_flags, &qp_info, NULL);
549 
550 		if (status != IBT_SUCCESS) {
551 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: "
552 			    "chan 0x%p ibt_modify_qp() = %d", channel, status);
553 			ibcm_release_qp(cm_qp_entry);
554 			ibcm_free_comid(hcap, local_comid);
555 			ibcm_dec_hca_acc_cnt(hcap);
556 			(void) ibcm_free_out_msg(ibmf_hdl, &ibmf_msg);
557 			(void) ibcm_free_out_msg(ibmf_hdl, &ibmf_msg_dreq);
558 			return (status);
559 		} else
560 			IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: "
561 			    "chan 0x%p ibt_modify_qp() = %d", channel, status);
562 	}
563 
564 	/* allocate ibcm_state_data_t before grabbing the WRITER lock */
565 	statep = kmem_zalloc(sizeof (ibcm_state_data_t), KM_SLEEP);
566 	rw_enter(&hcap->hca_state_rwlock, RW_WRITER);
567 	lkup_status = ibcm_lookup_msg(IBCM_OUTGOING_REQ, local_comid, 0, 0,
568 	    hcap, &statep);
569 	rw_exit(&hcap->hca_state_rwlock);
570 
571 	/* CM should be seeing this for the first time */
572 	ASSERT(lkup_status == IBCM_LOOKUP_NEW);
573 
574 	/* Increment the hca's resource count */
575 	ibcm_inc_hca_res_cnt(hcap);
576 
577 	/* Once a resource created on hca, no need to hold the acc cnt */
578 	ibcm_dec_hca_acc_cnt(hcap);
579 
580 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*statep))
581 
582 	statep->timerid = 0;
583 	statep->local_hca_guid = hca_guid;
584 	statep->local_qpn = local_qpn;
585 	statep->stored_reply_addr.cm_qp_entry = cm_qp_entry;
586 	statep->prim_port = IBCM_PRIM_CEP_PATH(chan_args).cep_hca_port_num;
587 	statep->alt_port = IBCM_ALT_CEP_PATH(chan_args).cep_hca_port_num;
588 
589 
590 	/* Save "statep" as channel's CM private data.  */
591 	statep->channel = channel;
592 	IBCM_SET_CHAN_PRIVATE(statep->channel, statep);
593 
594 	statep->stored_msg = ibmf_msg;
595 	statep->dreq_msg = ibmf_msg_dreq;
596 
597 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*req_msgp))
598 
599 	/* Start filling in the REQ MAD */
600 	req_msgp = (ibcm_req_msg_t *)IBCM_OUT_MSGP(statep->stored_msg);
601 	req_msgp->req_local_comm_id = h2b32(local_comid);
602 	req_msgp->req_svc_id = h2b64(chan_args->oc_path->pi_sid);
603 	req_msgp->req_local_ca_guid = h2b64(hca_guid);
604 	req_msgp->req_local_qkey = h2b32(local_qkey);	/* for EEC/RD */
605 
606 	/* Bytes 32-35 are req_local_qpn and req_off_resp_resources */
607 	req_msgp->req_local_qpn_plus = h2b32(local_qpn << 8 | rdma_in);
608 
609 	/* Bytes 36-39 are req_local_eec_no and req_off_initiator_depth */
610 	req_msgp->req_local_eec_no_plus = h2b32(local_eecn << 8 | rdma_out);
611 
612 	if (flags & IBT_OCHAN_REMOTE_CM_TM)
613 		remote_cm_resp_time = chan_args->oc_remote_cm_time;
614 	else
615 		remote_cm_resp_time = ibcm_remote_response_time;
616 
617 	/*
618 	 * Bytes 40-43 - remote_eecn, remote_cm_resp_time, tran_type,
619 	 * IBT_CM_FLOW_CONTROL is always set by default.
620 	 */
621 	req_msgp->req_remote_eecn_plus = h2b32(
622 	    remote_eecn << 8 | (ibt_usec2ib(remote_cm_resp_time) & 0x1f) << 3 |
623 	    IBT_RC_SRV << 1 | IBT_CM_FLOW_CONTROL);
624 
625 	if (flags & IBT_OCHAN_LOCAL_CM_TM)
626 		local_cm_proc_time = chan_args->oc_local_cm_time;
627 	else
628 		local_cm_proc_time = ibcm_local_processing_time;
629 
630 	local_cm_resp_time = ibt_usec2ib(local_cm_proc_time +
631 	    2 * ibt_ib2usec(chan_args->oc_path->pi_prim_pkt_lt) +
632 	    ibcm_sw_delay);
633 
634 	/* save retry count */
635 	statep->cep_retry_cnt = chan_args->oc_path_retry_cnt;
636 
637 	if (flags & IBT_OCHAN_STARTING_PSN)
638 		starting_psn = chan_args->oc_starting_psn;
639 
640 	if (local_cm_resp_time > 0x1f)
641 		local_cm_resp_time = 0x1f;
642 
643 	/* Bytes 44-47 are req_starting_psn, local_cm_resp_time and retry_cnt */
644 	req_msgp->req_starting_psn_plus = h2b32(starting_psn << 8 |
645 	    local_cm_resp_time << 3 | statep->cep_retry_cnt);
646 
647 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
648 	    "Prim Pkt lt (IB time) 0x%x", channel,
649 	    chan_args->oc_path->pi_prim_pkt_lt);
650 
651 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
652 	    "local_cm_proc_time(usec) %d ", channel, local_cm_proc_time);
653 
654 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
655 	    "local_cm_resp_time(ib_time) %d", channel, local_cm_resp_time);
656 
657 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
658 	    "remote_cm_resp_time (usec) %d", channel, remote_cm_resp_time);
659 
660 	statep->starting_psn = starting_psn;
661 
662 	/* Pkey - bytes 48-49 */
663 	req_msgp->req_part_key = h2b16(prim_pkey);
664 
665 	if (flags & IBT_OCHAN_CM_RETRY)
666 		cm_retries = chan_args->oc_cm_retry_cnt;
667 	else
668 		cm_retries = ibcm_max_retries;
669 
670 	statep->max_cm_retries = statep->remaining_retry_cnt = cm_retries;
671 	req_msgp->req_max_cm_retries_plus = statep->max_cm_retries << 4;
672 
673 	/*
674 	 * Check whether SRQ is associated with this Channel, if yes, then
675 	 * set the SRQ Exists bit in the REQ.
676 	 */
677 	if (qp_query_attr.qp_srq != NULL) {
678 		req_msgp->req_max_cm_retries_plus |= (1 << 3);
679 	}
680 
681 	/*
682 	 * By default on Tavor, we override the PathMTU to 1K.
683 	 * To turn this off, set ibcm_override_path_mtu = 0.
684 	 */
685 	if (ibcm_override_path_mtu && IBCM_IS_HCA_TAVOR(hcap) &&
686 	    (chan_args->oc_path->pi_path_mtu > IB_MTU_1K)) {
687 		req_msgp->req_mtu_plus = IB_MTU_1K << 4 |
688 		    chan_args->oc_path_rnr_retry_cnt;
689 		IBTF_DPRINTF_L3(cmlog, "ibt_open_rc_channel: chan 0x%p PathMTU"
690 		    " overridden to IB_MTU_1K(%d) from %d", channel, IB_MTU_1K,
691 		    chan_args->oc_path->pi_path_mtu);
692 	} else
693 		req_msgp->req_mtu_plus = chan_args->oc_path->pi_path_mtu << 4 |
694 		    chan_args->oc_path_rnr_retry_cnt;
695 
696 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p CM retry cnt %d"
697 	    " staring PSN %x", channel, cm_retries, starting_psn);
698 
699 
700 #ifdef	NO_EEC_SUPPORT_YET
701 	if (flags & IBT_OCHAN_RDC_EXISTS)
702 		req_msgp->req_mtu_plus |= 8;
703 #endif
704 
705 	/* Initialize the "primary" port stuff next - bytes 52-95 */
706 	req_msgp->req_primary_l_port_lid = h2b16(primary_slid);
707 	req_msgp->req_primary_r_port_lid =
708 	    h2b16(IBCM_PRIM_ADDS_VECT(chan_args).av_dlid);
709 	req_msgp->req_primary_l_port_gid.gid_prefix =
710 	    h2b64(IBCM_PRIM_ADDS_VECT(chan_args).av_sgid.gid_prefix);
711 	req_msgp->req_primary_l_port_gid.gid_guid =
712 	    h2b64(IBCM_PRIM_ADDS_VECT(chan_args).av_sgid.gid_guid);
713 	req_msgp->req_primary_r_port_gid.gid_prefix =
714 	    h2b64(IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_prefix);
715 	req_msgp->req_primary_r_port_gid.gid_guid =
716 	    h2b64(IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_guid);
717 	primary_grh = IBCM_PRIM_ADDS_VECT(chan_args).av_send_grh;
718 
719 	statep->remote_hca_guid = /* not correct, but helpful for debugging */
720 	    IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_guid;
721 
722 	/* Bytes 88-91 - primary_flowlbl, and primary_srate */
723 	req_msgp->req_primary_flow_label_plus =
724 	    h2b32(((primary_grh == B_TRUE) ?
725 	    (IBCM_PRIM_ADDS_VECT(chan_args).av_flow << 12) : 0) |
726 	    IBCM_PRIM_ADDS_VECT(chan_args).av_srate);
727 	req_msgp->req_primary_traffic_class = (primary_grh == B_TRUE) ?
728 	    IBCM_PRIM_ADDS_VECT(chan_args).av_tclass : 0;
729 	req_msgp->req_primary_hop_limit = (primary_grh == B_TRUE) ?
730 	    IBCM_PRIM_ADDS_VECT(chan_args).av_hop : 1;
731 	req_msgp->req_primary_sl_plus =
732 	    IBCM_PRIM_ADDS_VECT(chan_args).av_srvl << 4 |
733 	    ((primary_grh == B_TRUE) ? 0 : 8);
734 
735 	req_msgp->req_primary_localtime_plus =
736 	    ibt_usec2ib((2 * ibt_ib2usec(chan_args->oc_path->pi_prim_pkt_lt)) +
737 	    ibt_ib2usec(hcap->hca_ack_delay)) << 3;
738 
739 	IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan %p statep %p",
740 	    channel, statep);
741 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
742 	    "active hca_ack_delay (usec) %d", channel,
743 	    req_msgp->req_primary_localtime_plus);
744 
745 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
746 	    "Sent primary cep timeout (IB Time) %d", channel,
747 	    hcap->hca_ack_delay);
748 
749 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p prim_dlid %x ",
750 	    channel, IBCM_PRIM_ADDS_VECT(chan_args).av_dlid);
751 
752 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
753 	    "prim GID %llX:%llX", channel,
754 	    IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_prefix,
755 	    IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_guid);
756 
757 	/* Initialize the "alternate" port stuff - optional */
758 	if (chan_args->oc_path->pi_alt_cep_path.cep_hca_port_num != 0) {
759 		ib_gid_t	tmp_gid;
760 
761 		req_msgp->req_alt_l_port_lid = h2b16(alternate_slid);
762 		req_msgp->req_alt_r_port_lid =
763 		    h2b16(IBCM_ALT_ADDS_VECT(chan_args).av_dlid);
764 		/*
765 		 * doing all this as req_alt_r/l_port_gid is at offset
766 		 * 100, 116 which is not divisible by 8
767 		 */
768 
769 		tmp_gid.gid_prefix =
770 		    h2b64(IBCM_ALT_ADDS_VECT(chan_args).av_dgid.gid_prefix);
771 		tmp_gid.gid_guid =
772 		    h2b64(IBCM_ALT_ADDS_VECT(chan_args).av_dgid.gid_guid);
773 		bcopy(&tmp_gid, &req_msgp->req_alt_r_port_gid[0],
774 		    sizeof (ib_gid_t));
775 		tmp_gid.gid_prefix =
776 		    h2b64(IBCM_ALT_ADDS_VECT(chan_args).av_sgid.gid_prefix);
777 		tmp_gid.gid_guid =
778 		    h2b64(IBCM_ALT_ADDS_VECT(chan_args).av_sgid.gid_guid);
779 
780 		bcopy(&tmp_gid, &req_msgp->req_alt_l_port_gid[0],
781 		    sizeof (ib_gid_t));
782 		alternate_grh = IBCM_ALT_ADDS_VECT(chan_args).av_send_grh;
783 
784 		/* Bytes 132-135 - alternate_flow_label, and alternate srate */
785 		req_msgp->req_alt_flow_label_plus = h2b32(
786 		    (((alternate_grh == B_TRUE) ?
787 		    (IBCM_ALT_ADDS_VECT(chan_args).av_flow << 12) : 0) |
788 		    IBCM_ALT_ADDS_VECT(chan_args).av_srate));
789 		req_msgp->req_alt_traffic_class = (alternate_grh == B_TRUE) ?
790 		    IBCM_ALT_ADDS_VECT(chan_args).av_tclass : 0;
791 		req_msgp->req_alt_hop_limit = (alternate_grh == B_TRUE) ?
792 		    IBCM_ALT_ADDS_VECT(chan_args).av_hop : 1;
793 		req_msgp->req_alt_sl_plus =
794 		    IBCM_ALT_ADDS_VECT(chan_args).av_srvl << 4 |
795 		    ((alternate_grh == B_TRUE) ? 0 : 8);
796 		req_msgp->req_alt_localtime_plus = ibt_usec2ib((2 *
797 		    ibt_ib2usec(chan_args->oc_path->pi_alt_pkt_lt)) +
798 		    ibt_ib2usec(hcap->hca_ack_delay)) << 3;
799 
800 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
801 		    "alt_dlid %x ", channel,
802 		    IBCM_ALT_ADDS_VECT(chan_args).av_dlid);
803 
804 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
805 		    "alt GID %llX:%llX", channel,
806 		    IBCM_ALT_ADDS_VECT(chan_args).av_dgid.gid_prefix,
807 		    IBCM_ALT_ADDS_VECT(chan_args).av_dgid.gid_guid);
808 	}
809 
810 	len = min(chan_args->oc_priv_data_len, IBT_REQ_PRIV_DATA_SZ);
811 	if ((len > 0) && chan_args->oc_priv_data)
812 		bcopy(chan_args->oc_priv_data, req_msgp->req_private_data, len);
813 
814 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*req_msgp))
815 
816 	/* return_data is filled up in the state machine code */
817 	if (ret_args != NULL) {
818 		statep->open_return_data = ret_args;
819 	}
820 
821 	/* initialize some statep fields here */
822 	statep->mode = IBCM_ACTIVE_MODE;
823 	statep->hcap = hcap;
824 
825 	statep->cm_handler = chan_args->oc_cm_handler;
826 	statep->state_cm_private = chan_args->oc_cm_clnt_private;
827 
828 	statep->pkt_life_time =
829 	    ibt_ib2usec(chan_args->oc_path->pi_prim_pkt_lt);
830 
831 	statep->timer_value = ibt_ib2usec(ibt_usec2ib(
832 	    2 * ibt_ib2usec(cm_pkt_lt) + remote_cm_resp_time));
833 
834 	/* Initialize statep->stored_reply_addr */
835 	statep->stored_reply_addr.ibmf_hdl = ibmf_hdl;
836 
837 	/* Initialize stored reply addr fields */
838 	statep->stored_reply_addr.grh_hdr = cm_reply_addr.grh_hdr;
839 	statep->stored_reply_addr.rcvd_addr = cm_reply_addr.rcvd_addr;
840 	statep->stored_reply_addr.grh_exists = cm_reply_addr.grh_exists;
841 	statep->stored_reply_addr.port_num = cm_reply_addr.port_num;
842 
843 	/*
844 	 * The IPD on local/active side is calculated by path functions,
845 	 * hence available in the args of ibt_open_rc_channel
846 	 */
847 	statep->local_srate = IBCM_PRIM_ADDS_VECT(chan_args).av_srate;
848 	statep->local_alt_srate = IBCM_ALT_ADDS_VECT(chan_args).av_srate;
849 
850 	/* Store the source path bits for primary and alt paths */
851 	statep->prim_src_path_bits = IBCM_PRIM_ADDS_VECT(chan_args).av_src_path;
852 	statep->alt_src_path_bits = IBCM_ALT_ADDS_VECT(chan_args).av_src_path;
853 
854 	statep->open_flow = 1;
855 	statep->open_done = B_FALSE;
856 	statep->state = statep->timer_stored_state = IBCM_STATE_REQ_SENT;
857 	IBCM_REF_CNT_INCR(statep);	/* Decremented before return */
858 	IBCM_REF_CNT_INCR(statep);	/* Decremented after REQ is posted */
859 	statep->send_mad_flags |= IBCM_REQ_POST_BUSY;
860 
861 	/*
862 	 * Skip moving channel to error state during close, for OFUV clients.
863 	 * OFUV clients transition the channel to error state by itself.
864 	 */
865 	if (flags & IBT_OCHAN_OFUV)
866 		statep->is_this_ofuv_chan = B_TRUE;
867 
868 	IBCM_OUT_HDRP(statep->stored_msg)->AttributeID =
869 	    h2b16(IBCM_INCOMING_REQ + IBCM_ATTR_BASE_ID);
870 
871 	IBCM_OUT_HDRP(statep->stored_msg)->TransactionID =
872 	    h2b64(ibcm_generate_tranid(IBCM_INCOMING_REQ, statep->local_comid,
873 	    0));
874 
875 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*statep))
876 
877 	ibtl_cm_chan_is_opening(channel);
878 
879 	ibcm_open_enqueue(statep);
880 
881 	mutex_enter(&statep->state_mutex);
882 
883 	if (mode == IBT_BLOCKING) {
884 
885 		/* wait for REQ/REP/RTU */
886 		while (statep->open_done != B_TRUE) {
887 			cv_wait(&statep->block_client_cv, &statep->state_mutex);
888 		}
889 
890 		/*
891 		 * In the case that open_channel() fails because of a
892 		 * REJ or timeout, change retval to IBT_CM_FAILURE
893 		 */
894 		if (statep->open_return_data->rc_status != IBT_CM_ACCEPT)
895 			status = IBT_CM_FAILURE;
896 
897 		IBTF_DPRINTF_L3(cmlog, "ibt_open_rc_channel: chan 0x%p "
898 		    "ret status %d cm status %d", channel, status,
899 		    statep->open_return_data->rc_status);
900 	}
901 
902 	/* decrement the ref-count before leaving here */
903 	IBCM_REF_CNT_DECR(statep);
904 
905 	mutex_exit(&statep->state_mutex);
906 
907 	IBTF_DPRINTF_L4(cmlog, "ibt_open_rc_channel: chan 0x%p done", channel);
908 	return (status);
909 }
910 
911 /*
912  * ibcm_init_reply_addr:
913  *
914  * The brief description of functionality below.
915  *
916  * For IBT_OCHAN_PORT_REDIRECTED (ie., port redirected case):
917  *	Build CM path from chan_args->oc_cm_cep_path
918  *	Set CM pkt lt (ie.,life time) to chan_args->oc_cm_pkt_lt
919  *
920  * For IBT_OCHAN_REDIRECTED (ie., port and CM redirected case):
921  *	If Redirect LID is specified,
922  *		If Redirect GID is not specified or specified to be on the same
923  *		    subnet, then
924  *			Build CM path from chan_args->oc_cm_redirect_info
925  *			Set CM pkt lt to subnet timeout
926  *		Else (ie., GID specified, but on a different subnet)
927  *			Do a path lookup to build CM Path and set CM pkt lt
928  *
929  */
930 static ibt_status_t
931 ibcm_init_reply_addr(ibcm_hca_info_t *hcap, ibcm_mad_addr_t *reply_addr,
932     ibt_chan_open_args_t *chan_args, ibt_chan_open_flags_t flags,
933     ib_time_t *cm_pkt_lt, ib_lid_t prim_slid)
934 {
935 	ibt_adds_vect_t	*cm_adds;
936 	ibt_path_info_t	path;
937 	boolean_t	cm_grh;
938 	ibt_status_t	status;
939 
940 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_reply_addr:");
941 
942 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*reply_addr))
943 
944 	/*
945 	 * sending side CM lid/gid/port num are not based on any redirect
946 	 * params. These values are set to primary RC path lid/gid/port num.
947 	 * In the future, these values can be set based on framework policy
948 	 * decisions ensuring reachability.
949 	 */
950 	reply_addr->grh_hdr.ig_sender_gid =
951 	    IBCM_PRIM_ADDS_VECT(chan_args).av_sgid;
952 	reply_addr->rcvd_addr.ia_local_lid = prim_slid;
953 	reply_addr->port_num = IBCM_PRIM_CEP_PATH(chan_args).cep_hca_port_num;
954 
955 	if (flags & IBT_OCHAN_PORT_REDIRECTED) {
956 		IBTF_DPRINTF_L4(cmlog, "ibcm_init_rely_addr: "
957 		    "IBT_OCHAN_PORT_REDIRECTED specified");
958 
959 		status = ibt_index2pkey_byguid(hcap->hca_guid,
960 		    chan_args->oc_cm_cep_path->cep_hca_port_num,
961 		    chan_args->oc_cm_cep_path->cep_pkey_ix,
962 		    &reply_addr->rcvd_addr.ia_p_key);
963 
964 		if (status != IBT_SUCCESS) {
965 			IBTF_DPRINTF_L2(cmlog, "ibcm_init_rely_addr: Invalid "
966 			    "CM PKeyIx %x port_num %x",
967 			    chan_args->oc_cm_cep_path->cep_pkey_ix,
968 			    chan_args->oc_cm_cep_path->cep_hca_port_num);
969 			return (status);
970 		}
971 
972 		cm_adds = &(chan_args->oc_cm_cep_path->cep_adds_vect);
973 		IBTF_DPRINTF_L4(cmlog, "ibcm_init_rely_addr: dlid = %x",
974 		    cm_adds->av_dlid);
975 
976 		reply_addr->rcvd_addr.ia_q_key = IB_GSI_QKEY;
977 		reply_addr->rcvd_addr.ia_remote_qno = 1;
978 		*cm_pkt_lt = chan_args->oc_cm_pkt_lt;
979 
980 	} else if (flags & IBT_OCHAN_REDIRECTED) {
981 		ibt_redirect_info_t	*redirect_info;
982 		ibt_hca_portinfo_t	*port_infop;
983 		uint_t			psize, nports;
984 
985 		IBTF_DPRINTF_L4(cmlog, "ibcm_init_rely_addr: "
986 		    "IBT_OCHAN_REDIRECTED specified");
987 
988 		redirect_info = chan_args->oc_cm_redirect_info;
989 
990 		if ((redirect_info->rdi_gid.gid_prefix == 0) ||
991 		    (redirect_info->rdi_gid.gid_guid == 0)) {
992 			IBTF_DPRINTF_L2(cmlog, "ibcm_init_reply_addr: "
993 			    "ERROR: Re-direct GID value NOT Provided.");
994 			return (IBT_INVALID_PARAM);
995 		}
996 
997 		/* As per spec definition 1.1, it's always IB_GSI_QKEY */
998 		reply_addr->rcvd_addr.ia_q_key = redirect_info->rdi_qkey;
999 		reply_addr->rcvd_addr.ia_remote_qno = redirect_info->rdi_qpn;
1000 		reply_addr->rcvd_addr.ia_p_key = redirect_info->rdi_pkey;
1001 
1002 		/*
1003 		 * if LID is non-zero in classportinfo then use classportinfo
1004 		 * fields to form CM MAD destination address.
1005 		 */
1006 		if (redirect_info->rdi_dlid != 0) {
1007 			status = ibtl_cm_query_hca_ports_byguid(hcap->hca_guid,
1008 			    reply_addr->port_num, &port_infop, &nports, &psize);
1009 			if ((status != IBT_SUCCESS) || (nports == 0)) {
1010 				IBTF_DPRINTF_L2(cmlog, "ibcm_init_reply_addr: "
1011 				    "Query Ports Failed: %d", status);
1012 				return (status);
1013 			} else if (port_infop->p_subnet_timeout >
1014 			    ibcm_max_ib_pkt_lt) {
1015 				IBTF_DPRINTF_L2(cmlog, "ibcm_init_reply_addr: "
1016 				    "large subnet timeout %x port_no %x",
1017 				    port_infop->p_subnet_timeout,
1018 				    reply_addr->port_num);
1019 				ibt_free_portinfo(port_infop, psize);
1020 				return (IBT_PATH_PKT_LT_TOO_HIGH);
1021 			} else {
1022 				IBTF_DPRINTF_L3(cmlog, "ibcm_init_reply_addr: "
1023 				    "subnet timeout %x port_no %x",
1024 				    port_infop->p_subnet_timeout,
1025 				    reply_addr->port_num);
1026 
1027 				*cm_pkt_lt =
1028 				    ibt_ib2usec(min(ibcm_max_ib_mad_pkt_lt,
1029 				    port_infop->p_subnet_timeout));
1030 
1031 				ibt_free_portinfo(port_infop, psize);
1032 			}
1033 
1034 			reply_addr->rcvd_addr.ia_remote_lid =
1035 			    redirect_info->rdi_dlid;
1036 			reply_addr->rcvd_addr.ia_service_level =
1037 			    redirect_info->rdi_sl;
1038 			reply_addr->grh_exists = B_TRUE;
1039 			reply_addr->grh_hdr.ig_recver_gid =
1040 			    redirect_info->rdi_gid;
1041 			reply_addr->grh_hdr.ig_tclass =
1042 			    redirect_info->rdi_tclass;
1043 			reply_addr->grh_hdr.ig_flow_label =
1044 			    redirect_info->rdi_flow;
1045 
1046 			/* Classportinfo doesn't have hoplimit field */
1047 			reply_addr->grh_hdr.ig_hop_limit = 1;
1048 			return (IBT_SUCCESS);
1049 
1050 		} else {
1051 			ibt_path_attr_t	path_attr;
1052 			ib_gid_t	path_dgid[1];
1053 
1054 			/*
1055 			 * If GID is specified, and LID is zero in classportinfo
1056 			 * do a path lookup using specified GID, Pkey,
1057 			 * in classportinfo
1058 			 */
1059 
1060 			bzero(&path_attr, sizeof (path_attr));
1061 
1062 			path_attr.pa_dgids = &path_dgid[0];
1063 			path_attr.pa_dgids[0] = redirect_info->rdi_gid;
1064 
1065 			/*
1066 			 * use reply_addr below, as sender_gid in reply_addr
1067 			 * may have been set above based on some policy decision
1068 			 * for originating end point for CM MADs above
1069 			 */
1070 			path_attr.pa_sgid = reply_addr->grh_hdr.ig_sender_gid;
1071 			path_attr.pa_num_dgids = 1;
1072 			path_attr.pa_pkey = redirect_info->rdi_pkey;
1073 
1074 			if ((status = ibt_get_paths(ibcm_ibt_handle,
1075 			    IBT_PATH_PKEY, &path_attr, 1, &path, NULL)) !=
1076 			    IBT_SUCCESS)
1077 				return (status);
1078 
1079 			/* Initialize cm_adds */
1080 			cm_adds = &path.pi_prim_cep_path.cep_adds_vect;
1081 			*cm_pkt_lt = path.pi_prim_pkt_lt;
1082 		}
1083 
1084 	} else	{ /* cm_pkey initialized in ibt_open_rc_channel */
1085 		reply_addr->rcvd_addr.ia_q_key = IB_GSI_QKEY;
1086 		reply_addr->rcvd_addr.ia_remote_qno = 1;
1087 		*cm_pkt_lt = chan_args->oc_path->pi_prim_pkt_lt;
1088 		cm_adds = &(IBCM_PRIM_ADDS_VECT(chan_args));
1089 	}
1090 
1091 
1092 	cm_grh = cm_adds->av_send_grh;
1093 	reply_addr->grh_exists = cm_grh;
1094 
1095 	reply_addr->rcvd_addr.ia_remote_lid =
1096 	    cm_adds->av_dlid;
1097 	reply_addr->grh_hdr.ig_recver_gid =
1098 	    cm_adds->av_dgid;
1099 	reply_addr->grh_hdr.ig_flow_label =
1100 	    cm_adds->av_flow & IB_GRH_FLOW_LABEL_MASK;
1101 	reply_addr->grh_hdr.ig_tclass =
1102 	    (cm_grh == B_TRUE) ? cm_adds->av_tclass : 0;
1103 	reply_addr->grh_hdr.ig_hop_limit =
1104 	    (cm_grh == B_TRUE) ? cm_adds->av_hop : 1;
1105 	reply_addr->rcvd_addr.ia_service_level =
1106 	    cm_adds->av_srvl;
1107 
1108 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*reply_addr))
1109 
1110 	return (IBT_SUCCESS);
1111 }
1112 
1113 
1114 /*
1115  * ibt_prime_close_rc_channel()
1116  *	It allocates resources required for close channel operation, so
1117  *	ibt_close_rc_channel can be called from interrupt routine.
1118  *
1119  * INPUTS:
1120  *	channel			The address of an ibt_channel_t struct that
1121  *				specifies the channel to open.
1122  *
1123  * RETURN VALUES:
1124  *	IBT_SUCCESS	on success(or respective failure on error)
1125  *
1126  * Clients are typically expected to call this function in established state
1127  */
1128 ibt_status_t
1129 ibt_prime_close_rc_channel(ibt_channel_hdl_t channel)
1130 {
1131 	ibcm_state_data_t	*statep;
1132 	ibt_status_t		status = IBT_SUCCESS;
1133 
1134 	IBTF_DPRINTF_L3(cmlog, "ibt_prime_close_rc_channel(%p)", channel);
1135 
1136 	/* validate channel, first */
1137 	if (IBCM_INVALID_CHANNEL(channel)) {
1138 		IBTF_DPRINTF_L2(cmlog, "ibt_prime_close_rc_channel: chan 0x%p "
1139 		    "invalid channel", channel);
1140 		return (IBT_CHAN_HDL_INVALID);
1141 	}
1142 
1143 	if (ibtl_cm_get_chan_type(channel) != IBT_RC_SRV) {
1144 		IBTF_DPRINTF_L2(cmlog, "ibt_prime_close_rc_channel: chan 0x%p "
1145 		    "Invalid Channel type: Applicable only to RC Channel",
1146 		    channel);
1147 		return (IBT_CHAN_SRV_TYPE_INVALID);
1148 	}
1149 
1150 	/* get the statep */
1151 	IBCM_GET_CHAN_PRIVATE(channel, statep);
1152 
1153 	/*
1154 	 * This can happen, if the statep is already gone by a DREQ from
1155 	 * the remote side
1156 	 */
1157 
1158 	if (statep == NULL) {
1159 		IBTF_DPRINTF_L2(cmlog, "ibt_prime_close_rc_channel: chan 0x%p "
1160 		    "statep NULL", channel);
1161 		return (IBT_SUCCESS);
1162 	}
1163 
1164 	mutex_enter(&statep->state_mutex);
1165 	IBCM_RELEASE_CHAN_PRIVATE(channel);
1166 	if (statep->state != IBCM_STATE_ESTABLISHED) {
1167 		mutex_exit(&statep->state_mutex);
1168 		return (IBT_CHAN_STATE_INVALID);
1169 	}
1170 	IBCM_REF_CNT_INCR(statep);
1171 	IBTF_DPRINTF_L4(cmlog, "ibt_prime_close_rc_channel: chan 0x%p statep %p"
1172 	    " state %x", channel, statep, statep->state);
1173 	mutex_exit(&statep->state_mutex);
1174 
1175 	/* clients could pre-allocate dreq mad, even before connection est */
1176 	if (statep->dreq_msg == NULL)
1177 		status = ibcm_alloc_out_msg(statep->stored_reply_addr.ibmf_hdl,
1178 		    &statep->dreq_msg, MAD_METHOD_SEND);
1179 
1180 	mutex_enter(&statep->state_mutex);
1181 	IBCM_REF_CNT_DECR(statep);
1182 	mutex_exit(&statep->state_mutex);
1183 
1184 	if (status != IBT_SUCCESS) {
1185 		IBTF_DPRINTF_L2(cmlog, "ibt_prime_close_rc_channel: chan 0x%p "
1186 		    "ibcm_alloc_out_msg failed ", channel);
1187 		return (status);
1188 	}
1189 
1190 	/* If this message isn't seen then ibt_prime_close_rc_channel failed */
1191 	IBTF_DPRINTF_L5(cmlog, "ibt_prime_close_rc_channel: chan 0x%p done",
1192 	    channel);
1193 
1194 	return (IBT_SUCCESS);
1195 }
1196 
1197 /*
1198  * ibt_close_rc_channel()
1199  *	It closes an established channel.
1200  *
1201  * RETURN VALUES:
1202  *	IBT_SUCCESS	on success(or respective failure on error)
1203  */
1204 ibt_status_t
1205 ibt_close_rc_channel(ibt_channel_hdl_t channel, ibt_execution_mode_t mode,
1206     void *priv_data, ibt_priv_data_len_t priv_data_len, uint8_t *ret_status,
1207     void *ret_priv_data, ibt_priv_data_len_t *ret_priv_data_len_p)
1208 {
1209 	ibcm_state_data_t	*statep;
1210 
1211 	IBTF_DPRINTF_L3(cmlog, "ibt_close_rc_channel(%p, %x, %p, %d, %p)",
1212 	    channel, mode, priv_data, priv_data_len,
1213 	    (ret_priv_data_len_p == NULL) ? 0 : *ret_priv_data_len_p);
1214 
1215 	/* validate channel, first */
1216 	if (IBCM_INVALID_CHANNEL(channel)) {
1217 		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
1218 		    "invalid channel", channel);
1219 		return (IBT_CHAN_HDL_INVALID);
1220 	}
1221 
1222 	if (ibtl_cm_get_chan_type(channel) != IBT_RC_SRV) {
1223 		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
1224 		    "Invalid Channel type: Applicable only to RC Channel",
1225 		    channel);
1226 		return (IBT_CHAN_SRV_TYPE_INVALID);
1227 	}
1228 
1229 	if (mode == IBT_BLOCKING) {
1230 		/* valid only for BLOCKING MODE */
1231 		if ((ret_priv_data_len_p != NULL) &&
1232 		    (*ret_priv_data_len_p > IBT_DREP_PRIV_DATA_SZ)) {
1233 			IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p"
1234 			    " private data len %d is too large", channel,
1235 			    *ret_priv_data_len_p);
1236 			return (IBT_INVALID_PARAM);
1237 		}
1238 	} else if ((mode != IBT_NONBLOCKING) && (mode != IBT_NOCALLBACKS)) {
1239 		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
1240 		    "invalid mode %x specified", channel, mode);
1241 		return (IBT_INVALID_PARAM);
1242 	}
1243 
1244 	if (ibtl_cm_is_chan_closing(channel) ||
1245 	    ibtl_cm_is_chan_closed(channel)) {
1246 		if (ret_status)
1247 			*ret_status = IBT_CM_CLOSED_ALREADY;
1248 
1249 		/* No private data to return to the client */
1250 		if (ret_priv_data_len_p != NULL)
1251 			*ret_priv_data_len_p = 0;
1252 
1253 		if ((mode == IBT_BLOCKING) ||
1254 		    (mode == IBT_NOCALLBACKS)) {
1255 			IBCM_GET_CHAN_PRIVATE(channel, statep);
1256 			if (statep == NULL)
1257 				return (IBT_SUCCESS);
1258 			mutex_enter(&statep->state_mutex);
1259 			IBCM_RELEASE_CHAN_PRIVATE(channel);
1260 			IBCM_REF_CNT_INCR(statep);
1261 			while (statep->close_done != B_TRUE)
1262 				cv_wait(&statep->block_client_cv,
1263 				    &statep->state_mutex);
1264 			IBCM_REF_CNT_DECR(statep);
1265 			mutex_exit(&statep->state_mutex);
1266 		}
1267 
1268 		IBTF_DPRINTF_L3(cmlog, "ibt_close_rc_channel: chan 0x%p "
1269 		    "already marked for closing", channel);
1270 
1271 		return (IBT_SUCCESS);
1272 	}
1273 
1274 	/* get the statep */
1275 	IBCM_GET_CHAN_PRIVATE(channel, statep);
1276 	if (statep == NULL) {
1277 		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
1278 		    "statep NULL", channel);
1279 		return (IBT_CHAN_STATE_INVALID);
1280 	}
1281 
1282 	mutex_enter(&statep->state_mutex);
1283 
1284 	if (statep->dreq_msg == NULL) {
1285 		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
1286 		    "Fatal Error: dreq_msg is NULL", channel);
1287 		IBCM_RELEASE_CHAN_PRIVATE(channel);
1288 		mutex_exit(&statep->state_mutex);
1289 		return (IBT_CHAN_STATE_INVALID);
1290 	}
1291 
1292 	if ((ret_priv_data == NULL) || (ret_priv_data_len_p == NULL)) {
1293 		statep->close_ret_priv_data = NULL;
1294 		statep->close_ret_priv_data_len = NULL;
1295 	} else {
1296 		statep->close_ret_priv_data = ret_priv_data;
1297 		statep->close_ret_priv_data_len = ret_priv_data_len_p;
1298 	}
1299 
1300 	priv_data_len = min(priv_data_len, IBT_DREQ_PRIV_DATA_SZ);
1301 	if ((priv_data != NULL) && (priv_data_len > 0)) {
1302 		bcopy(priv_data, ((ibcm_dreq_msg_t *)
1303 		    IBCM_OUT_MSGP(statep->dreq_msg))->dreq_private_data,
1304 		    priv_data_len);
1305 	}
1306 	statep->close_ret_status = ret_status;
1307 
1308 	IBCM_RELEASE_CHAN_PRIVATE(channel);
1309 	IBCM_REF_CNT_INCR(statep);
1310 
1311 	if (mode != IBT_NONBLOCKING) {
1312 		return (ibcm_close_rc_channel(channel, statep, mode));
1313 	}
1314 
1315 	/* IBT_NONBLOCKING */
1316 	ibcm_close_enqueue(statep);
1317 	mutex_exit(&statep->state_mutex);
1318 
1319 	return (IBT_SUCCESS);
1320 }
1321 
1322 void
1323 ibcm_close_start(ibcm_state_data_t *statep)
1324 {
1325 	mutex_enter(&statep->state_mutex);
1326 	(void) ibcm_close_rc_channel(statep->channel, statep, IBT_NONBLOCKING);
1327 }
1328 
/*
 * ibcm_close_rc_channel()
 *	Carries out the close of an RC connection according to the current
 *	connection state.
 *
 *	Must be entered with statep->state_mutex held; the mutex is released
 *	on every return path. The statep reference taken by the caller is
 *	dropped on every path as well, either directly here or, when the
 *	connection is aborted through ibcm_process_abort_via_taskq(), by that
 *	taskq routine.
 *
 * INPUTS:
 *	channel		Channel being closed (used for debug messages).
 *	statep		CM state data of the connection.
 *	mode		IBT_BLOCKING and IBT_NOCALLBACKS wait on
 *			block_client_cv for close_done where the code below
 *			says so; IBT_NONBLOCKING never waits for close_done.
 *
 * RETURN VALUES:
 *	IBT_SUCCESS			close started/completed, or already
 *					underway
 *	IBT_CHAN_HDL_INVALID		HCA is not accessible
 *	IBT_INSUFF_KERNEL_RESOURCE	abort taskq dispatch failed
 *					(IBT_NONBLOCKING only)
 *	IBT_CM_FAILURE			connection is in a state not handled
 *					below
 */
static
ibt_status_t
ibcm_close_rc_channel(ibt_channel_hdl_t channel, ibcm_state_data_t *statep,
    ibt_execution_mode_t mode)
{
	ibcm_hca_info_t		*hcap;

	_NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&statep->state_mutex));
	ASSERT(MUTEX_HELD(&statep->state_mutex));

	IBTF_DPRINTF_L3(cmlog, "ibcm_close_rc_channel: chan 0x%p statep %p",
	    channel, statep);

	hcap = statep->hcap;

	/* HCA must have been in active state. If not, it's a client bug */
	if (!IBCM_ACCESS_HCA_OK(hcap)) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_close_rc_channel: chan 0x%p "
		    "hcap 0x%p not active", channel, hcap);
		IBCM_REF_CNT_DECR(statep);
		mutex_exit(&statep->state_mutex);
		return (IBT_CHAN_HDL_INVALID);
	}

	/* Let an in-progress transition settle before looking at the state */
	if (statep->state == IBCM_STATE_TRANSIENT_ESTABLISHED) {
		while (statep->cep_in_rts == IBCM_BLOCK)
			cv_wait(&statep->block_mad_cv, &statep->state_mutex);
	}

	/* Do TRANSIENT_DREQ check after TRANSIENT_ESTABLISHED check */
	while (statep->state == IBCM_STATE_TRANSIENT_DREQ_SENT)
		cv_wait(&statep->block_mad_cv, &statep->state_mutex);

	IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: chan 0x%p "
	    "connection state is %x", channel, statep->state);

	/* If state is in pre-established states, abort the connection est */
	if (statep->state != IBCM_STATE_ESTABLISHED) {
		statep->cm_retries++;	/* ensure connection trace is dumped */

		/* No DREP private data possible */
		if (statep->close_ret_priv_data_len != NULL)
			*statep->close_ret_priv_data_len = 0;

		/*
		 * If waiting for a response mad, then cancel the timer,
		 * and delete the connection
		 */
		if (statep->state == IBCM_STATE_REQ_SENT ||
		    statep->state == IBCM_STATE_REP_SENT ||
		    statep->state == IBCM_STATE_REP_WAIT ||
		    statep->state == IBCM_STATE_MRA_REP_RCVD) {
			timeout_id_t		timer_val = statep->timerid;
			ibcm_conn_state_t	old_state;

			IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: "
			    "chan 0x%p connection aborted in state %x", channel,
			    statep->state);

			/* remember the state so a failed dispatch can undo */
			old_state = statep->state;
			statep->state = IBCM_STATE_DELETE;

			if (mode == IBT_NONBLOCKING) {
				/* must not sleep here: dispatch may fail */
				if (taskq_dispatch(ibcm_taskq,
				    ibcm_process_abort_via_taskq, statep,
				    TQ_NOSLEEP) == 0) {

					IBCM_REF_CNT_DECR(statep);
					statep->state = old_state;
					mutex_exit(&statep->state_mutex);
					return (IBT_INSUFF_KERNEL_RESOURCE);
				}	/* if taskq_dispatch succeeds */
				/* Cancel the timer */
				statep->timerid = 0;
				mutex_exit(&statep->state_mutex);
			} else {
				/* Cancel the timer */
				statep->timerid = 0;
				mutex_exit(&statep->state_mutex);
				(void) taskq_dispatch(ibcm_taskq,
				    ibcm_process_abort_via_taskq, statep,
				    TQ_SLEEP);
			}

			/* cancel the currently running timer */
			if (timer_val != 0)
				(void) untimeout(timer_val);

			/* wait until cm handler returns for BLOCKING cases */
			mutex_enter(&statep->state_mutex);
			if ((mode == IBT_BLOCKING) ||
			    (mode == IBT_NOCALLBACKS)) {
				while (statep->close_done != B_TRUE)
					cv_wait(&statep->block_client_cv,
					    &statep->state_mutex);
			}

			if (statep->close_ret_status)
				*statep->close_ret_status = IBT_CM_CLOSED_ABORT;
			mutex_exit(&statep->state_mutex);

			/*
			 * It would ideal to post a REJ MAD, but that would
			 * be non-conformance to spec. Hence, delete the state
			 * data. Assuming that happens quickly, any retransmits
			 * from the remote are replied by CM with reject
			 * reason " no valid com id". That would stop remote
			 * sending any more MADs.
			 */
			ibcm_delete_state_data(statep);
			return (IBT_SUCCESS);

		/* if CM busy in cm handler, wait until cm handler returns */
		} else if (statep->state == IBCM_STATE_REQ_RCVD ||
		    statep->state == IBCM_STATE_REP_RCVD ||
		    statep->state == IBCM_STATE_MRA_SENT ||
		    statep->state == IBCM_STATE_MRA_REP_SENT) {

			/* take control of statep */
			statep->abort_flag |= IBCM_ABORT_CLIENT;

			IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: "
			    "chan 0x%p connection aborted in state = %x",
			    channel, statep->state);

			/*
			 * wait until state machine modifies qp state to error,
			 * including disassociating statep and QP
			 */
			if ((mode == IBT_BLOCKING) || (mode == IBT_NOCALLBACKS))
				while (statep->close_done != B_TRUE)
					cv_wait(&statep->block_client_cv,
					    &statep->state_mutex);

			/* a sanity setting */
			if (mode == IBT_NOCALLBACKS)
				statep->cm_handler = NULL;
			IBCM_REF_CNT_DECR(statep);

			/*
			 * In rare situations, connection attempt could be
			 * terminated for some other reason, before abort is
			 * processed, but CM still returns ret_status as abort
			 */
			if (statep->close_ret_status)
				*statep->close_ret_status = IBT_CM_CLOSED_ABORT;
			mutex_exit(&statep->state_mutex);

			/*
			 * REJ MAD is posted by the CM state machine for this
			 * case, hence state structure is deleted in the
			 * state machine processing.
			 */
			return (IBT_SUCCESS);

		} else if ((statep->state == IBCM_STATE_TIMEWAIT) ||
		    (statep->state == IBCM_STATE_DELETE)) {

			/* State already in timewait, so no return priv data */
			IBCM_REF_CNT_DECR(statep);

			/* The teardown has already been done */
			if (statep->close_ret_status)
				*statep->close_ret_status =
				    IBT_CM_CLOSED_ALREADY;
			mutex_exit(&statep->state_mutex);

			return (IBT_SUCCESS);

		} else if ((statep->state == IBCM_STATE_DREQ_RCVD) ||
		    (statep->state == IBCM_STATE_DREQ_SENT) ||
		    (statep->state == IBCM_STATE_DREP_RCVD) ||
		    ((statep->state == IBCM_STATE_TIMED_OUT) &&
		    (statep->timedout_state == IBCM_STATE_DREQ_SENT))) {

			/*
			 * Either the remote or local client has already
			 * initiated the teardown.  IBCM_STATE_DREP_RCVD is
			 * possible, if CM initiated teardown without client's
			 * knowledge, for stale handling, etc.,
			 */
			if (mode == IBT_NOCALLBACKS) {
				if (statep->close_nocb_state == IBCM_UNBLOCK) {
					statep->close_nocb_state = IBCM_FAIL;
					/* enable free qp after return */
					ibtl_cm_chan_is_closing(
					    statep->channel);
				} else while (statep->close_nocb_state ==
				    IBCM_BLOCK)
					cv_wait(&statep->block_client_cv,
					    &statep->state_mutex);
				statep->cm_handler = NULL; /* sanity setting */
				if (statep->close_ret_status)
					*statep->close_ret_status =
					    IBT_CM_CLOSED_ALREADY;
			} else if (mode == IBT_BLOCKING) {
				/* wait until state is moved to timewait */
				while (statep->close_done != B_TRUE)
					cv_wait(&statep->block_client_cv,
					    &statep->state_mutex);
			}

			IBCM_REF_CNT_DECR(statep);
			mutex_exit(&statep->state_mutex);

			/* ret_status is set in state machine code */
			return (IBT_SUCCESS);

		} else if (statep->state == IBCM_STATE_TIMED_OUT) {

			if ((mode == IBT_BLOCKING) ||
			    (mode == IBT_NOCALLBACKS)) {

				/*
				 * wait until cm handler invocation and
				 * disassociation between statep and channel
				 * is complete
				 */
				while (statep->close_done != B_TRUE)
					cv_wait(&statep->block_client_cv,
					    &statep->state_mutex);
			}

			if (statep->close_ret_status)
				*statep->close_ret_status = IBT_CM_CLOSED_ABORT;
			IBCM_REF_CNT_DECR(statep);
			mutex_exit(&statep->state_mutex);

			return (IBT_SUCCESS);
		} else {
			/* a state none of the cases above know how to close */
			IBCM_REF_CNT_DECR(statep);
			mutex_exit(&statep->state_mutex);

			return (IBT_CM_FAILURE);
		}
	}

	/* From here on the connection was found in ESTABLISHED state */
	ASSERT(statep->close_nocb_state != IBCM_BLOCK);

	if (mode == IBT_NOCALLBACKS) {
		statep->close_nocb_state = IBCM_FAIL;
		statep->cm_handler = NULL;
		ibtl_cm_chan_is_closing(statep->channel);
		IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: "
		    "NOCALLBACKS on in statep = %p", statep);
	}

	/*
	 * NOTE(review): every branch of the non-ESTABLISHED block above
	 * returns, so this re-check looks redundant unless state_mutex can be
	 * dropped in between - confirm before removing.
	 */
	if (statep->state != IBCM_STATE_ESTABLISHED) {
		goto lost_race;
	}

	/*
	 * Cancel/wait for any pending ibt_set_alt_path, and
	 * release state mutex
	 */
	ibcm_sync_lapr_idle(statep);

	ibcm_close_enter();

	/* state_mutex was dropped above, so the state may have moved on */
	mutex_enter(&statep->state_mutex);
	if (statep->state != IBCM_STATE_ESTABLISHED) {
		ibcm_close_exit();
		goto lost_race;
	}

	statep->state = IBCM_STATE_TRANSIENT_DREQ_SENT;
	statep->timerid = 0;
	statep->close_done = B_FALSE;
	statep->close_flow = 1;
	mutex_exit(&statep->state_mutex);

	/* the DREQ is posted without state_mutex held */
	ibcm_post_dreq_mad(statep);

	mutex_enter(&statep->state_mutex);

lost_race:
	if (mode == IBT_BLOCKING) {

		/* wait for DREP */
		while (statep->close_done != B_TRUE)
			cv_wait(&statep->block_client_cv,
			    &statep->state_mutex);

		IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: chan 0x%p "
		    "done blocking", channel);
	}

	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);

	/* If this message isn't seen then ibt_close_rc_channel failed */
	IBTF_DPRINTF_L5(cmlog, "ibcm_close_rc_channel: chan 0x%p done",
	    channel);

	return (IBT_SUCCESS);
}
1625 
1626 ibt_status_t
1627 ibt_recycle_rc(ibt_channel_hdl_t rc_chan, ibt_cep_flags_t control,
1628     uint8_t hca_port_num, ibt_recycle_handler_t func, void *arg)
1629 {
1630 	ibcm_state_data_t		*statep;
1631 	ibcm_taskq_recycle_arg_t	*ibcm_tq_recycle_arg;
1632 	ibt_qp_query_attr_t		qp_attr;
1633 	ibt_status_t			retval;
1634 
1635 	IBTF_DPRINTF_L3(cmlog, "ibt_recycle_rc (%p, 0x%X, %d, %p, %p)", rc_chan,
1636 	    control, hca_port_num, func, arg);
1637 
1638 	if (IBCM_INVALID_CHANNEL(rc_chan)) {
1639 		IBTF_DPRINTF_L2(cmlog, "ibt_recycle_rc: invalid channel");
1640 		return (IBT_CHAN_HDL_INVALID);
1641 	}
1642 
1643 	/* check qp state */
1644 	retval = ibt_query_qp(rc_chan, &qp_attr);
1645 
1646 	if (retval != IBT_SUCCESS)
1647 		return (retval);
1648 
1649 	if (qp_attr.qp_info.qp_trans != IBT_RC_SRV)
1650 		return (IBT_CHAN_SRV_TYPE_INVALID);
1651 
1652 	if (qp_attr.qp_info.qp_state != IBT_STATE_ERROR)
1653 		return (IBT_CHAN_STATE_INVALID);
1654 
1655 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ibcm_tq_recycle_arg))
1656 
1657 	ibcm_tq_recycle_arg = kmem_alloc(sizeof (ibcm_taskq_recycle_arg_t),
1658 	    KM_SLEEP);
1659 
1660 	ibcm_tq_recycle_arg->rc_chan		= rc_chan;
1661 	ibcm_tq_recycle_arg->control		= control;
1662 	ibcm_tq_recycle_arg->hca_port_num	= hca_port_num;
1663 	ibcm_tq_recycle_arg->func		= func;
1664 	ibcm_tq_recycle_arg->arg		= arg;
1665 
1666 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*ibcm_tq_recycle_arg))
1667 
1668 	IBCM_GET_CHAN_PRIVATE(rc_chan, statep);
1669 
1670 	/*
1671 	 * If non-blocking ie., func specified and channel has not yet completed
1672 	 * the timewait, then schedule the work for later
1673 	 */
1674 	if ((func != NULL) && (statep != NULL)) {
1675 		IBCM_RELEASE_CHAN_PRIVATE(rc_chan);
1676 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(statep->recycle_arg))
1677 		statep->recycle_arg = ibcm_tq_recycle_arg;
1678 		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(statep->recycle_arg))
1679 		return (IBT_SUCCESS);
1680 	}
1681 
1682 	/*
1683 	 * if blocking ie., func specified, and channel has not yet completed
1684 	 * the timewait, then block until the channel completes the timewait
1685 	 */
1686 	if (statep != NULL)
1687 		IBCM_RELEASE_CHAN_PRIVATE(rc_chan);
1688 	IBCM_WAIT_CHAN_PRIVATE(rc_chan);
1689 
1690 	if (func) {	/* NON BLOCKING case. Taskq for QP state change */
1691 		(void) taskq_dispatch(ibcm_taskq, ibcm_process_rc_recycle,
1692 		    ibcm_tq_recycle_arg, TQ_SLEEP);
1693 		return (IBT_SUCCESS);
1694 	} else	/* BLOCKING case */
1695 		return (ibcm_process_rc_recycle_ret(ibcm_tq_recycle_arg));
1696 }
1697 
/*
 * ibcm_process_rc_recycle()
 *	Void-returning entry point for recycling an RC channel, usable as a
 *	taskq callback. All the work is done by ibcm_process_rc_recycle_ret();
 *	its status is dropped here.
 */
void
ibcm_process_rc_recycle(void *recycle_arg)
{
	/* status is of no use to a taskq callback */
	(void) ibcm_process_rc_recycle_ret(recycle_arg);
}
1703 
1704 static ibt_status_t
1705 ibcm_process_rc_recycle_ret(void *recycle_arg)
1706 {
1707 	ibt_qp_info_t			qp_info;
1708 	ibt_status_t			ibt_status = IBT_SUCCESS;
1709 	ibt_cep_modify_flags_t		cep_flags;
1710 	ibt_qp_query_attr_t		qp_attr;
1711 	ibcm_taskq_recycle_arg_t	*ibcm_tq_recycle_arg =
1712 	    (ibcm_taskq_recycle_arg_t *)recycle_arg;
1713 
1714 	/* QP must have been in error state */
1715 	ibt_status = ibt_query_qp(ibcm_tq_recycle_arg->rc_chan, &qp_attr);
1716 	if (ibt_status != IBT_SUCCESS)
1717 		IBTF_DPRINTF_L2(cmlog, "ibcm_process_rc_recycle_ret: "
1718 		    "chanp %p ibt_query_qp() = %d",
1719 		    ibcm_tq_recycle_arg->rc_chan, ibt_status);
1720 	else {
1721 		/* perform the QP state change from ERROR to RESET */
1722 		bzero(&qp_info, sizeof (qp_info));
1723 
1724 		qp_info.qp_trans = IBT_RC_SRV;
1725 		qp_info.qp_state = IBT_STATE_RESET;
1726 
1727 		/* Call modify_qp to move to RESET state */
1728 		ibt_status = ibt_modify_qp(ibcm_tq_recycle_arg->rc_chan,
1729 		    IBT_CEP_SET_STATE, &qp_info, NULL);
1730 
1731 		if (ibt_status != IBT_SUCCESS)
1732 			IBTF_DPRINTF_L2(cmlog, "ibcm_process_rc_recycle_ret: "
1733 			    "chanp %p ibt_modify_qp() = %d for ERROR to RESET",
1734 			    ibcm_tq_recycle_arg->rc_chan, ibt_status);
1735 	}
1736 
1737 	if (ibt_status == IBT_SUCCESS) {
1738 
1739 		qp_info.qp_state = IBT_STATE_INIT;
1740 
1741 		/* set flags for all mandatory args from RESET to INIT */
1742 		cep_flags = IBT_CEP_SET_STATE | IBT_CEP_SET_PORT;
1743 		cep_flags |= IBT_CEP_SET_RDMA_R | IBT_CEP_SET_RDMA_W;
1744 		cep_flags |= IBT_CEP_SET_ATOMIC;
1745 
1746 		qp_info.qp_transport.rc.rc_path.cep_hca_port_num =
1747 		    ibcm_tq_recycle_arg->hca_port_num;
1748 		qp_info.qp_flags |=
1749 		    ibcm_tq_recycle_arg->control & IBT_CEP_RDMA_RD;
1750 		qp_info.qp_flags |=
1751 		    ibcm_tq_recycle_arg->control & IBT_CEP_RDMA_WR;
1752 		qp_info.qp_flags |=
1753 		    ibcm_tq_recycle_arg->control & IBT_CEP_ATOMIC;
1754 
1755 		/* Always use the existing pkey */
1756 		qp_info.qp_transport.rc.rc_path.cep_pkey_ix =
1757 		    qp_attr. qp_info.qp_transport.rc.rc_path.cep_pkey_ix;
1758 
1759 		/* Call modify_qp to move to INIT state */
1760 		ibt_status = ibt_modify_qp(ibcm_tq_recycle_arg->rc_chan,
1761 		    cep_flags, &qp_info, NULL);
1762 
1763 		if (ibt_status != IBT_SUCCESS)
1764 			IBTF_DPRINTF_L2(cmlog, "ibcm_process_rc_recycle_ret: "
1765 			    "chanp %p ibt_modify_qp() = %d for RESET to INIT",
1766 			    ibcm_tq_recycle_arg->rc_chan, ibt_status);
1767 	}
1768 
1769 	/* Change the QP CM state to indicate QP being re-used */
1770 	if (ibt_status == IBT_SUCCESS)
1771 		ibtl_cm_chan_is_reused(ibcm_tq_recycle_arg->rc_chan);
1772 
1773 	/* Call func, if defined */
1774 	if (ibcm_tq_recycle_arg->func)
1775 		(*(ibcm_tq_recycle_arg->func))(ibt_status,
1776 		    ibcm_tq_recycle_arg->arg);
1777 
1778 	kmem_free(ibcm_tq_recycle_arg, sizeof (ibcm_taskq_recycle_arg_t));
1779 
1780 	return (ibt_status);
1781 }
1782 
1783 static void
1784 ibcm_process_abort_via_taskq(void *args)
1785 {
1786 	ibcm_state_data_t	*statep = (ibcm_state_data_t *)args;
1787 
1788 	ibcm_process_abort(statep);
1789 	mutex_enter(&statep->state_mutex);
1790 	IBCM_REF_CNT_DECR(statep);
1791 	mutex_exit(&statep->state_mutex);
1792 }
1793 
/*
 * Local UD CM Handler's private data, used during ibt_request_ud_dest() in
 * Non-Blocking mode operations.
 *
 * One instance is allocated per non-blocking ibt_request_ud_dest() call and
 * is freed by ibcm_local_cm_handler() once the SIDR REP event is delivered.
 */
typedef struct ibcm_local_handler_s {
	/* UD handler supplied by the client; invoked from the local handler */
	ibt_cm_ud_handler_t	actual_cm_handler;
	/* Client's private argument, passed first to actual_cm_handler */
	void			*actual_cm_private;
	/* Destination handle to update with the QPN/QKey from SIDR REP */
	ibt_ud_dest_t		*dest_hdl;
} ibcm_local_handler_t;

_NOTE(READ_ONLY_DATA(ibcm_local_handler_s))
1805 
1806 /*
1807  * Local UD CM Handler, used when ibt_alloc_ud_dest() is issued in
1808  * NON-Blocking mode.
1809  *
1810  * Out here, we update the UD Destination handle with
1811  * the obtained DQPN and QKey (from SIDR REP) and invokes actual client
1812  * handler that was specified by the client.
1813  */
1814 static ibt_cm_status_t
1815 ibcm_local_cm_handler(void *priv, ibt_cm_ud_event_t *event,
1816     ibt_cm_ud_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
1817 {
1818 	ibcm_local_handler_t	*handler_priv = (ibcm_local_handler_t *)priv;
1819 
1820 	IBTF_DPRINTF_L4(cmlog, "ibcm_local_cm_handler: event %d",
1821 	    event->cm_type);
1822 
1823 	ASSERT(handler_priv != NULL);
1824 
1825 	switch (event->cm_type) {
1826 	case IBT_CM_UD_EVENT_SIDR_REP:
1827 		/* Update QPN & QKey from event into destination handle. */
1828 		if (handler_priv->dest_hdl != NULL) {
1829 			handler_priv->dest_hdl->ud_dst_qpn =
1830 			    event->cm_event.sidr_rep.srep_remote_qpn;
1831 			handler_priv->dest_hdl->ud_qkey =
1832 			    event->cm_event.sidr_rep.srep_remote_qkey;
1833 		}
1834 
1835 		/* Invoke the client handler - inform only, so ignore retval */
1836 		(void) handler_priv->actual_cm_handler(
1837 		    handler_priv->actual_cm_private, event, ret_args, priv_data,
1838 		    len);
1839 
1840 		/* Free memory allocated for local handler's private data. */
1841 		if (handler_priv != NULL)
1842 			kmem_free(handler_priv, sizeof (*handler_priv));
1843 
1844 		break;
1845 	default:
1846 		IBTF_DPRINTF_L2(cmlog, "ibcm_local_cm_handler: ERROR");
1847 		break;
1848 	}
1849 
1850 	return (IBT_CM_ACCEPT);
1851 }
1852 
1853 
1854 /* Validate the input UD destination attributes.  */
1855 static ibt_status_t
1856 ibcm_validate_dqpn_data(ibt_ud_dest_attr_t *attr, ibt_execution_mode_t mode,
1857     ibt_ud_returns_t *ret_args)
1858 {
1859 	/* cm handler must always be specified */
1860 	if (mode == IBT_NONBLOCKING && attr->ud_cm_handler == NULL) {
1861 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1862 		    "CM handler is not specified ");
1863 		return (IBT_INVALID_PARAM);
1864 	}
1865 
1866 	if (mode == IBT_NONBLOCKING) {
1867 		if (ret_args != NULL) {
1868 			IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1869 			    "ret_args should be NULL when called in "
1870 			    "non-blocking mode");
1871 			return (IBT_INVALID_PARAM);
1872 		}
1873 	} else if (mode == IBT_BLOCKING) {
1874 		if (ret_args == NULL) {
1875 			IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1876 			    "ret_args should be Non-NULL when called in "
1877 			    "blocking mode");
1878 			return (IBT_INVALID_PARAM);
1879 		}
1880 	} else {
1881 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1882 		    "invalid mode %x specified ", mode);
1883 		return (IBT_INVALID_PARAM);
1884 	}
1885 
1886 	if (attr->ud_sid == 0) {
1887 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1888 		    "ServiceID must be specified. ");
1889 		return (IBT_INVALID_PARAM);
1890 	}
1891 
1892 	if (attr->ud_addr == NULL) {
1893 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1894 		    "Address Info NULL");
1895 		return (IBT_INVALID_PARAM);
1896 	}
1897 
1898 	/* Validate SGID */
1899 	if ((attr->ud_addr->av_sgid.gid_prefix == 0) ||
1900 	    (attr->ud_addr->av_sgid.gid_guid == 0)) {
1901 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: Invalid SGID");
1902 		return (IBT_INVALID_PARAM);
1903 	}
1904 	IBTF_DPRINTF_L3(cmlog, "ibcm_validate_dqpn_data: SGID<%llX:%llX>",
1905 	    attr->ud_addr->av_sgid.gid_prefix,
1906 	    attr->ud_addr->av_sgid.gid_guid);
1907 
1908 	/* Validate DGID */
1909 	if ((attr->ud_addr->av_dgid.gid_prefix == 0) ||
1910 	    (attr->ud_addr->av_dgid.gid_guid == 0)) {
1911 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: Invalid DGID");
1912 		return (IBT_INVALID_PARAM);
1913 	}
1914 	IBTF_DPRINTF_L3(cmlog, "ibcm_validate_dqpn_data: DGID<%llX:%llX>",
1915 	    attr->ud_addr->av_dgid.gid_prefix,
1916 	    attr->ud_addr->av_dgid.gid_guid);
1917 
1918 	return (IBT_SUCCESS);
1919 }
1920 
1921 
1922 /* Perform SIDR to retrieve DQPN and QKey.  */
1923 static ibt_status_t
1924 ibcm_ud_get_dqpn(ibt_ud_dest_attr_t *attr, ibt_execution_mode_t mode,
1925     ibt_ud_returns_t *ret_args)
1926 {
1927 	ibt_status_t		retval;
1928 	ib_pkey_t		ud_pkey;
1929 	ibmf_handle_t		ibmf_hdl;
1930 	ibmf_msg_t		*ibmf_msg;
1931 	ibcm_hca_info_t		*hcap;
1932 	ibcm_sidr_req_msg_t	*sidr_req_msgp;
1933 	ibcm_ud_state_data_t	*ud_statep;
1934 	ibtl_cm_hca_port_t	port;
1935 	ibcm_sidr_srch_t	sidr_entry;
1936 	ibcm_qp_list_t		*cm_qp_entry;
1937 
1938 	/* Retrieve HCA GUID value from the available SGID info. */
1939 	retval = ibtl_cm_get_hca_port(attr->ud_addr->av_sgid, 0, &port);
1940 	if ((retval != IBT_SUCCESS) || (port.hp_port == 0)) {
1941 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: "
1942 		    "ibtl_cm_get_hca_port failed: %d", retval);
1943 		return (retval);
1944 	}
1945 
1946 	IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: "
1947 	    "HCA GUID:%llX, port_num:%d", port.hp_hca_guid, port.hp_port);
1948 
1949 	/* Lookup the HCA info for this GUID */
1950 	if ((hcap = ibcm_find_hca_entry(port.hp_hca_guid)) == NULL) {
1951 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: hcap is NULL");
1952 		return (IBT_HCA_INVALID);
1953 	}
1954 
1955 	/* Return failure if the HCA device or Port is not operational */
1956 
1957 	if ((retval = ibt_get_port_state_byguid(port.hp_hca_guid, port.hp_port,
1958 	    NULL, NULL)) != IBT_SUCCESS) {
1959 		/* Device Port is not in good state, don't use it. */
1960 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: Invalid "
1961 		    "port specified or port not active");
1962 		ibcm_dec_hca_acc_cnt(hcap);
1963 		return (retval);
1964 	}
1965 
1966 	retval = ibt_index2pkey_byguid(port.hp_hca_guid, port.hp_port,
1967 	    attr->ud_pkey_ix, &ud_pkey);
1968 	if (retval != IBT_SUCCESS) {
1969 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: "
1970 		    "Failed to convert index2pkey: %d", retval);
1971 		ibcm_dec_hca_acc_cnt(hcap);
1972 		return (retval);
1973 	}
1974 
1975 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(sidr_entry))
1976 
1977 	/* Allocate a new request id */
1978 	if (ibcm_alloc_reqid(hcap, &sidr_entry.srch_req_id) == IBCM_FAILURE) {
1979 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: "
1980 		    "no req id available");
1981 		ibcm_dec_hca_acc_cnt(hcap);
1982 		return (IBT_INSUFF_KERNEL_RESOURCE);
1983 	}
1984 
1985 	if ((hcap->hca_port_info[port.hp_port - 1].port_ibmf_hdl == NULL) &&
1986 	    ((retval = ibcm_hca_reinit_port(hcap, port.hp_port - 1))
1987 	    != IBT_SUCCESS)) {
1988 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: "
1989 		    "ibmf reg or callback setup failed during re-initialize");
1990 		return (retval);
1991 	}
1992 
1993 	ibmf_hdl = hcap->hca_port_info[port.hp_port - 1].port_ibmf_hdl;
1994 
1995 	/* find the ibmf QP to post the SIDR REQ */
1996 	if ((cm_qp_entry = ibcm_find_qp(hcap, port.hp_port, ud_pkey)) ==
1997 	    NULL) {
1998 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: IBMF QP allocation"
1999 		    " failed");
2000 		ibcm_dec_hca_acc_cnt(hcap);
2001 		return (IBT_INSUFF_RESOURCE);
2002 	}
2003 
2004 	if ((retval = ibcm_alloc_out_msg(ibmf_hdl, &ibmf_msg, MAD_METHOD_SEND))
2005 	    != IBT_SUCCESS) {
2006 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: IBMF MSG allocation"
2007 		    " failed");
2008 		ibcm_release_qp(cm_qp_entry);
2009 		ibcm_dec_hca_acc_cnt(hcap);
2010 		return (retval);
2011 	}
2012 
2013 	sidr_entry.srch_lid = port.hp_base_lid;
2014 	sidr_entry.srch_gid = attr->ud_addr->av_sgid;
2015 	sidr_entry.srch_grh_exists = attr->ud_addr->av_send_grh;
2016 	sidr_entry.srch_mode = IBCM_ACTIVE_MODE;
2017 
2018 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(sidr_entry))
2019 
2020 	/* do various allocations needed here */
2021 	rw_enter(&hcap->hca_sidr_list_lock, RW_WRITER);
2022 
2023 	(void) ibcm_find_sidr_entry(&sidr_entry, hcap, &ud_statep,
2024 	    IBCM_FLAG_ADD);
2025 	rw_exit(&hcap->hca_sidr_list_lock);
2026 
2027 	/* Increment hca's resource count */
2028 	ibcm_inc_hca_res_cnt(hcap);
2029 
2030 	/* After a resource created on hca, no need to hold the acc cnt */
2031 	ibcm_dec_hca_acc_cnt(hcap);
2032 
2033 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ud_statep))
2034 
2035 	/* Initialize some ud_statep fields */
2036 	ud_statep->ud_stored_msg = ibmf_msg;
2037 	ud_statep->ud_svc_id = attr->ud_sid;
2038 	ud_statep->ud_pkt_life_time =
2039 	    ibt_ib2usec(attr->ud_pkt_lt);
2040 	ud_statep->ud_stored_reply_addr.cm_qp_entry = cm_qp_entry;
2041 
2042 	/* set remaining retry cnt */
2043 	ud_statep->ud_remaining_retry_cnt = ud_statep->ud_max_cm_retries;
2044 
2045 	/*
2046 	 * Get UD handler and corresponding args which is pass it back
2047 	 * as first argument for the handler.
2048 	 */
2049 	ud_statep->ud_state_cm_private = attr->ud_cm_private;
2050 
2051 	if (mode == IBT_BLOCKING)
2052 		ud_statep->ud_return_data = ret_args;
2053 	else
2054 		ud_statep->ud_cm_handler = attr->ud_cm_handler;
2055 
2056 	/* Initialize the fields of ud_statep->ud_stored_reply_addr */
2057 	ud_statep->ud_stored_reply_addr.grh_exists = attr->ud_addr->av_send_grh;
2058 	ud_statep->ud_stored_reply_addr.ibmf_hdl = ibmf_hdl;
2059 	ud_statep->ud_stored_reply_addr.grh_hdr.ig_hop_limit =
2060 	    attr->ud_addr->av_hop;
2061 	ud_statep->ud_stored_reply_addr.grh_hdr.ig_sender_gid =
2062 	    attr->ud_addr->av_sgid;
2063 	ud_statep->ud_stored_reply_addr.grh_hdr.ig_recver_gid =
2064 	    attr->ud_addr->av_dgid;
2065 	ud_statep->ud_stored_reply_addr.grh_hdr.ig_tclass =
2066 	    attr->ud_addr->av_tclass;
2067 	ud_statep->ud_stored_reply_addr.grh_hdr.ig_flow_label =
2068 	    attr->ud_addr->av_flow & IB_GRH_FLOW_LABEL_MASK;
2069 
2070 	/* needs to be derived based on the base LID and path bits */
2071 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_local_lid =
2072 	    port.hp_base_lid;
2073 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_remote_lid =
2074 	    attr->ud_addr->av_dlid;
2075 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_p_key = ud_pkey;
2076 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_q_key = IB_GSI_QKEY;
2077 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_service_level =
2078 	    attr->ud_addr->av_srvl;
2079 
2080 	/*
2081 	 * This may be enchanced later, to use a remote qno based on past
2082 	 * redirect rej mad responses. This would be the place to specify
2083 	 * appropriate remote qno
2084 	 */
2085 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_remote_qno = 1;
2086 
2087 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sidr_req_msgp))
2088 
2089 	/* Initialize the SIDR REQ message fields */
2090 	sidr_req_msgp =
2091 	    (ibcm_sidr_req_msg_t *)IBCM_OUT_MSGP(ud_statep->ud_stored_msg);
2092 
2093 	sidr_req_msgp->sidr_req_request_id = h2b32(ud_statep->ud_req_id);
2094 	sidr_req_msgp->sidr_req_service_id = h2b64(attr->ud_sid);
2095 	sidr_req_msgp->sidr_req_pkey = h2b16(ud_pkey);
2096 	IBCM_OUT_HDRP(ud_statep->ud_stored_msg)->AttributeID =
2097 	    h2b16(IBCM_INCOMING_SIDR_REQ + IBCM_ATTR_BASE_ID);
2098 
2099 	if ((attr->ud_priv_data != NULL) && (attr->ud_priv_data_len > 0)) {
2100 		bcopy(attr->ud_priv_data, sidr_req_msgp->sidr_req_private_data,
2101 		    min(attr->ud_priv_data_len, IBT_SIDR_REQ_PRIV_DATA_SZ));
2102 	}
2103 
2104 	/* Send out the SIDR REQ message */
2105 	ud_statep->ud_state = IBCM_STATE_SIDR_REQ_SENT;
2106 	ud_statep->ud_timer_stored_state = IBCM_STATE_SIDR_REQ_SENT;
2107 	IBCM_UD_REF_CNT_INCR(ud_statep); /* for non-blocking SIDR REQ post */
2108 	ud_statep->ud_timer_value = ibt_ib2usec(ibcm_max_sidr_rep_proctime) +
2109 	    (ud_statep->ud_pkt_life_time * 2);
2110 
2111 	IBCM_OUT_HDRP(ud_statep->ud_stored_msg)->TransactionID =
2112 	    h2b64(ibcm_generate_tranid(IBCM_INCOMING_SIDR_REQ,
2113 	    ud_statep->ud_req_id, 0));
2114 
2115 	IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: timer_value in HZ = %x",
2116 	    ud_statep->ud_timer_value);
2117 
2118 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*ud_statep))
2119 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*sidr_req_msgp))
2120 
2121 	ibcm_post_ud_mad(ud_statep, ud_statep->ud_stored_msg,
2122 	    ibcm_post_sidr_req_complete, ud_statep);
2123 
2124 	mutex_enter(&ud_statep->ud_state_mutex);
2125 
2126 	/* Wait for SIDR_REP */
2127 	if (mode == IBT_BLOCKING) {
2128 		IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: blocking");
2129 
2130 		while (ud_statep->ud_blocking_done != B_TRUE) {
2131 			cv_wait(&ud_statep->ud_block_client_cv,
2132 			    &ud_statep->ud_state_mutex);
2133 		}
2134 
2135 		IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: finished blocking");
2136 
2137 		if (ret_args->ud_status == IBT_CM_SREP_QPN_VALID) {
2138 			IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: DQPN = %x, "
2139 			    "status = %x, QKey = %x", ret_args->ud_dqpn,
2140 			    ret_args->ud_status, ret_args->ud_qkey);
2141 
2142 		} else {
2143 			IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: Status<%x>",
2144 			    ret_args->ud_status);
2145 			retval = IBT_CM_FAILURE;
2146 		}
2147 	}
2148 
2149 	IBCM_UD_REF_CNT_DECR(ud_statep);
2150 	mutex_exit(&ud_statep->ud_state_mutex);
2151 
2152 	IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: done");
2153 
2154 	return (retval);
2155 }
2156 
2157 
2158 /*
2159  * Function:
2160  *	ibt_request_ud_dest
2161  * Input:
2162  *	ud_dest		A previously allocated UD destination handle.
2163  *	mode		This function can execute in blocking or non blocking
2164  *			modes.
2165  *	attr		UD destination attributes to be modified.
2166  * Output:
2167  *	ud_ret_args	If the function is called in blocking mode, ud_ret_args
2168  *			should be a pointer to an ibt_ud_returns_t struct.
2169  * Returns:
2170  *	IBT_SUCCESS
2171  * Description:
2172  *	Modify a previously allocated UD destination handle based on the
2173  *	results of doing the SIDR protocol.
2174  */
2175 ibt_status_t
2176 ibt_request_ud_dest(ibt_ud_dest_hdl_t ud_dest, ibt_execution_mode_t mode,
2177     ibt_ud_dest_attr_t *attr, ibt_ud_returns_t *ud_ret_args)
2178 {
2179 	ibt_status_t		retval;
2180 	ibt_ud_dest_t		*ud_destp;
2181 	ibcm_local_handler_t	*local_handler_priv = NULL;
2182 
2183 	IBTF_DPRINTF_L3(cmlog, "ibt_request_ud_dest(%p, %x, %p, %p)",
2184 	    ud_dest, mode, attr, ud_ret_args);
2185 
2186 	retval = ibcm_validate_dqpn_data(attr, mode, ud_ret_args);
2187 	if (retval != IBT_SUCCESS) {
2188 		return (retval);
2189 	}
2190 
2191 	ud_destp = ud_dest;
2192 
2193 	/* Allocate an Address handle. */
2194 	retval = ibt_modify_ah(ud_destp->ud_dest_hca, ud_destp->ud_ah,
2195 	    attr->ud_addr);
2196 	if (retval != IBT_SUCCESS) {
2197 		IBTF_DPRINTF_L2(cmlog, "ibt_request_ud_dest: "
2198 		    "Address Handle Modification failed: %d", retval);
2199 		return (retval);
2200 	}
2201 
2202 	if (mode == IBT_NONBLOCKING) {
2203 		/*
2204 		 * In NON-BLOCKING mode, and we need to update the destination
2205 		 * handle with the DQPN and QKey that are obtained from
2206 		 * SIDR REP, hook-up our own handler, so that we can catch
2207 		 * the event, and we ourselves call the actual client's
2208 		 * ud_cm_handler, in our handler.
2209 		 */
2210 
2211 		/* Allocate memory for local handler's private data. */
2212 		local_handler_priv =
2213 		    kmem_alloc(sizeof (*local_handler_priv), KM_SLEEP);
2214 
2215 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*local_handler_priv))
2216 
2217 		local_handler_priv->actual_cm_handler = attr->ud_cm_handler;
2218 		local_handler_priv->actual_cm_private = attr->ud_cm_private;
2219 		local_handler_priv->dest_hdl = ud_destp;
2220 
2221 		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*local_handler_priv))
2222 
2223 		attr->ud_cm_handler = ibcm_local_cm_handler;
2224 		attr->ud_cm_private = local_handler_priv;
2225 	}
2226 
2227 	/* In order to get DQPN and Destination QKey, perform SIDR */
2228 	retval = ibcm_ud_get_dqpn(attr, mode, ud_ret_args);
2229 	if (retval != IBT_SUCCESS) {
2230 		IBTF_DPRINTF_L2(cmlog, "ibt_request_ud_dest: "
2231 		    "Failed to get DQPN: %d", retval);
2232 
2233 		/* Free memory allocated for local handler's private data. */
2234 		if (local_handler_priv != NULL)
2235 			kmem_free(local_handler_priv,
2236 			    sizeof (*local_handler_priv));
2237 		return (retval);
2238 	}
2239 
2240 	/*
2241 	 * Fill in the dqpn and dqkey as obtained from ud_ret_args,
2242 	 * values will be valid only on BLOCKING mode.
2243 	 */
2244 	if (mode == IBT_BLOCKING) {
2245 		ud_destp->ud_dst_qpn = ud_ret_args->ud_dqpn;
2246 		ud_destp->ud_qkey = ud_ret_args->ud_qkey;
2247 	}
2248 
2249 	return (retval);
2250 }
2251 
2252 /*
2253  * Function:
2254  *	ibt_ud_get_dqpn
2255  * Input:
2256  *	attr		A pointer to an ibt_ud_dest_attr_t struct that are
2257  *			required for SIDR REQ message. Not specified attributes
2258  *			should be set to "NULL" or "0".
2259  *			ud_sid, ud_addr and ud_pkt_lt must be specified.
2260  *	mode		This function can execute in blocking or non blocking
2261  *			modes.
2262  * Output:
2263  *	returns		If the function is called in blocking mode, returns
2264  *			should be a pointer to an ibt_ud_returns_t struct.
2265  * Return:
2266  *	IBT_SUCCESS	on success or respective failure on error.
2267  * Description:
2268  *	Finds the destination QPN at the specified destination that the
2269  *	specified service can be reached on. The IBTF CM initiates the
2270  *	service ID resolution protocol (SIDR) to determine a destination QPN.
2271  *
2272  * NOTE: SIDR_REQ is initiated from active side.
2273  */
2274 ibt_status_t
2275 ibt_ud_get_dqpn(ibt_ud_dest_attr_t *attr, ibt_execution_mode_t mode,
2276     ibt_ud_returns_t *returns)
2277 {
2278 	ibt_status_t		retval;
2279 
2280 	IBTF_DPRINTF_L3(cmlog, "ibt_ud_get_dqpn(%p, %x, %p)",
2281 	    attr, mode, returns);
2282 
2283 	retval = ibcm_validate_dqpn_data(attr, mode, returns);
2284 	if (retval != IBT_SUCCESS) {
2285 		return (retval);
2286 	}
2287 
2288 	return (ibcm_ud_get_dqpn(attr, mode, returns));
2289 }
2290 
2291 
2292 /*
2293  * ibt_cm_delay:
2294  *	A client CM handler function can call this function
2295  *	to extend its response time to a CM event.
2296  * INPUTS:
2297  *	flags		Indicates what CM message processing is being delayed
2298  *			by the CM handler, valid values are:
2299  *				IBT_CM_DELAY_REQ
2300  *				IBT_CM_DELAY_REP
2301  *				IBT_CM_DELAY_LAP
2302  *	cm_session_id	The session ID that was passed to client srv_handler
2303  *			by the CM
2304  *	service_time	The extended service time
2305  *	priv_data	Vendor specific data to be sent in the CM generated
2306  *			MRA message. Should be NULL if not specified.
2307  *	len		The number of bytes of data specified by priv_data.
2308  *
2309  * RETURN VALUES:
2310  *	IBT_SUCCESS	on success (or respective failure on error)
2311  */
2312 ibt_status_t
2313 ibt_cm_delay(ibt_cmdelay_flags_t flags, void *cm_session_id,
2314     clock_t service_time, void *priv_data, ibt_priv_data_len_t len)
2315 {
2316 	uint8_t			msg_typ = 0;
2317 	ibcm_mra_msg_t		*mra_msgp;
2318 	ibcm_state_data_t	*statep;
2319 	ibt_status_t		status;
2320 
2321 	IBTF_DPRINTF_L3(cmlog, "ibt_cm_delay(0x%x, %p, 0x%x)",
2322 	    flags, cm_session_id, service_time);
2323 
2324 	/*
2325 	 * Make sure channel is associated with a statep
2326 	 */
2327 	statep = (ibcm_state_data_t *)cm_session_id;
2328 
2329 	if (statep == NULL) {
2330 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_delay: statep NULL");
2331 		return (IBT_INVALID_PARAM);
2332 	}
2333 
2334 	IBTF_DPRINTF_L4(cmlog, "ibt_cm_delay: statep %p", statep);
2335 
2336 	/* Allocate an ibmf msg for mra, if not allocated yet */
2337 	if (statep->mra_msg == NULL) {
2338 		if ((status = ibcm_alloc_out_msg(
2339 		    statep->stored_reply_addr.ibmf_hdl, &statep->mra_msg,
2340 		    MAD_METHOD_SEND)) != IBT_SUCCESS) {
2341 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_delay: chan 0x%p"
2342 			    "IBMF MSG allocation failed", statep->channel);
2343 			return (status);
2344 		}
2345 	}
2346 
2347 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mra_msgp))
2348 
2349 	mra_msgp = (ibcm_mra_msg_t *)IBCM_OUT_MSGP(statep->mra_msg);
2350 	mra_msgp->mra_local_comm_id = h2b32(statep->local_comid);
2351 	mra_msgp->mra_remote_comm_id = h2b32(statep->remote_comid);
2352 
2353 	/* fill in rest of MRA's fields - Message MRAed and Service Timeout */
2354 	if (flags == IBT_CM_DELAY_REQ) {
2355 		msg_typ = IBT_CM_MRA_TYPE_REQ;
2356 	} else if (flags == IBT_CM_DELAY_REP) {
2357 		msg_typ = IBT_CM_MRA_TYPE_REP;
2358 	} else if (flags == IBT_CM_DELAY_LAP) {
2359 		msg_typ = IBT_CM_MRA_TYPE_LAP;
2360 	}
2361 
2362 	mra_msgp->mra_message_type_plus = msg_typ << 6;
2363 	mra_msgp->mra_service_timeout_plus = ibt_usec2ib(service_time) << 3;
2364 
2365 	len = min(len, IBT_MRA_PRIV_DATA_SZ);
2366 	if (priv_data && (len > 0))
2367 		bcopy(priv_data, mra_msgp->mra_private_data, len);
2368 
2369 	IBCM_OUT_HDRP(statep->mra_msg)->AttributeID =
2370 	    h2b16(IBCM_INCOMING_MRA + IBCM_ATTR_BASE_ID);
2371 
2372 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mra_msgp))
2373 
2374 	mutex_enter(&statep->state_mutex);
2375 
2376 	if ((statep->mode == IBCM_ACTIVE_MODE) &&
2377 	    (statep->state == IBCM_STATE_REP_RCVD)) {
2378 		statep->state = IBCM_STATE_MRA_REP_SENT;
2379 	} else if (statep->mode == IBCM_PASSIVE_MODE) {
2380 		if (statep->state == IBCM_STATE_REQ_RCVD) {
2381 			statep->state = IBCM_STATE_MRA_SENT;
2382 		} else if (statep->ap_state == IBCM_AP_STATE_LAP_RCVD) {
2383 			statep->ap_state = IBCM_AP_STATE_MRA_LAP_RCVD;
2384 		} else {
2385 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_delay: invalid state "
2386 			    "/ap_state/mode %x, %x, %x", statep->state,
2387 			    statep->ap_state, statep->mode);
2388 			mutex_exit(&statep->state_mutex);
2389 			return (IBT_CHAN_STATE_INVALID);
2390 		}
2391 	} else {
2392 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_delay: invalid state "
2393 		    "/ap_state/mode %x, %x, %x", statep->state,
2394 		    statep->ap_state, statep->mode);
2395 		mutex_exit(&statep->state_mutex);
2396 
2397 		return (IBT_CHAN_STATE_INVALID);
2398 	}
2399 	/* service time is usecs, stale_clock is nsecs */
2400 	statep->stale_clock = gethrtime() +
2401 	    (hrtime_t)ibt_ib2usec(ibt_usec2ib(service_time)) * (1000 *
2402 	    statep->max_cm_retries);
2403 
2404 	statep->send_mad_flags |= IBCM_MRA_POST_BUSY;
2405 	IBCM_REF_CNT_INCR(statep);	/* for ibcm_post_mra_complete */
2406 	mutex_exit(&statep->state_mutex);
2407 
2408 	IBCM_OUT_HDRP(statep->mra_msg)->TransactionID =
2409 	    IBCM_OUT_HDRP(statep->stored_msg)->TransactionID;
2410 
2411 	/* post the MRA mad in blocking mode, as no timers involved */
2412 	ibcm_post_rc_mad(statep, statep->mra_msg, ibcm_post_mra_complete,
2413 	    statep);
2414 	ibcm_insert_trace(statep, IBCM_TRACE_OUTGOING_MRA);
2415 	/* If this message isn't seen then ibt_cm_delay failed */
2416 	IBTF_DPRINTF_L3(cmlog, "ibt_cm_delay: done !!");
2417 
2418 	return (IBT_SUCCESS);
2419 }
2420 
2421 
2422 /*
2423  * ibt_register_service()
2424  *	Register a service with the IBCM
2425  *
2426  * INPUTS:
2427  *	ibt_hdl		The IBT client handle returned to the client
2428  *			on an ibt_attach() call.
2429  *
2430  *	srv		The address of a ibt_srv_desc_t that describes
2431  *			the service, containing the following:
2432  *
2433  *		sd_ud_handler	The Service CM UD event Handler.
2434  *		sd_handler	The Service CM RC/UC/RD event Handler.
2435  *		sd_flags	Service flags (peer-to-peer, or not).
2436  *
2437  *	sid		This tells CM if the service is local (sid is 0) or
2438  *			wellknown (sid is the starting service id of the range).
2439  *
2440  *	num_sids	The number of contiguous service-ids to reserve.
2441  *
2442  *	srv_hdl		The address of a service identification handle, used
2443  *			to deregister a service, and to bind GIDs to.
2444  *
2445  *	ret_sid		The address to store the Service ID return value.
2446  *			If num_sids > 1, ret_sid is the first Service ID
2447  *			in the range.
2448  *
2449  * ibt_register_service() returns:
2450  *	IBT_SUCCESS		- added a service successfully.
2451  *	IBT_INVALID_PARAM	- invalid input parameter.
2452  *	IBT_CM_FAILURE		- failed to add the service.
2453  *	IBT_CM_SERVICE_EXISTS	- service already exists.
2454  *	IBT_INSUFF_KERNEL_RESOURCE - ran out of local service ids (should
2455  *				     never happen).
2456  */
2457 ibt_status_t
2458 ibt_register_service(ibt_clnt_hdl_t ibt_hdl, ibt_srv_desc_t *srv,
2459     ib_svc_id_t sid, int num_sids, ibt_srv_hdl_t *srv_hdl, ib_svc_id_t *ret_sid)
2460 {
2461 	ibcm_svc_info_t		*svcinfop;
2462 
2463 	IBTF_DPRINTF_L2(cmlog, "ibt_register_service(%p (%s), %p, 0x%llX, %d)",
2464 	    ibt_hdl, ibtl_cm_get_clnt_name(ibt_hdl), srv, (longlong_t)sid,
2465 	    num_sids);
2466 
2467 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*svcinfop))
2468 
2469 	*srv_hdl = NULL;
2470 
2471 	if (num_sids <= 0) {
2472 		IBTF_DPRINTF_L2(cmlog, "ibt_register_service: "
2473 		    "Invalid number of service-ids specified (%d)", num_sids);
2474 		return (IBT_INVALID_PARAM);
2475 	}
2476 
2477 	if (sid == 0) {
2478 		if (ret_sid == NULL)
2479 			return (IBT_INVALID_PARAM);
2480 		sid = ibcm_alloc_local_sids(num_sids);
2481 		if (sid == 0)
2482 			return (IBT_INSUFF_KERNEL_RESOURCE);
2483 
2484 	/* Make sure that the ServiceId specified is not of LOCAL AGN type. */
2485 	} else if ((sid & IB_SID_AGN_MASK) == IB_SID_AGN_LOCAL) {
2486 		IBTF_DPRINTF_L2(cmlog, "ibt_register_service: "
2487 		    "Invalid non-LOCAL SID specified: 0x%llX",
2488 		    (longlong_t)sid);
2489 		return (IBT_INVALID_PARAM);
2490 	}
2491 
2492 	svcinfop = ibcm_create_svc_entry(sid, num_sids);
2493 
2494 	if (svcinfop == NULL) {
2495 		IBTF_DPRINTF_L2(cmlog, "ibt_register_service: "
2496 		    "Service-ID 0x%llx already registered", (longlong_t)sid);
2497 		return (IBT_CM_SERVICE_EXISTS);
2498 	}
2499 
2500 	/*
2501 	 * 'sid' and 'num_sids' are filled in ibcm_create_svc_entry()
2502 	 */
2503 	svcinfop->svc_flags = srv->sd_flags;
2504 	svcinfop->svc_rc_handler = srv->sd_handler;
2505 	svcinfop->svc_ud_handler = srv->sd_ud_handler;
2506 
2507 	if (ret_sid != NULL)
2508 		*ret_sid = sid;
2509 
2510 	*srv_hdl = svcinfop;
2511 
2512 	ibtl_cm_change_service_cnt(ibt_hdl, num_sids);
2513 
2514 	/* If this message isn't seen, then ibt_register_service failed. */
2515 	IBTF_DPRINTF_L2(cmlog, "ibt_register_service: done (%p, %llX)",
2516 	    svcinfop, sid);
2517 
2518 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*svcinfop))
2519 
2520 	return (IBT_SUCCESS);
2521 }
2522 
2523 
2524 static ibt_status_t
2525 ibcm_write_service_record(ibmf_saa_handle_t saa_handle,
2526     sa_service_record_t *srv_recp, ibmf_saa_access_type_t saa_type)
2527 {
2528 	int	rval;
2529 	int	retry;
2530 
2531 	ibcm_sa_access_enter();
2532 	for (retry = 0; retry < ibcm_max_sa_retries; retry++) {
2533 		rval = ibmf_saa_update_service_record(
2534 		    saa_handle, srv_recp, saa_type, 0);
2535 		if (rval != IBMF_TRANS_TIMEOUT) {
2536 			break;
2537 		}
2538 		IBTF_DPRINTF_L2(cmlog, "ibcm_write_service_record: "
2539 		    "ibmf_saa_update_service_record timed out"
2540 		    " SID = %llX, rval = %d, saa_type = %d",
2541 		    (longlong_t)srv_recp->ServiceID, rval, saa_type);
2542 		delay(ibcm_sa_timeout_delay);
2543 	}
2544 	ibcm_sa_access_exit();
2545 
2546 	if (rval != IBMF_SUCCESS) {
2547 		IBTF_DPRINTF_L2(cmlog, "ibcm_write_service_record: "
2548 		    "ibmf_saa_update_service_record() : Failed - %d", rval);
2549 		return (ibcm_ibmf_analyze_error(rval));
2550 	} else
2551 		return (IBT_SUCCESS);
2552 }
2553 
2554 
2555 static void
2556 ibcm_rem_stale_srec(ibmf_saa_handle_t saa_handle, sa_service_record_t *srec)
2557 {
2558 	ibt_status_t		retval;
2559 	uint_t			num_found;
2560 	size_t			length;
2561 	sa_service_record_t	*srv_resp;
2562 	void			*results_p;
2563 	uint_t			i;
2564 	uint64_t		component_mask;
2565 	ibmf_saa_access_args_t	access_args;
2566 
2567 	component_mask =
2568 	    SA_SR_COMPMASK_PKEY | SA_SR_COMPMASK_NAME | SA_SR_COMPMASK_GID;
2569 
2570 	/* Call in SA Access retrieve routine to get Service Records. */
2571 	access_args.sq_attr_id = SA_SERVICERECORD_ATTRID;
2572 	access_args.sq_access_type = IBMF_SAA_RETRIEVE;
2573 	access_args.sq_component_mask = component_mask;
2574 	access_args.sq_template = srec;
2575 	access_args.sq_template_length = sizeof (sa_service_record_t);
2576 	access_args.sq_callback = NULL;
2577 	access_args.sq_callback_arg = NULL;
2578 
2579 	retval = ibcm_contact_sa_access(saa_handle, &access_args, &length,
2580 	    &results_p);
2581 	if (retval != IBT_SUCCESS) {
2582 		IBTF_DPRINTF_L2(cmlog, "ibcm_rem_stale_srec: "
2583 		    "SA Access Failure");
2584 		return;
2585 	}
2586 
2587 	num_found = length / sizeof (sa_service_record_t);
2588 
2589 	if (num_found)
2590 		IBTF_DPRINTF_L3(cmlog, "ibcm_rem_stale_srec: "
2591 		    "Found %d matching Service Records.", num_found);
2592 
2593 	/* Validate the returned number of records. */
2594 	if ((results_p != NULL) && (num_found > 0)) {
2595 
2596 		/* Remove all the records. */
2597 		for (i = 0; i < num_found; i++) {
2598 
2599 			srv_resp = (sa_service_record_t *)
2600 			    ((uchar_t *)results_p +
2601 			    i * sizeof (sa_service_record_t));
2602 
2603 			/*
2604 			 * Found some matching records, but check out whether
2605 			 * this Record is really stale or just happens to match
2606 			 * the current session records. If yes, don't remove it.
2607 			 */
2608 			mutex_enter(&ibcm_svc_info_lock);
2609 			if (ibcm_find_svc_entry(srv_resp->ServiceID) != NULL) {
2610 				/* This record is NOT STALE. */
2611 				mutex_exit(&ibcm_svc_info_lock);
2612 				IBTF_DPRINTF_L3(cmlog, "ibcm_rem_stale_srec: "
2613 				    "This is not Stale, it's an active record");
2614 				continue;
2615 			}
2616 			mutex_exit(&ibcm_svc_info_lock);
2617 
2618 			IBTF_DPRINTF_L2(cmlog, "ibcm_rem_stale_srec: "
2619 			    "Removing Stale Rec: %s, %llX",
2620 			    srv_resp->ServiceName, srv_resp->ServiceID);
2621 
2622 			IBCM_DUMP_SERVICE_REC(srv_resp);
2623 
2624 			/*
2625 			 * Remove the Service Record Entry from SA.
2626 			 *
2627 			 * Get ServiceID info from Response Buf, other
2628 			 * attributes are already filled-in.
2629 			 */
2630 
2631 			 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(srec->ServiceID))
2632 
2633 			srec->ServiceID = srv_resp->ServiceID;
2634 
2635 			 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(srec->ServiceID))
2636 
2637 			(void) ibcm_write_service_record(saa_handle, srec,
2638 			    IBMF_SAA_DELETE);
2639 		}
2640 
2641 		/* Deallocate the memory for results_p. */
2642 		kmem_free(results_p, length);
2643 	}
2644 }
2645 
2646 
2647 
2648 /*
2649  * ibt_bind_service()
2650  *	Register a service with the IBCM
2651  *
2652  * INPUTS:
2653  *	srv_hdl		The service id handle returned to the client
2654  *			on an ibt_service_register() call.
2655  *
2656  *	gid		The GID to which to bind the service.
2657  *
2658  *	srv_bind	The address of a ibt_srv_bind_t that describes
2659  *			the service record.  This should be NULL if there
2660  *			is to be no service record.  This contains:
2661  *
2662  *		sb_lease	Lease period
2663  *		sb_pkey		Partition
2664  *		sb_name		pointer to ASCII string Service Name,
2665  *				NULL terminated.
2666  *		sb_key[]	Key to secure the service record.
2667  *		sb_data		Service Data structure (64-byte)
2668  *
2669  *	cm_private	First argument of Service handler.
2670  *
2671  *	sb_hdl_p	The address of a service bind handle, used
2672  *			to undo the service binding.
2673  *
2674  * ibt_bind_service() returns:
2675  *	IBT_SUCCESS		- added a service successfully.
2676  *	IBT_INVALID_PARAM	- invalid input parameter.
2677  *	IBT_CM_FAILURE		- failed to add the service.
2678  *	IBT_CM_SERVICE_EXISTS	- service already exists.
2679  */
2680 ibt_status_t
2681 ibt_bind_service(ibt_srv_hdl_t srv_hdl, ib_gid_t gid, ibt_srv_bind_t *srv_bind,
2682     void *cm_private, ibt_sbind_hdl_t *sb_hdl_p)
2683 {
2684 	ibt_status_t		status;
2685 	ibtl_cm_hca_port_t	port;
2686 	ibcm_svc_bind_t		*sbindp, *sbp;
2687 	ibcm_hca_info_t		*hcap;
2688 	ib_svc_id_t		sid, start_sid, end_sid;
2689 	ibmf_saa_handle_t	saa_handle;
2690 	sa_service_record_t	srv_rec;
2691 	uint16_t		pkey_ix;
2692 
2693 	if (sb_hdl_p != NULL)
2694 		*sb_hdl_p = NULL;	/* return value for error cases */
2695 
2696 	IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: srv_hdl %p, gid (%llX:%llX)",
2697 	    srv_hdl, (longlong_t)gid.gid_prefix, (longlong_t)gid.gid_guid);
2698 
2699 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sbindp))
2700 
2701 	/* Call ibtl_cm_get_hca_port to get the port number and the HCA GUID. */
2702 	if ((status = ibtl_cm_get_hca_port(gid, 0, &port)) != IBT_SUCCESS) {
2703 		IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2704 		    "ibtl_cm_get_hca_port failed: %d", status);
2705 		return (status);
2706 	}
2707 	IBTF_DPRINTF_L4(cmlog, "ibt_bind_service: Port:%d HCA GUID:%llX",
2708 	    port.hp_port, port.hp_hca_guid);
2709 
2710 	hcap = ibcm_find_hca_entry(port.hp_hca_guid);
2711 	if (hcap == NULL) {
2712 		IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: NO HCA found");
2713 		return (IBT_HCA_BUSY_DETACHING);
2714 	}
2715 	IBTF_DPRINTF_L4(cmlog, "ibt_bind_service: hcap = %p", hcap);
2716 
2717 	if (srv_bind != NULL) {
2718 		saa_handle = ibcm_get_saa_handle(hcap, port.hp_port);
2719 		if (saa_handle == NULL) {
2720 			IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2721 			    "saa_handle is NULL");
2722 			ibcm_dec_hca_acc_cnt(hcap);
2723 			return (IBT_HCA_PORT_NOT_ACTIVE);
2724 		}
2725 		if (srv_bind->sb_pkey == 0) {
2726 			IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2727 			    "P_Key must not be 0");
2728 			ibcm_dec_hca_acc_cnt(hcap);
2729 			return (IBT_INVALID_PARAM);
2730 		}
2731 		if (strlen(srv_bind->sb_name) >= IB_SVC_NAME_LEN) {
2732 			IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2733 			    "Service Name is too long");
2734 			ibcm_dec_hca_acc_cnt(hcap);
2735 			return (IBT_INVALID_PARAM);
2736 		} else
2737 			IBTF_DPRINTF_L3(cmlog, "ibt_bind_service: "
2738 			    "Service Name='%s'", srv_bind->sb_name);
2739 		status = ibt_pkey2index_byguid(port.hp_hca_guid,
2740 		    port.hp_port, srv_bind->sb_pkey, &pkey_ix);
2741 		if (status != IBT_SUCCESS) {
2742 			IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2743 			    "P_Key 0x%x not found in P_Key_Table",
2744 			    srv_bind->sb_pkey);
2745 			ibcm_dec_hca_acc_cnt(hcap);
2746 			return (status);
2747 		}
2748 	}
2749 
2750 	/* assume success - allocate before locking */
2751 	sbindp = kmem_zalloc(sizeof (*sbindp), KM_SLEEP);
2752 	sbindp->sbind_cm_private = cm_private;
2753 	sbindp->sbind_gid = gid;
2754 	sbindp->sbind_hcaguid = port.hp_hca_guid;
2755 	sbindp->sbind_port = port.hp_port;
2756 
2757 	mutex_enter(&ibcm_svc_info_lock);
2758 
2759 	sbp = srv_hdl->svc_bind_list;
2760 	while (sbp != NULL) {
2761 		if (sbp->sbind_gid.gid_guid == gid.gid_guid &&
2762 		    sbp->sbind_gid.gid_prefix == gid.gid_prefix) {
2763 			if (srv_bind == NULL ||
2764 			    srv_bind->sb_pkey == sbp->sbind_pkey) {
2765 				IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2766 				    "failed: GID %llX:%llX and PKEY %x is "
2767 				    "already bound", gid.gid_prefix,
2768 				    gid.gid_guid, sbp->sbind_pkey);
2769 				mutex_exit(&ibcm_svc_info_lock);
2770 				ibcm_dec_hca_acc_cnt(hcap);
2771 				kmem_free(sbindp, sizeof (*sbindp));
2772 				return (IBT_CM_SERVICE_EXISTS);
2773 			}
2774 		}
2775 		sbp = sbp->sbind_link;
2776 	}
2777 	/* no entry found */
2778 
2779 	sbindp->sbind_link = srv_hdl->svc_bind_list;
2780 	srv_hdl->svc_bind_list = sbindp;
2781 
2782 	mutex_exit(&ibcm_svc_info_lock);
2783 
2784 	if (srv_bind != NULL) {
2785 		bzero(&srv_rec, sizeof (srv_rec));
2786 
2787 		srv_rec.ServiceLease =
2788 		    sbindp->sbind_lease = srv_bind->sb_lease;
2789 		srv_rec.ServiceP_Key =
2790 		    sbindp->sbind_pkey = srv_bind->sb_pkey;
2791 		srv_rec.ServiceKey_hi =
2792 		    sbindp->sbind_key[0] = srv_bind->sb_key[0];
2793 		srv_rec.ServiceKey_lo =
2794 		    sbindp->sbind_key[1] = srv_bind->sb_key[1];
2795 		(void) strcpy(sbindp->sbind_name, srv_bind->sb_name);
2796 		(void) strcpy((char *)srv_rec.ServiceName, srv_bind->sb_name);
2797 		srv_rec.ServiceGID = gid;
2798 
2799 		/*
2800 		 * Find out whether we have any stale Local Service records
2801 		 * matching the current attributes.  If yes, we shall try to
2802 		 * remove them from SA using the current request's ServiceKey.
2803 		 *
2804 		 * We will perform this operation only for Local Services, as
2805 		 * it is handled by SA automatically for WellKnown Services.
2806 		 *
2807 		 * Ofcourse, clients can specify NOT to do this clean-up by
2808 		 * setting IBT_SBIND_NO_CLEANUP flag (srv_bind->sb_flag).
2809 		 */
2810 		if ((srv_hdl->svc_id & IB_SID_AGN_LOCAL) &&
2811 		    (!(srv_bind->sb_flag & IBT_SBIND_NO_CLEANUP))) {
2812 			ibcm_rem_stale_srec(saa_handle, &srv_rec);
2813 		}
2814 
2815 		/* Handle endianess for service data. */
2816 		ibcm_swizzle_from_srv(&srv_bind->sb_data, sbindp->sbind_data);
2817 
2818 		bcopy(sbindp->sbind_data, srv_rec.ServiceData, IB_SVC_DATA_LEN);
2819 
2820 		/* insert srv record into the SA */
2821 		start_sid = srv_hdl->svc_id;
2822 		end_sid = start_sid + srv_hdl->svc_num_sids - 1;
2823 		for (sid = start_sid; sid <= end_sid; sid++) {
2824 
2825 			srv_rec.ServiceID = sid;
2826 
2827 			IBCM_DUMP_SERVICE_REC(&srv_rec);
2828 
2829 			IBTF_DPRINTF_L4(cmlog, "ibt_bind_service: "
2830 			    "ibmf_saa_write_service_record, SvcId = %llX",
2831 			    (longlong_t)sid);
2832 
2833 			status = ibcm_write_service_record(saa_handle, &srv_rec,
2834 			    IBMF_SAA_UPDATE);
2835 			if (status != IBT_SUCCESS) {
2836 				IBTF_DPRINTF_L2(cmlog, "ibt_bind_service:"
2837 				    " ibcm_write_service_record fails %d, "
2838 				    "sid %llX", status, (longlong_t)sid);
2839 
2840 				if (sid != start_sid) {
2841 					/*
2842 					 * Bind failed while bind SID other than
2843 					 * first in the sid_range.  So we need
2844 					 * to unbind those, which are passed.
2845 					 *
2846 					 * Need to increment svc count to
2847 					 * compensate for ibt_unbind_service().
2848 					 */
2849 					ibcm_inc_hca_svc_cnt(hcap);
2850 					ibcm_dec_hca_acc_cnt(hcap);
2851 
2852 					(void) ibt_unbind_service(srv_hdl,
2853 					    sbindp);
2854 				} else {
2855 					ibcm_svc_bind_t		**sbpp;
2856 
2857 					/*
2858 					 * Bind failed for the first SID or the
2859 					 * only SID in question, then no need
2860 					 * to unbind, just free memory and
2861 					 * return error.
2862 					 */
2863 					mutex_enter(&ibcm_svc_info_lock);
2864 
2865 					sbpp = &srv_hdl->svc_bind_list;
2866 					sbp = *sbpp;
2867 					while (sbp != NULL) {
2868 						if (sbp == sbindp) {
2869 							*sbpp = sbp->sbind_link;
2870 							break;
2871 						}
2872 						sbpp = &sbp->sbind_link;
2873 						sbp = *sbpp;
2874 					}
2875 					mutex_exit(&ibcm_svc_info_lock);
2876 					ibcm_dec_hca_acc_cnt(hcap);
2877 
2878 					kmem_free(sbindp, sizeof (*sbindp));
2879 				}
2880 				return (status);
2881 			}
2882 		}
2883 	}
2884 	ibcm_inc_hca_svc_cnt(hcap);
2885 	ibcm_dec_hca_acc_cnt(hcap);
2886 
2887 	/* If this message isn't seen then ibt_bind_service failed */
2888 	IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: DONE (%p, %llX:%llX)",
2889 	    srv_hdl, gid.gid_prefix, gid.gid_guid);
2890 
2891 	if (sb_hdl_p != NULL)
2892 		*sb_hdl_p = sbindp;
2893 
2894 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*sbindp))
2895 
2896 	return (IBT_SUCCESS);
2897 }
2898 
2899 ibt_status_t
2900 ibt_unbind_service(ibt_srv_hdl_t srv_hdl, ibt_sbind_hdl_t sbindp)
2901 {
2902 	ib_svc_id_t	sid, end_sid;
2903 	ibt_status_t	rval;
2904 	ibcm_hca_info_t	*hcap;
2905 	ibcm_svc_bind_t	*sbp, **sbpp;
2906 
2907 	IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service(%p, %p)",
2908 	    srv_hdl, sbindp);
2909 
2910 	hcap = ibcm_find_hca_entry(sbindp->sbind_hcaguid);
2911 
2912 	/* If there is a service on hca, respective hcap cannot go away */
2913 	ASSERT(hcap != NULL);
2914 
2915 	mutex_enter(&ibcm_svc_info_lock);
2916 
2917 	sbpp = &srv_hdl->svc_bind_list;
2918 	sbp = *sbpp;
2919 	while (sbp != NULL) {
2920 		if (sbp == sbindp) {
2921 			*sbpp = sbp->sbind_link;
2922 			break;
2923 		}
2924 		sbpp = &sbp->sbind_link;
2925 		sbp = *sbpp;
2926 	}
2927 	sid = srv_hdl->svc_id;
2928 	end_sid = srv_hdl->svc_id + srv_hdl->svc_num_sids - 1;
2929 	if (sbp != NULL)
2930 		while (sbp->sbind_rewrite_state == IBCM_REWRITE_BUSY)
2931 			cv_wait(&ibcm_svc_info_cv, &ibcm_svc_info_lock);
2932 	mutex_exit(&ibcm_svc_info_lock);
2933 
2934 	if (sbp == NULL) {
2935 		IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: "
2936 		    "service binding not found: srv_hdl %p, srv_bind %p",
2937 		    srv_hdl, sbindp);
2938 		ibcm_dec_hca_acc_cnt(hcap);
2939 		return (IBT_INVALID_PARAM);
2940 	}
2941 
2942 	if (sbindp->sbind_pkey != 0) {	/* Are there service records? */
2943 		ibtl_cm_hca_port_t	port;
2944 		sa_service_record_t	srv_rec;
2945 		ibmf_saa_handle_t	saa_handle;
2946 		ibt_status_t		status;
2947 
2948 		/* get the default SGID of the port */
2949 		if ((status = ibtl_cm_get_hca_port(sbindp->sbind_gid, 0, &port))
2950 		    != IBT_SUCCESS) {
2951 			IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: "
2952 			    "ibtl_cm_get_hca_port failed: %d", status);
2953 			/* we're done, but there may be stale service records */
2954 			goto done;
2955 		}
2956 
2957 		saa_handle = ibcm_get_saa_handle(hcap, port.hp_port);
2958 		if (saa_handle == NULL) {
2959 			IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: "
2960 			    "saa_handle is NULL");
2961 			/* we're done, but there may be stale service records */
2962 			goto done;
2963 		}
2964 
2965 		/* Fill in fields of srv_rec */
2966 		bzero(&srv_rec, sizeof (srv_rec));
2967 
2968 		srv_rec.ServiceP_Key = sbindp->sbind_pkey;
2969 		srv_rec.ServiceKey_hi = sbindp->sbind_key[0];
2970 		srv_rec.ServiceKey_lo = sbindp->sbind_key[1];
2971 		srv_rec.ServiceGID = sbindp->sbind_gid;
2972 		(void) strcpy((char *)srv_rec.ServiceName, sbindp->sbind_name);
2973 
2974 		while (sid <= end_sid) {
2975 
2976 			srv_rec.ServiceID = sid;
2977 			IBCM_DUMP_SERVICE_REC(&srv_rec);
2978 
2979 			rval = ibcm_write_service_record(saa_handle, &srv_rec,
2980 			    IBMF_SAA_DELETE);
2981 
2982 			IBTF_DPRINTF_L4(cmlog, "ibt_unbind_service: "
2983 			    "ibcm_write_service_record rval = %d, SID %llx",
2984 			    rval, sid);
2985 			if (rval != IBT_SUCCESS) {
2986 				/* this is not considered a reason to fail */
2987 				IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: "
2988 				    "ibcm_write_service_record fails %d, "
2989 				    "sid %llx", rval, sid);
2990 			}
2991 			sid++;
2992 		}
2993 	}
2994 done:
2995 	ibcm_dec_hca_svc_cnt(hcap);
2996 	ibcm_dec_hca_acc_cnt(hcap);
2997 	kmem_free(sbindp, sizeof (*sbindp));
2998 
2999 	/* If this message isn't seen then ibt_unbind_service failed */
3000 	IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: done !!");
3001 
3002 	return (IBT_SUCCESS);
3003 }
3004 
3005 /*
3006  * Simply pull off each binding from the list and unbind it.
3007  * If any of the unbind calls fail, we fail.
3008  */
3009 ibt_status_t
3010 ibt_unbind_all_services(ibt_srv_hdl_t srv_hdl)
3011 {
3012 	ibt_status_t	status;
3013 	ibcm_svc_bind_t	*sbp;
3014 
3015 	mutex_enter(&ibcm_svc_info_lock);
3016 	sbp = NULL;
3017 
3018 	/* this compare keeps the loop from being infinite */
3019 	while (sbp != srv_hdl->svc_bind_list) {
3020 		sbp = srv_hdl->svc_bind_list;
3021 		mutex_exit(&ibcm_svc_info_lock);
3022 		status = ibt_unbind_service(srv_hdl, sbp);
3023 		if (status != IBT_SUCCESS)
3024 			return (status);
3025 		mutex_enter(&ibcm_svc_info_lock);
3026 		if (srv_hdl->svc_bind_list == NULL)
3027 			break;
3028 	}
3029 	mutex_exit(&ibcm_svc_info_lock);
3030 	return (IBT_SUCCESS);
3031 }
3032 
3033 /*
3034  * ibt_deregister_service()
3035  *	Deregister a service with the IBCM
3036  *
3037  * INPUTS:
3038  *	ibt_hdl		The IBT client handle returned to the client
3039  *			on an ibt_attach() call.
3040  *
3041  *	srv_hdl		The address of a service identification handle, used
3042  *			to de-register a service.
3043  * RETURN VALUES:
3044  *	IBT_SUCCESS	on success (or respective failure on error)
3045  */
3046 ibt_status_t
3047 ibt_deregister_service(ibt_clnt_hdl_t ibt_hdl, ibt_srv_hdl_t srv_hdl)
3048 {
3049 	ibcm_svc_info_t		*svcp;
3050 	ibcm_svc_lookup_t	svc;
3051 
3052 	IBTF_DPRINTF_L2(cmlog, "ibt_deregister_service(%p (%s), %p)",
3053 	    ibt_hdl, ibtl_cm_get_clnt_name(ibt_hdl), srv_hdl);
3054 
3055 	mutex_enter(&ibcm_svc_info_lock);
3056 
3057 	if (srv_hdl->svc_bind_list != NULL) {
3058 		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_service:"
3059 		    " srv_hdl %p still has bindings", srv_hdl);
3060 		mutex_exit(&ibcm_svc_info_lock);
3061 		return (IBT_CM_SERVICE_BUSY);
3062 	}
3063 	svc.sid = srv_hdl->svc_id;
3064 	svc.num_sids = 1;
3065 	IBTF_DPRINTF_L3(cmlog, "ibt_deregister_service: SID 0x%llX, numsids %d",
3066 	    srv_hdl->svc_id, srv_hdl->svc_num_sids);
3067 
3068 #ifdef __lock_lint
3069 	ibcm_svc_compare(NULL, NULL);
3070 #endif
3071 	svcp = avl_find(&ibcm_svc_avl_tree, &svc, NULL);
3072 	if (svcp != srv_hdl) {
3073 		mutex_exit(&ibcm_svc_info_lock);
3074 		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_service(): "
3075 		    "srv_hdl %p not found", srv_hdl);
3076 		return (IBT_INVALID_PARAM);
3077 	}
3078 	avl_remove(&ibcm_svc_avl_tree, svcp);
3079 
3080 	/* wait for active REQ/SREQ handling to be done */
3081 	svcp->svc_to_delete = 1;
3082 	while (svcp->svc_ref_cnt != 0)
3083 		cv_wait(&ibcm_svc_info_cv, &ibcm_svc_info_lock);
3084 
3085 	mutex_exit(&ibcm_svc_info_lock);
3086 
3087 	if ((srv_hdl->svc_id & IB_SID_AGN_MASK) == IB_SID_AGN_LOCAL)
3088 		ibcm_free_local_sids(srv_hdl->svc_id, srv_hdl->svc_num_sids);
3089 
3090 	ibtl_cm_change_service_cnt(ibt_hdl, -srv_hdl->svc_num_sids);
3091 	kmem_free(srv_hdl, sizeof (*srv_hdl));
3092 
3093 	/* If this message isn't seen then ibt_deregister_service failed */
3094 	IBTF_DPRINTF_L2(cmlog, "ibt_deregister_service: done !!");
3095 
3096 	return (IBT_SUCCESS);
3097 }
3098 
3099 ibcm_status_t
3100 ibcm_ar_init(void)
3101 {
3102 	ib_svc_id_t	sid = IBCM_DAPL_ATS_SID;
3103 	ibcm_svc_info_t *tmp_svcp;
3104 
3105 	IBTF_DPRINTF_L3(cmlog, "ibcm_ar_init()");
3106 
3107 	/* remove this special SID from the pool of available SIDs */
3108 	if ((tmp_svcp = ibcm_create_svc_entry(sid, 1)) == NULL) {
3109 		IBTF_DPRINTF_L3(cmlog, "ibcm_ar_init: "
3110 		    "DAPL ATS SID 0x%llx already registered", (longlong_t)sid);
3111 		return (IBCM_FAILURE);
3112 	}
3113 	mutex_enter(&ibcm_svc_info_lock);
3114 	ibcm_ar_svcinfop = tmp_svcp;
3115 	ibcm_ar_list = NULL;	/* no address records registered yet */
3116 	mutex_exit(&ibcm_svc_info_lock);
3117 	return (IBCM_SUCCESS);
3118 }
3119 
3120 ibcm_status_t
3121 ibcm_ar_fini(void)
3122 {
3123 	ibcm_ar_t	*ar_list;
3124 	ibcm_svc_info_t	*tmp_svcp;
3125 
3126 	mutex_enter(&ibcm_svc_info_lock);
3127 	ar_list = ibcm_ar_list;
3128 
3129 	if (ar_list == NULL &&
3130 	    avl_numnodes(&ibcm_svc_avl_tree) == 1 &&
3131 	    avl_first(&ibcm_svc_avl_tree) == ibcm_ar_svcinfop) {
3132 		avl_remove(&ibcm_svc_avl_tree, ibcm_ar_svcinfop);
3133 		tmp_svcp = ibcm_ar_svcinfop;
3134 		mutex_exit(&ibcm_svc_info_lock);
3135 		kmem_free(tmp_svcp, sizeof (*ibcm_ar_svcinfop));
3136 		return (IBCM_SUCCESS);
3137 	}
3138 	mutex_exit(&ibcm_svc_info_lock);
3139 	return (IBCM_FAILURE);
3140 }
3141 
3142 
3143 /*
3144  * Return to the caller:
3145  *	IBT_SUCCESS		Found a perfect match.
3146  *				*arpp is set to the record.
3147  *	IBT_INCONSISTENT_AR	Found a record that's inconsistent.
3148  *	IBT_AR_NOT_REGISTERED	Found no record with same GID/pkey and
3149  *				found no record with same data.
3150  */
3151 static ibt_status_t
3152 ibcm_search_ar(ibt_ar_t *arp, ibcm_ar_t **arpp)
3153 {
3154 	ibcm_ar_t	*tmp;
3155 	int		i;
3156 
3157 	ASSERT(MUTEX_HELD(&ibcm_svc_info_lock));
3158 	tmp = ibcm_ar_list;
3159 	while (tmp != NULL) {
3160 		if (tmp->ar.ar_gid.gid_prefix == arp->ar_gid.gid_prefix &&
3161 		    tmp->ar.ar_gid.gid_guid == arp->ar_gid.gid_guid &&
3162 		    tmp->ar.ar_pkey == arp->ar_pkey) {
3163 			for (i = 0; i < IBCM_DAPL_ATS_NBYTES; i++)
3164 				if (tmp->ar.ar_data[i] != arp->ar_data[i])
3165 					return (IBT_INCONSISTENT_AR);
3166 			*arpp = tmp;
3167 			return (IBT_SUCCESS);
3168 		} else {
3169 			/* if all the data bytes match, we have inconsistency */
3170 			for (i = 0; i < IBCM_DAPL_ATS_NBYTES; i++)
3171 				if (tmp->ar.ar_data[i] != arp->ar_data[i])
3172 					break;
3173 			if (i == IBCM_DAPL_ATS_NBYTES)
3174 				return (IBT_INCONSISTENT_AR);
3175 			/* try next address record */
3176 		}
3177 		tmp = tmp->ar_link;
3178 	}
3179 	return (IBT_AR_NOT_REGISTERED);
3180 }
3181 
3182 ibt_status_t
3183 ibt_register_ar(ibt_clnt_hdl_t ibt_hdl, ibt_ar_t *arp)
3184 {
3185 	ibcm_ar_t		*found;
3186 	ibcm_ar_t		*tmp;
3187 	ibt_status_t		status;
3188 	ibt_status_t		s1, s2;
3189 	char			*s;
3190 	ibcm_ar_ref_t		*hdlp;
3191 	ibcm_ar_t		*new;
3192 	ibcm_ar_t		**linkp;
3193 	ibtl_cm_hca_port_t	cm_port;
3194 	uint16_t		pkey_ix;
3195 	ibcm_hca_info_t		*hcap;
3196 	ibmf_saa_handle_t	saa_handle;
3197 	sa_service_record_t	*srv_recp;
3198 	uint64_t		gid_ored;
3199 
3200 	IBTF_DPRINTF_L3(cmlog, "ibt_register_ar: PKey 0x%X GID %llX:%llX",
3201 	    arp->ar_pkey, (longlong_t)arp->ar_gid.gid_prefix,
3202 	    (longlong_t)arp->ar_gid.gid_guid);
3203 
3204 	/*
3205 	 * If P_Key is 0, but GID is not, this query is invalid.
3206 	 * If GID is 0, but P_Key is not, this query is invalid.
3207 	 */
3208 	gid_ored = arp->ar_gid.gid_guid | arp->ar_gid.gid_prefix;
3209 	if ((arp->ar_pkey == 0 && gid_ored != 0ULL) ||
3210 	    (arp->ar_pkey != 0 && gid_ored == 0ULL)) {
3211 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: "
3212 		    "GID/P_Key is not valid");
3213 		return (IBT_INVALID_PARAM);
3214 	}
3215 
3216 	/* assume success, so these might be needed */
3217 	hdlp = kmem_alloc(sizeof (*hdlp), KM_SLEEP);
3218 	new = kmem_zalloc(sizeof (*new), KM_SLEEP);
3219 
3220 	mutex_enter(&ibcm_svc_info_lock);
3221 	/* search for existing GID/pkey (there can be at most 1) */
3222 	status = ibcm_search_ar(arp, &found);
3223 	if (status == IBT_INCONSISTENT_AR) {
3224 		mutex_exit(&ibcm_svc_info_lock);
3225 		kmem_free(new, sizeof (*new));
3226 		kmem_free(hdlp, sizeof (*hdlp));
3227 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: "
3228 		    "address record is inconsistent with a known one");
3229 		return (IBT_INCONSISTENT_AR);
3230 	} else if (status == IBT_SUCCESS) {
3231 		if (found->ar_flags == IBCM_AR_INITING) {
3232 			found->ar_waiters++;
3233 			cv_wait(&found->ar_cv, &ibcm_svc_info_lock);
3234 			found->ar_waiters--;
3235 		}
3236 		if (found->ar_flags == IBCM_AR_FAILED) {
3237 			if (found->ar_waiters == 0) {
3238 				cv_destroy(&found->ar_cv);
3239 				kmem_free(found, sizeof (*found));
3240 			}
3241 			mutex_exit(&ibcm_svc_info_lock);
3242 			kmem_free(new, sizeof (*new));
3243 			kmem_free(hdlp, sizeof (*hdlp));
3244 			return (ibt_get_module_failure(IBT_FAILURE_IBCM, 0));
3245 		}
3246 		hdlp->ar_ibt_hdl = ibt_hdl;
3247 		hdlp->ar_ref_link = found->ar_ibt_hdl_list;
3248 		found->ar_ibt_hdl_list = hdlp;
3249 		mutex_exit(&ibcm_svc_info_lock);
3250 		kmem_free(new, sizeof (*new));
3251 		ibtl_cm_change_service_cnt(ibt_hdl, 1);
3252 		return (IBT_SUCCESS);
3253 	} else {
3254 		ASSERT(status == IBT_AR_NOT_REGISTERED);
3255 	}
3256 	hdlp->ar_ref_link = NULL;
3257 	hdlp->ar_ibt_hdl = ibt_hdl;
3258 	new->ar_ibt_hdl_list = hdlp;
3259 	new->ar = *arp;
3260 	new->ar_flags = IBCM_AR_INITING;
3261 	new->ar_waiters = 0;
3262 	cv_init(&new->ar_cv, NULL, CV_DEFAULT, NULL);
3263 	new->ar_link = ibcm_ar_list;
3264 	ibcm_ar_list = new;
3265 
3266 	/* verify GID/pkey is valid for a local port, etc. */
3267 	hcap = NULL;
3268 	if ((s1 = ibtl_cm_get_hca_port(arp->ar_gid, 0, &cm_port))
3269 	    != IBT_SUCCESS ||
3270 	    (s2 = ibt_pkey2index_byguid(cm_port.hp_hca_guid, cm_port.hp_port,
3271 	    arp->ar_pkey, &pkey_ix)) != IBT_SUCCESS ||
3272 	    (hcap = ibcm_find_hca_entry(cm_port.hp_hca_guid)) == NULL) {
3273 		cv_destroy(&new->ar_cv);
3274 		ibcm_ar_list = new->ar_link;
3275 		mutex_exit(&ibcm_svc_info_lock);
3276 		kmem_free(new, sizeof (*new));
3277 		kmem_free(hdlp, sizeof (*hdlp));
3278 		status = IBT_INVALID_PARAM;
3279 		if (s1 == IBT_HCA_PORT_NOT_ACTIVE) {
3280 			s = "PORT DOWN";
3281 			status = IBT_HCA_PORT_NOT_ACTIVE;
3282 		} else if (s1 != IBT_SUCCESS)
3283 			s = "GID not found";
3284 		else if (s2 != IBT_SUCCESS)
3285 			s = "PKEY not found";
3286 		else
3287 			s = "CM could not find its HCA entry";
3288 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: %s, status = %d",
3289 		    s, status);
3290 		return (status);
3291 	}
3292 	mutex_exit(&ibcm_svc_info_lock);
3293 	saa_handle = ibcm_get_saa_handle(hcap, cm_port.hp_port);
3294 
3295 	/* create service record */
3296 	srv_recp = kmem_zalloc(sizeof (*srv_recp), KM_SLEEP);
3297 	srv_recp->ServiceLease = 0xFFFFFFFF;	/* infinite */
3298 	srv_recp->ServiceP_Key = arp->ar_pkey;
3299 	srv_recp->ServiceKey_hi = 0xDA410000ULL;	/* DAPL */
3300 	srv_recp->ServiceKey_lo = 0xA7500000ULL;	/* ATS */
3301 	(void) strcpy((char *)srv_recp->ServiceName, IBCM_DAPL_ATS_NAME);
3302 	srv_recp->ServiceGID = arp->ar_gid;
3303 	bcopy(arp->ar_data, srv_recp->ServiceData, IBCM_DAPL_ATS_NBYTES);
3304 	srv_recp->ServiceID = IBCM_DAPL_ATS_SID;
3305 
3306 	/* insert service record into the SA */
3307 
3308 	IBCM_DUMP_SERVICE_REC(srv_recp);
3309 
3310 	if (saa_handle != NULL)
3311 		status = ibcm_write_service_record(saa_handle, srv_recp,
3312 		    IBMF_SAA_UPDATE);
3313 	else
3314 		status = IBT_HCA_PORT_NOT_ACTIVE;
3315 
3316 	if (status != IBT_SUCCESS) {
3317 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: sa access fails %d, "
3318 		    "sid %llX", status, (longlong_t)srv_recp->ServiceID);
3319 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: FAILED for gid "
3320 		    "%llX:%llX pkey 0x%X", (longlong_t)arp->ar_gid.gid_prefix,
3321 		    (longlong_t)arp->ar_gid.gid_guid, arp->ar_pkey);
3322 
3323 		kmem_free(srv_recp, sizeof (*srv_recp));
3324 		kmem_free(hdlp, sizeof (*hdlp));
3325 
3326 		mutex_enter(&ibcm_svc_info_lock);
3327 		linkp = &ibcm_ar_list;
3328 		tmp = *linkp;
3329 		while (tmp != NULL) {
3330 			if (tmp == new) {
3331 				*linkp = new->ar_link;
3332 				break;
3333 			}
3334 			linkp = &tmp->ar_link;
3335 			tmp = *linkp;
3336 		}
3337 		if (new->ar_waiters > 0) {
3338 			new->ar_flags = IBCM_AR_FAILED;
3339 			cv_broadcast(&new->ar_cv);
3340 			mutex_exit(&ibcm_svc_info_lock);
3341 		} else {
3342 			cv_destroy(&new->ar_cv);
3343 			mutex_exit(&ibcm_svc_info_lock);
3344 			kmem_free(new, sizeof (*new));
3345 		}
3346 		ibcm_dec_hca_acc_cnt(hcap);
3347 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: "
3348 		    "IBMF_SAA failed to write address record");
3349 	} else {					/* SUCCESS */
3350 		uint8_t		*b;
3351 
3352 		IBTF_DPRINTF_L3(cmlog, "ibt_register_ar: SUCCESS for gid "
3353 		    "%llx:%llx pkey %x", (longlong_t)arp->ar_gid.gid_prefix,
3354 		    (longlong_t)arp->ar_gid.gid_guid, arp->ar_pkey);
3355 		b = arp->ar_data;
3356 
3357 		IBTF_DPRINTF_L3(cmlog, "ibt_register_ar:"
3358 		    " data %d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d",
3359 		    b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8], b[9],
3360 		    b[10], b[11], b[12], b[13], b[14], b[15]);
3361 		mutex_enter(&ibcm_svc_info_lock);
3362 		new->ar_srv_recp = srv_recp;
3363 		new->ar_saa_handle = saa_handle;
3364 		new->ar_port = cm_port.hp_port;
3365 		new->ar_hcap = hcap;
3366 		new->ar_flags = IBCM_AR_SUCCESS;
3367 		if (new->ar_waiters > 0)
3368 			cv_broadcast(&new->ar_cv);
3369 		mutex_exit(&ibcm_svc_info_lock);
3370 		ibtl_cm_change_service_cnt(ibt_hdl, 1);
3371 		/* do not call ibcm_dec_hca_acc_cnt(hcap) until deregister */
3372 	}
3373 	return (status);
3374 }
3375 
3376 ibt_status_t
3377 ibt_deregister_ar(ibt_clnt_hdl_t ibt_hdl, ibt_ar_t *arp)
3378 {
3379 	ibcm_ar_t		*found;
3380 	ibcm_ar_t		*tmp;
3381 	ibcm_ar_t		**linkp;
3382 	ibcm_ar_ref_t		*hdlp;
3383 	ibcm_ar_ref_t		**hdlpp;
3384 	ibt_status_t		status;
3385 	ibmf_saa_handle_t	saa_handle;
3386 	sa_service_record_t	*srv_recp;
3387 	uint64_t		gid_ored;
3388 
3389 	IBTF_DPRINTF_L3(cmlog, "ibt_deregister_ar: pkey %x", arp->ar_pkey);
3390 	IBTF_DPRINTF_L3(cmlog, "ibt_deregister_ar: gid %llx:%llx",
3391 	    (longlong_t)arp->ar_gid.gid_prefix,
3392 	    (longlong_t)arp->ar_gid.gid_guid);
3393 
3394 	/*
3395 	 * If P_Key is 0, but GID is not, this query is invalid.
3396 	 * If GID is 0, but P_Key is not, this query is invalid.
3397 	 */
3398 	gid_ored = arp->ar_gid.gid_guid | arp->ar_gid.gid_prefix;
3399 	if ((arp->ar_pkey == 0 && gid_ored != 0ULL) ||
3400 	    (arp->ar_pkey != 0 && gid_ored == 0ULL)) {
3401 		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
3402 		    "GID/P_Key is not valid");
3403 		return (IBT_INVALID_PARAM);
3404 	}
3405 
3406 	mutex_enter(&ibcm_svc_info_lock);
3407 	/* search for existing GID/pkey (there can be at most 1) */
3408 	status = ibcm_search_ar(arp, &found);
3409 	if (status == IBT_INCONSISTENT_AR || status == IBT_AR_NOT_REGISTERED) {
3410 		mutex_exit(&ibcm_svc_info_lock);
3411 		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
3412 		    "address record not found");
3413 		return (IBT_AR_NOT_REGISTERED);
3414 	}
3415 	ASSERT(status == IBT_SUCCESS);
3416 
3417 	hdlpp = &found->ar_ibt_hdl_list;
3418 	hdlp = *hdlpp;
3419 	while (hdlp != NULL) {
3420 		if (hdlp->ar_ibt_hdl == ibt_hdl)
3421 			break;
3422 		hdlpp = &hdlp->ar_ref_link;
3423 		hdlp = *hdlpp;
3424 	}
3425 	if (hdlp == NULL) {	/* could not find ibt_hdl on list */
3426 		mutex_exit(&ibcm_svc_info_lock);
3427 		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
3428 		    "address record found, but not for this client");
3429 		return (IBT_AR_NOT_REGISTERED);
3430 	}
3431 	*hdlpp = hdlp->ar_ref_link;	/* remove ref for this client */
3432 	if (found->ar_ibt_hdl_list == NULL && found->ar_waiters == 0) {
3433 		/* last entry was removed */
3434 		found->ar_flags = IBCM_AR_INITING; /* hold off register_ar */
3435 		saa_handle = found->ar_saa_handle;
3436 		srv_recp = found->ar_srv_recp;
3437 
3438 		/* wait if this service record is being rewritten */
3439 		while (found->ar_rewrite_state == IBCM_REWRITE_BUSY)
3440 			cv_wait(&ibcm_svc_info_cv, &ibcm_svc_info_lock);
3441 		mutex_exit(&ibcm_svc_info_lock);
3442 
3443 		/* remove service record */
3444 		status = ibcm_write_service_record(saa_handle, srv_recp,
3445 		    IBMF_SAA_DELETE);
3446 		if (status != IBT_SUCCESS)
3447 			IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
3448 			    "IBMF_SAA failed to delete address record");
3449 		mutex_enter(&ibcm_svc_info_lock);
3450 		if (found->ar_waiters == 0) {	/* still no waiters */
3451 			linkp = &ibcm_ar_list;
3452 			tmp = *linkp;
3453 			while (tmp != found) {
3454 				linkp = &tmp->ar_link;
3455 				tmp = *linkp;
3456 			}
3457 			*linkp = tmp->ar_link;
3458 			ibcm_dec_hca_acc_cnt(found->ar_hcap);
3459 			kmem_free(srv_recp, sizeof (*srv_recp));
3460 			cv_destroy(&found->ar_cv);
3461 			kmem_free(found, sizeof (*found));
3462 		} else {
3463 			/* add service record back in for the waiters */
3464 			mutex_exit(&ibcm_svc_info_lock);
3465 			status = ibcm_write_service_record(saa_handle, srv_recp,
3466 			    IBMF_SAA_UPDATE);
3467 			mutex_enter(&ibcm_svc_info_lock);
3468 			if (status == IBT_SUCCESS)
3469 				found->ar_flags = IBCM_AR_SUCCESS;
3470 			else {
3471 				found->ar_flags = IBCM_AR_FAILED;
3472 				IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
3473 				    "IBMF_SAA failed to write address record");
3474 			}
3475 			cv_broadcast(&found->ar_cv);
3476 		}
3477 	}
3478 	mutex_exit(&ibcm_svc_info_lock);
3479 	kmem_free(hdlp, sizeof (*hdlp));
3480 	ibtl_cm_change_service_cnt(ibt_hdl, -1);
3481 	return (status);
3482 }
3483 
3484 ibt_status_t
3485 ibt_query_ar(ib_gid_t *sgid, ibt_ar_t *queryp, ibt_ar_t *resultp)
3486 {
3487 	sa_service_record_t	svcrec_req;
3488 	sa_service_record_t	*svcrec_resp;
3489 	void			*results_p;
3490 	uint64_t		component_mask = 0;
3491 	uint64_t		gid_ored;
3492 	size_t			length;
3493 	int			num_rec;
3494 	int			i;
3495 	ibmf_saa_access_args_t	access_args;
3496 	ibt_status_t		retval;
3497 	ibtl_cm_hca_port_t	cm_port;
3498 	ibcm_hca_info_t		*hcap;
3499 	ibmf_saa_handle_t	saa_handle;
3500 
3501 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar(%p, %p)", queryp, resultp);
3502 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: sgid %llx:%llx",
3503 	    (longlong_t)sgid->gid_prefix, (longlong_t)sgid->gid_guid);
3504 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: query_pkey %x", queryp->ar_pkey);
3505 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: query_gid %llx:%llx",
3506 	    (longlong_t)queryp->ar_gid.gid_prefix,
3507 	    (longlong_t)queryp->ar_gid.gid_guid);
3508 
3509 	/*
3510 	 * If P_Key is 0, but GID is not, this query is invalid.
3511 	 * If GID is 0, but P_Key is not, this query is invalid.
3512 	 */
3513 	gid_ored = queryp->ar_gid.gid_guid | queryp->ar_gid.gid_prefix;
3514 	if ((queryp->ar_pkey == 0 && gid_ored != 0ULL) ||
3515 	    (queryp->ar_pkey != 0 && gid_ored == 0ULL)) {
3516 		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar: GID/P_Key is not valid");
3517 		return (IBT_INVALID_PARAM);
3518 	}
3519 
3520 	hcap = NULL;
3521 	if (ibtl_cm_get_hca_port(*sgid, 0, &cm_port) != IBT_SUCCESS ||
3522 	    (hcap = ibcm_find_hca_entry(cm_port.hp_hca_guid)) == NULL ||
3523 	    (saa_handle = ibcm_get_saa_handle(hcap, cm_port.hp_port)) == NULL) {
3524 		if (hcap != NULL)
3525 			ibcm_dec_hca_acc_cnt(hcap);
3526 		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar: sgid is not valid");
3527 		return (IBT_INVALID_PARAM);
3528 	}
3529 
3530 	bzero(&svcrec_req, sizeof (svcrec_req));
3531 
3532 	/* Is GID/P_Key Specified. */
3533 	if (queryp->ar_pkey != 0) {	/* GID is non-zero from check above */
3534 		svcrec_req.ServiceP_Key = queryp->ar_pkey;
3535 		component_mask |= SA_SR_COMPMASK_PKEY;
3536 		IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: P_Key %X",
3537 		    queryp->ar_pkey);
3538 		svcrec_req.ServiceGID = queryp->ar_gid;
3539 		component_mask |= SA_SR_COMPMASK_GID;
3540 		IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: GID %llX:%llX",
3541 		    (longlong_t)queryp->ar_gid.gid_prefix,
3542 		    (longlong_t)queryp->ar_gid.gid_guid);
3543 	}
3544 
3545 	/* Is ServiceData Specified. */
3546 	for (i = 0; i < IBCM_DAPL_ATS_NBYTES; i++) {
3547 		if (queryp->ar_data[i] != 0) {
3548 			bcopy(queryp->ar_data, svcrec_req.ServiceData,
3549 			    IBCM_DAPL_ATS_NBYTES);
3550 			component_mask |= 0xFFFF << 7;	/* all 16 Data8 */
3551 							/* components */
3552 			break;
3553 		}
3554 	}
3555 
3556 	/* Service Name */
3557 	(void) strcpy((char *)svcrec_req.ServiceName, IBCM_DAPL_ATS_NAME);
3558 	component_mask |= SA_SR_COMPMASK_NAME;
3559 
3560 	svcrec_req.ServiceID = IBCM_DAPL_ATS_SID;
3561 	component_mask |= SA_SR_COMPMASK_ID;
3562 
3563 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: "
3564 	    "Perform SA Access: Mask: 0x%X", component_mask);
3565 
3566 	/*
3567 	 * Call in SA Access retrieve routine to get Service Records.
3568 	 *
3569 	 * SA Access framework allocated memory for the "results_p".
3570 	 * Make sure to deallocate once we are done with the results_p.
3571 	 * The size of the buffer allocated will be as returned in
3572 	 * "length" field.
3573 	 */
3574 	access_args.sq_attr_id = SA_SERVICERECORD_ATTRID;
3575 	access_args.sq_access_type = IBMF_SAA_RETRIEVE;
3576 	access_args.sq_component_mask = component_mask;
3577 	access_args.sq_template = &svcrec_req;
3578 	access_args.sq_template_length = sizeof (sa_service_record_t);
3579 	access_args.sq_callback = NULL;
3580 	access_args.sq_callback_arg = NULL;
3581 
3582 	retval = ibcm_contact_sa_access(saa_handle, &access_args, &length,
3583 	    &results_p);
3584 
3585 	ibcm_dec_hca_acc_cnt(hcap);
3586 	if (retval != IBT_SUCCESS) {
3587 		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar: SA Access Failed");
3588 		return (retval);
3589 	}
3590 
3591 	num_rec = length / sizeof (sa_service_record_t);
3592 
3593 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: "
3594 	    "Found %d Service Records.", num_rec);
3595 
3596 	/* Validate the returned number of records. */
3597 	if ((results_p != NULL) && (num_rec > 0)) {
3598 		uint8_t		*b;
3599 
3600 		/* Just return info from the first service record. */
3601 		svcrec_resp = (sa_service_record_t *)results_p;
3602 
3603 		/* The Service GID and Service ID */
3604 		resultp->ar_gid = svcrec_resp->ServiceGID;
3605 		resultp->ar_pkey = svcrec_resp->ServiceP_Key;
3606 		bcopy(svcrec_resp->ServiceData,
3607 		    resultp->ar_data, IBCM_DAPL_ATS_NBYTES);
3608 
3609 		IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: "
3610 		    "Found: pkey %x dgid %llX:%llX", resultp->ar_pkey,
3611 		    (longlong_t)resultp->ar_gid.gid_prefix,
3612 		    (longlong_t)resultp->ar_gid.gid_guid);
3613 		b = resultp->ar_data;
3614 		IBTF_DPRINTF_L3(cmlog, "ibt_query_ar:"
3615 		    " data %d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d",
3616 		    b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8], b[9],
3617 		    b[10], b[11], b[12], b[13], b[14], b[15]);
3618 
3619 		/* Deallocate the memory for results_p. */
3620 		kmem_free(results_p, length);
3621 		if (num_rec > 1)
3622 			retval = IBT_MULTIPLE_AR;
3623 		else
3624 			retval = IBT_SUCCESS;
3625 	} else {
3626 		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar: "
3627 		    "ibmf_sa_access found 0 matching records");
3628 		retval = IBT_AR_NOT_REGISTERED;
3629 	}
3630 	return (retval);
3631 }
3632 
3633 /* mark all ATS service records associated with the port */
3634 static void
3635 ibcm_mark_ar(ib_guid_t hca_guid, uint8_t port)
3636 {
3637 	ibcm_ar_t	*tmp;
3638 
3639 	ASSERT(MUTEX_HELD(&ibcm_svc_info_lock));
3640 	for (tmp = ibcm_ar_list; tmp != NULL; tmp = tmp->ar_link) {
3641 		if (tmp->ar_hcap == NULL)
3642 			continue;
3643 		if (tmp->ar_hcap->hca_guid == hca_guid &&
3644 		    tmp->ar_port == port) {
3645 			/* even if it's busy, we mark it for rewrite */
3646 			tmp->ar_rewrite_state = IBCM_REWRITE_NEEDED;
3647 		}
3648 	}
3649 }
3650 
3651 /* rewrite all ATS service records */
3652 static int
3653 ibcm_rewrite_ar(void)
3654 {
3655 	ibcm_ar_t		*tmp;
3656 	ibmf_saa_handle_t	saa_handle;
3657 	sa_service_record_t	*srv_recp;
3658 	ibt_status_t		rval;
3659 	int			did_something = 0;
3660 
3661 	ASSERT(MUTEX_HELD(&ibcm_svc_info_lock));
3662 check_for_work:
3663 	for (tmp = ibcm_ar_list; tmp != NULL; tmp = tmp->ar_link) {
3664 		if (tmp->ar_rewrite_state == IBCM_REWRITE_NEEDED) {
3665 			tmp->ar_rewrite_state = IBCM_REWRITE_BUSY;
3666 			saa_handle = tmp->ar_saa_handle;
3667 			srv_recp = tmp->ar_srv_recp;
3668 			mutex_exit(&ibcm_svc_info_lock);
3669 			IBTF_DPRINTF_L3(cmlog, "ibcm_rewrite_ar: "
3670 			    "rewriting ar @ %p", tmp);
3671 			did_something = 1;
3672 			rval = ibcm_write_service_record(saa_handle, srv_recp,
3673 			    IBMF_SAA_UPDATE);
3674 			if (rval != IBT_SUCCESS)
3675 				IBTF_DPRINTF_L2(cmlog, "ibcm_rewrite_ar: "
3676 				    "ibcm_write_service_record failed: "
3677 				    "status = %d", rval);
3678 			mutex_enter(&ibcm_svc_info_lock);
3679 			/* if it got marked again, then we want to rewrite */
3680 			if (tmp->ar_rewrite_state == IBCM_REWRITE_BUSY)
3681 				tmp->ar_rewrite_state = IBCM_REWRITE_IDLE;
3682 			/* in case there was a waiter... */
3683 			cv_broadcast(&ibcm_svc_info_cv);
3684 			goto check_for_work;
3685 		}
3686 	}
3687 	return (did_something);
3688 }
3689 
3690 static void
3691 ibcm_rewrite_svc_record(ibcm_svc_info_t *srv_hdl, ibcm_svc_bind_t *sbindp)
3692 {
3693 	ibcm_hca_info_t		*hcap;
3694 	ib_svc_id_t		sid, start_sid, end_sid;
3695 	ibmf_saa_handle_t	saa_handle;
3696 	sa_service_record_t	srv_rec;
3697 	ibt_status_t		rval;
3698 
3699 	hcap = ibcm_find_hca_entry(sbindp->sbind_hcaguid);
3700 	if (hcap == NULL) {
3701 		IBTF_DPRINTF_L2(cmlog, "ibcm_rewrite_svc_record: "
3702 		    "NO HCA found for HCA GUID %llX", sbindp->sbind_hcaguid);
3703 		return;
3704 	}
3705 
3706 	saa_handle = ibcm_get_saa_handle(hcap, sbindp->sbind_port);
3707 	if (saa_handle == NULL) {
3708 		IBTF_DPRINTF_L2(cmlog, "ibcm_rewrite_svc_record: "
3709 		    "saa_handle is NULL");
3710 		ibcm_dec_hca_acc_cnt(hcap);
3711 		return;
3712 	}
3713 
3714 	IBTF_DPRINTF_L3(cmlog, "ibcm_rewrite_svc_record: "
3715 	    "rewriting svc '%s', port_guid = %llX", sbindp->sbind_name,
3716 	    sbindp->sbind_gid.gid_guid);
3717 
3718 	bzero(&srv_rec, sizeof (srv_rec));
3719 
3720 	srv_rec.ServiceLease = sbindp->sbind_lease;
3721 	srv_rec.ServiceP_Key = sbindp->sbind_pkey;
3722 	srv_rec.ServiceKey_hi = sbindp->sbind_key[0];
3723 	srv_rec.ServiceKey_lo = sbindp->sbind_key[1];
3724 	(void) strcpy((char *)srv_rec.ServiceName, sbindp->sbind_name);
3725 	srv_rec.ServiceGID = sbindp->sbind_gid;
3726 
3727 	bcopy(sbindp->sbind_data, srv_rec.ServiceData, IB_SVC_DATA_LEN);
3728 
3729 	/* insert srv record into the SA */
3730 	start_sid = srv_hdl->svc_id;
3731 	end_sid = start_sid + srv_hdl->svc_num_sids - 1;
3732 	for (sid = start_sid; sid <= end_sid; sid++) {
3733 		srv_rec.ServiceID = sid;
3734 
3735 		rval = ibcm_write_service_record(saa_handle, &srv_rec,
3736 		    IBMF_SAA_UPDATE);
3737 
3738 		IBTF_DPRINTF_L4(cmlog, "ibcm_rewrite_svc_record: "
3739 		    "ibcm_write_service_record, SvcId = %llX, "
3740 		    "rval = %d", (longlong_t)sid, rval);
3741 		if (rval != IBT_SUCCESS) {
3742 			IBTF_DPRINTF_L2(cmlog, "ibcm_rewrite_svc_record:"
3743 			    " ibcm_write_service_record fails %d sid %llX",
3744 			    rval, (longlong_t)sid);
3745 		}
3746 	}
3747 	ibcm_dec_hca_acc_cnt(hcap);
3748 }
3749 
3750 /*
3751  * Task to mark all service records as needing to be rewritten to the SM/SA.
3752  * This task does not return until all of them have been rewritten.
3753  */
3754 void
3755 ibcm_service_record_rewrite_task(void *arg)
3756 {
3757 	ibcm_port_up_t	*pup = (ibcm_port_up_t *)arg;
3758 	ib_guid_t	hca_guid = pup->pup_hca_guid;
3759 	uint8_t		port = pup->pup_port;
3760 	ibcm_svc_info_t	*svcp;
3761 	ibcm_svc_bind_t	*sbp;
3762 	avl_tree_t	*avl_tree = &ibcm_svc_avl_tree;
3763 	static int	task_is_running = 0;
3764 
3765 	IBTF_DPRINTF_L3(cmlog, "ibcm_service_record_rewrite_task STARTED "
3766 	    "for hca_guid %llX, port %d", hca_guid, port);
3767 
3768 	mutex_enter(&ibcm_svc_info_lock);
3769 	ibcm_mark_ar(hca_guid, port);
3770 	for (svcp = avl_first(avl_tree); svcp != NULL;
3771 	    svcp = avl_walk(avl_tree, svcp, AVL_AFTER)) {
3772 		sbp = svcp->svc_bind_list;
3773 		while (sbp != NULL) {
3774 			if (sbp->sbind_pkey != 0 &&
3775 			    sbp->sbind_port == port &&
3776 			    sbp->sbind_hcaguid == hca_guid) {
3777 				/* even if it's busy, we mark it for rewrite */
3778 				sbp->sbind_rewrite_state = IBCM_REWRITE_NEEDED;
3779 			}
3780 			sbp = sbp->sbind_link;
3781 		}
3782 	}
3783 	if (task_is_running) {
3784 		/* let the other task thread finish the work */
3785 		mutex_exit(&ibcm_svc_info_lock);
3786 		return;
3787 	}
3788 	task_is_running = 1;
3789 
3790 	(void) ibcm_rewrite_ar();
3791 
3792 check_for_work:
3793 	for (svcp = avl_first(avl_tree); svcp != NULL;
3794 	    svcp = avl_walk(avl_tree, svcp, AVL_AFTER)) {
3795 		sbp = svcp->svc_bind_list;
3796 		while (sbp != NULL) {
3797 			if (sbp->sbind_rewrite_state == IBCM_REWRITE_NEEDED) {
3798 				sbp->sbind_rewrite_state = IBCM_REWRITE_BUSY;
3799 				mutex_exit(&ibcm_svc_info_lock);
3800 				ibcm_rewrite_svc_record(svcp, sbp);
3801 				mutex_enter(&ibcm_svc_info_lock);
3802 				/* if it got marked again, we want to rewrite */
3803 				if (sbp->sbind_rewrite_state ==
3804 				    IBCM_REWRITE_BUSY)
3805 					sbp->sbind_rewrite_state =
3806 					    IBCM_REWRITE_IDLE;
3807 				/* in case there was a waiter... */
3808 				cv_broadcast(&ibcm_svc_info_cv);
3809 				goto check_for_work;
3810 			}
3811 			sbp = sbp->sbind_link;
3812 		}
3813 	}
3814 	/*
3815 	 * If there were no service records to write, and we failed to
3816 	 * have to rewrite any more ATS service records, then we're done.
3817 	 */
3818 	if (ibcm_rewrite_ar() != 0)
3819 		goto check_for_work;
3820 	task_is_running = 0;
3821 	mutex_exit(&ibcm_svc_info_lock);
3822 
3823 	IBTF_DPRINTF_L3(cmlog, "ibcm_service_record_rewrite_task DONE");
3824 	kmem_free(pup, sizeof (ibcm_port_up_t));
3825 }
3826 
3827 ibt_status_t
3828 ibt_ofuvcm_get_req_data(void *session_id, ibt_ofuvcm_req_data_t *req_data)
3829 {
3830 	ibcm_state_data_t 	*statep = (ibcm_state_data_t *)session_id;
3831 	ibcm_req_msg_t 		*req_msgp;
3832 
3833 	IBTF_DPRINTF_L3(cmlog, "ibt_get_ofuvcm_req_data: session_id %p",
3834 	    session_id);
3835 	mutex_enter(&statep->state_mutex);
3836 	if ((statep->state != IBCM_STATE_REQ_RCVD) &&
3837 	    (statep->state != IBCM_STATE_MRA_SENT)) {
3838 		IBTF_DPRINTF_L2(cmlog, "ibt_get_ofuvcm_req_data: Invalid "
3839 		    "State %x", statep->state);
3840 		mutex_exit(&statep->state_mutex);
3841 		return (IBT_CHAN_STATE_INVALID);
3842 	}
3843 	if (statep->mode == IBCM_ACTIVE_MODE) {
3844 		IBTF_DPRINTF_L2(cmlog, "ibt_get_ofuvcm_req_data: Active mode "
3845 		    "not supported");
3846 		mutex_exit(&statep->state_mutex);
3847 		return (IBT_INVALID_PARAM);
3848 	}
3849 	ASSERT(statep->req_msgp);
3850 
3851 	/*
3852 	 * Fill in the additional req message values reqired for
3853 	 * RTR transition.
3854 	 * Should the PSN be same as the active side??
3855 	 */
3856 	req_msgp = (ibcm_req_msg_t *)statep->req_msgp;
3857 	req_data->req_rnr_nak_time = ibcm_default_rnr_nak_time;
3858 	req_data->req_path_mtu = req_msgp->req_mtu_plus >> 4;
3859 	req_data->req_rq_psn = b2h32(req_msgp->req_starting_psn_plus) >> 8;
3860 	mutex_exit(&statep->state_mutex);
3861 	return (IBT_SUCCESS);
3862 }
3863 
3864 ibt_status_t
3865 ibt_ofuvcm_proceed(ibt_cm_event_type_t event, void *session_id,
3866     ibt_cm_status_t status, ibt_cm_proceed_reply_t *cm_event_data,
3867     void *priv_data, ibt_priv_data_len_t priv_data_len)
3868 {
3869 	ibcm_state_data_t *statep = (ibcm_state_data_t *)session_id;
3870 	ibt_status_t		ret;
3871 
3872 	IBTF_DPRINTF_L3(cmlog, "ibt_ofuvcm_proceed chan 0x%p event %x "
3873 	    "status %x session_id %p", statep->channel, event, status,
3874 	    session_id);
3875 
3876 	IBTF_DPRINTF_L5(cmlog, "ibt_ofuvcm_proceed chan 0x%p "
3877 	    "cm_event_data %p, priv_data %p priv_data_len %x",
3878 	    statep->channel, cm_event_data, priv_data, priv_data_len);
3879 
3880 	/* validate session_id and status */
3881 	if ((statep == NULL) || (status == IBT_CM_DEFER)) {
3882 		IBTF_DPRINTF_L2(cmlog, "ibt_ofuvcm_proceed : Invalid Args");
3883 		return (IBT_INVALID_PARAM);
3884 	}
3885 
3886 	if (event != IBT_CM_EVENT_REQ_RCV) {
3887 		IBTF_DPRINTF_L2(cmlog, "ibt_ofuvcm_proceed : only for REQ_RCV");
3888 		return (IBT_INVALID_PARAM);
3889 	}
3890 	mutex_enter(&statep->state_mutex);
3891 	statep->is_this_ofuv_chan = B_TRUE;
3892 	mutex_exit(&statep->state_mutex);
3893 
3894 	ret = ibt_cm_proceed(event, session_id, status, cm_event_data,
3895 	    priv_data, priv_data_len);
3896 	return (ret);
3897 }
3898 
3899 /*
3900  * Function:
3901  * 	ibt_cm_proceed
3902  *
3903  * Verifies the arguments and dispatches the cm state machine processing
3904  * via taskq
3905  */
3906 
3907 ibt_status_t
3908 ibt_cm_proceed(ibt_cm_event_type_t event, void *session_id,
3909     ibt_cm_status_t status, ibt_cm_proceed_reply_t *cm_event_data,
3910     void *priv_data, ibt_priv_data_len_t priv_data_len)
3911 {
3912 	ibcm_state_data_t *statep = (ibcm_state_data_t *)session_id;
3913 	ibcm_proceed_targs_t	*proceed_targs;
3914 	ibcm_proceed_error_t	proceed_error;
3915 
3916 	IBTF_DPRINTF_L3(cmlog, "ibt_cm_proceed chan 0x%p event %x status %x "
3917 	    "session_id %p", statep->channel, event, status, session_id);
3918 
3919 	IBTF_DPRINTF_L5(cmlog, "ibt_cm_proceed chan 0x%p cm_event_data %p, "
3920 	    "priv_data %p priv_data_len %x", statep->channel, cm_event_data,
3921 	    priv_data, priv_data_len);
3922 
3923 	/* validate session_id and status */
3924 	if ((statep == NULL) || (status == IBT_CM_DEFER)) {
3925 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : Invalid Args");
3926 		return (IBT_INVALID_PARAM);
3927 	}
3928 
3929 	/* If priv data len specified, then priv_data cannot be NULL */
3930 	if ((priv_data_len > 0) && (priv_data == NULL))
3931 		return (IBT_INVALID_PARAM);
3932 
3933 	proceed_error = IBCM_PROCEED_INVALID_NONE;
3934 
3935 	mutex_enter(&statep->state_mutex);
3936 	if (event == IBT_CM_EVENT_REQ_RCV) {
3937 
3938 		if ((statep->state != IBCM_STATE_REQ_RCVD) &&
3939 		    (statep->state != IBCM_STATE_MRA_SENT))
3940 			proceed_error = IBCM_PROCEED_INVALID_EVENT_STATE;
3941 		else if (priv_data_len > IBT_REP_PRIV_DATA_SZ)
3942 			proceed_error = IBCM_PROCEED_INVALID_PRIV_SZ;
3943 
3944 	} else if (event == IBT_CM_EVENT_REP_RCV) {
3945 		if ((statep->state != IBCM_STATE_REP_RCVD) &&
3946 		    (statep->state != IBCM_STATE_MRA_REP_SENT))
3947 			proceed_error = IBCM_PROCEED_INVALID_EVENT_STATE;
3948 		else if (priv_data_len > IBT_RTU_PRIV_DATA_SZ)
3949 			proceed_error = IBCM_PROCEED_INVALID_PRIV_SZ;
3950 	} else if (event == IBT_CM_EVENT_LAP_RCV) {
3951 		if ((statep->ap_state != IBCM_AP_STATE_LAP_RCVD) &&
3952 		    (statep->ap_state != IBCM_AP_STATE_MRA_LAP_SENT))
3953 			proceed_error = IBCM_PROCEED_INVALID_EVENT_STATE;
3954 		else if (priv_data_len > IBT_APR_PRIV_DATA_SZ)
3955 			proceed_error = IBCM_PROCEED_INVALID_PRIV_SZ;
3956 	} else if (event == IBT_CM_EVENT_CONN_CLOSED) {
3957 		if (statep->state != IBCM_STATE_DREQ_RCVD)
3958 			proceed_error = IBCM_PROCEED_INVALID_EVENT_STATE;
3959 		else if (priv_data_len > IBT_DREP_PRIV_DATA_SZ)
3960 			proceed_error = IBCM_PROCEED_INVALID_PRIV_SZ;
3961 	} else {
3962 			proceed_error = IBCM_PROCEED_INVALID_EVENT;
3963 	}
3964 
3965 	/* if there is an error, print an error message and return */
3966 	if (proceed_error != IBCM_PROCEED_INVALID_NONE) {
3967 		mutex_exit(&statep->state_mutex);
3968 		if (proceed_error == IBCM_PROCEED_INVALID_EVENT_STATE) {
3969 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p"
3970 			    "Invalid Event/State combination specified",
3971 			    statep->channel);
3972 			return (IBT_INVALID_PARAM);
3973 		} else if (proceed_error == IBCM_PROCEED_INVALID_PRIV_SZ) {
3974 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p"
3975 			    "Invalid Event/priv len combination specified",
3976 			    statep->channel);
3977 			return (IBT_INVALID_PARAM);
3978 		} else if (proceed_error == IBCM_PROCEED_INVALID_EVENT) {
3979 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p"
3980 			    "Invalid Event specified", statep->channel);
3981 			return (IBT_INVALID_PARAM);
3982 		} else {
3983 			ASSERT(proceed_error == IBCM_PROCEED_INVALID_LAP);
3984 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p"
3985 			    "IBT_CM_EVENT_LAP_RCV not supported",
3986 			    statep->channel);
3987 			/* UNTIL HCA DRIVER ENABLES AP SUPPORT, FAIL THE CALL */
3988 			return (IBT_APM_NOT_SUPPORTED);
3989 		}
3990 	}
3991 
3992 
3993 	/* wait until client's CM handler returns DEFER status back to CM */
3994 
3995 	while (statep->clnt_proceed == IBCM_BLOCK) {
3996 		IBTF_DPRINTF_L5(cmlog, "ibt_cm_proceed : chan 0x%p blocked for "
3997 		    "return of client's cm handler", statep->channel);
3998 		cv_wait(&statep->block_client_cv, &statep->state_mutex);
3999 	}
4000 
4001 	if (statep->clnt_proceed == IBCM_FAIL) {
4002 		mutex_exit(&statep->state_mutex);
4003 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p Failed as "
4004 		    "client returned non-DEFER status from cm handler",
4005 		    statep->channel);
4006 		return (IBT_CHAN_STATE_INVALID);
4007 	}
4008 
4009 	ASSERT(statep->clnt_proceed == IBCM_UNBLOCK);
4010 	statep->clnt_proceed = IBCM_FAIL;
4011 	mutex_exit(&statep->state_mutex);
4012 
4013 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*proceed_targs))
4014 
4015 	/* the state machine processing is done in a separate thread */
4016 
4017 	/* proceed_targs is freed in ibcm_proceed_via_taskq */
4018 	proceed_targs = kmem_alloc(sizeof (ibcm_proceed_targs_t),
4019 	    KM_SLEEP);
4020 
4021 	proceed_targs->event  = event;
4022 	proceed_targs->status = status;
4023 	proceed_targs->priv_data_len = priv_data_len;
4024 
4025 	bcopy(priv_data, proceed_targs->priv_data, priv_data_len);
4026 
4027 	proceed_targs->tst.rc.statep = statep;
4028 	bcopy(cm_event_data, &proceed_targs->tst.rc.rc_cm_event_data,
4029 	    sizeof (ibt_cm_proceed_reply_t));
4030 
4031 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*proceed_targs))
4032 
4033 	(void) taskq_dispatch(ibcm_taskq, ibcm_proceed_via_taskq,
4034 	    proceed_targs, TQ_SLEEP);
4035 
4036 	return (IBT_SUCCESS);
4037 }
4038 
4039 /*
4040  * Function:
4041  * 	ibcm_proceed_via_taskq
4042  *
4043  * Called from taskq, dispatched by ibt_cm_proceed
4044  * Completes the cm state processing for ibt_cm_proceed
4045  */
4046 void
4047 ibcm_proceed_via_taskq(void *targs)
4048 {
4049 	ibcm_proceed_targs_t	*proceed_targs = (ibcm_proceed_targs_t *)targs;
4050 	ibcm_state_data_t *statep = proceed_targs->tst.rc.statep;
4051 	ibt_cm_reason_t reject_reason;
4052 	uint8_t arej_len;
4053 	ibcm_status_t response;
4054 	ibcm_clnt_reply_info_t clnt_info;
4055 
4056 	clnt_info.reply_event = &proceed_targs->tst.rc.rc_cm_event_data;
4057 	clnt_info.priv_data = proceed_targs->priv_data;
4058 	clnt_info.priv_data_len = proceed_targs->priv_data_len;
4059 
4060 	IBTF_DPRINTF_L4(cmlog, "ibcm_proceed_via_taskq chan 0x%p targs %x",
4061 	    statep->channel, targs);
4062 
4063 	if (proceed_targs->event == IBT_CM_EVENT_REQ_RCV) {
4064 		response =
4065 		    ibcm_process_cep_req_cm_hdlr(statep, proceed_targs->status,
4066 		    &clnt_info, &reject_reason, &arej_len,
4067 		    (ibcm_req_msg_t *)statep->defer_cm_msg);
4068 
4069 		ibcm_handle_cep_req_response(statep, response, reject_reason,
4070 		    arej_len);
4071 
4072 	} else if (proceed_targs->event == IBT_CM_EVENT_REP_RCV) {
4073 		response =
4074 		    ibcm_process_cep_rep_cm_hdlr(statep, proceed_targs->status,
4075 		    &clnt_info, &reject_reason, &arej_len,
4076 		    (ibcm_rep_msg_t *)statep->defer_cm_msg);
4077 
4078 		ibcm_handle_cep_rep_response(statep, response, reject_reason,
4079 		    arej_len, (ibcm_rep_msg_t *)statep->defer_cm_msg);
4080 
4081 	} else if (proceed_targs->event == IBT_CM_EVENT_LAP_RCV) {
4082 		ibcm_process_cep_lap_cm_hdlr(statep, proceed_targs->status,
4083 		    &clnt_info, (ibcm_lap_msg_t *)statep->defer_cm_msg,
4084 		    (ibcm_apr_msg_t *)IBCM_OUT_MSGP(statep->lapr_msg));
4085 
4086 		ibcm_post_apr_mad(statep);
4087 
4088 	} else {
4089 		ASSERT(proceed_targs->event == IBT_CM_EVENT_CONN_CLOSED);
4090 		ibcm_handle_cep_dreq_response(statep, proceed_targs->priv_data,
4091 		    proceed_targs->priv_data_len);
4092 	}
4093 
4094 	kmem_free(targs, sizeof (ibcm_proceed_targs_t));
4095 }
4096 
4097 /*
4098  * Function:
4099  * 	ibt_cm_ud_proceed
4100  *
4101  * Verifies the arguments and dispatches the cm state machine processing
4102  * via taskq
4103  */
4104 ibt_status_t
4105 ibt_cm_ud_proceed(void *session_id, ibt_channel_hdl_t ud_channel,
4106     ibt_cm_status_t status, ibt_redirect_info_t *redirect_infop,
4107     void *priv_data, ibt_priv_data_len_t priv_data_len)
4108 {
4109 	ibcm_ud_state_data_t *ud_statep = (ibcm_ud_state_data_t *)session_id;
4110 	ibcm_proceed_targs_t	*proceed_targs;
4111 	ibt_qp_query_attr_t	qp_attr;
4112 	ibt_status_t		retval;
4113 
4114 	IBTF_DPRINTF_L3(cmlog, "ibt_cm_ud_proceed session_id %p "
4115 	    "ud_channel %p ", session_id, ud_channel);
4116 
4117 	IBTF_DPRINTF_L4(cmlog, "ibt_cm_ud_proceed status %x priv_data %p "
4118 	    "priv_data_len %x",  status, priv_data, priv_data_len);
4119 
4120 	/* validate session_id and status */
4121 	if ((ud_statep == NULL) || (status == IBT_CM_DEFER)) {
4122 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_ud_proceed : Invalid Args");
4123 		return (IBT_INVALID_PARAM);
4124 	}
4125 
4126 	/* If priv data len specified, then priv_data cannot be NULL */
4127 	if ((priv_data_len > 0) && (priv_data == NULL))
4128 		return (IBT_INVALID_PARAM);
4129 
4130 	if (priv_data_len > IBT_SIDR_REP_PRIV_DATA_SZ)
4131 		return (IBT_INVALID_PARAM);
4132 
4133 	/* retrieve qpn and qkey from ud channel */
4134 
4135 	/* validate event and statep's state */
4136 
4137 	if (status == IBT_CM_ACCEPT) {
4138 		retval = ibt_query_qp(ud_channel, &qp_attr);
4139 		if ((retval != IBT_SUCCESS) ||
4140 		    (qp_attr.qp_info.qp_trans != IBT_UD_SRV)) {
4141 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_ud_proceed: "
4142 			    "Failed to retrieve QPN from the channel: %d",
4143 			    retval);
4144 			return (IBT_INVALID_PARAM);
4145 		}
4146 	}
4147 
4148 
4149 	mutex_enter(&ud_statep->ud_state_mutex);
4150 
4151 	if (ud_statep->ud_state != IBCM_STATE_SIDR_REQ_RCVD) {
4152 		mutex_exit(&ud_statep->ud_state_mutex);
4153 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_ud_proceed : Invalid State "
4154 		    "specified");
4155 		return (IBT_INVALID_PARAM);
4156 	}
4157 
4158 	/* wait until client's CM handler returns DEFER status back to CM */
4159 
4160 	while (ud_statep->ud_clnt_proceed == IBCM_BLOCK) {
4161 		IBTF_DPRINTF_L5(cmlog, "ibt_cm_ud_proceed : Blocked for return"
4162 		    " of client's ud cm handler");
4163 		cv_wait(&ud_statep->ud_block_client_cv,
4164 		    &ud_statep->ud_state_mutex);
4165 	}
4166 
4167 	if (ud_statep->ud_clnt_proceed == IBCM_FAIL) {
4168 		mutex_exit(&ud_statep->ud_state_mutex);
4169 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_ud_proceed : Failed as client "
4170 		    "returned non-DEFER status from cm handler");
4171 		return (IBT_INVALID_PARAM);
4172 	}
4173 
4174 	ASSERT(ud_statep->ud_clnt_proceed == IBCM_UNBLOCK);
4175 	ud_statep->ud_clnt_proceed = IBCM_FAIL;
4176 	mutex_exit(&ud_statep->ud_state_mutex);
4177 
4178 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*proceed_targs))
4179 
4180 	/* the state machine processing is done in a separate thread */
4181 
4182 	/* proceed_targs is freed in ibcm_proceed_via_taskq */
4183 	proceed_targs = kmem_zalloc(sizeof (ibcm_proceed_targs_t),
4184 	    KM_SLEEP);
4185 
4186 	proceed_targs->status = status;
4187 	proceed_targs->priv_data_len = priv_data_len;
4188 
4189 	bcopy(priv_data, proceed_targs->priv_data, priv_data_len);
4190 
4191 	if (status == IBT_CM_ACCEPT) {
4192 		proceed_targs->tst.ud.ud_qkey =
4193 		    qp_attr.qp_info.qp_transport.ud.ud_qkey;
4194 		proceed_targs->tst.ud.ud_qpn = qp_attr.qp_qpn;
4195 	}
4196 
4197 	proceed_targs->tst.ud.ud_statep = ud_statep;
4198 
4199 	/* copy redirect info based on status */
4200 	if (status == IBT_CM_REDIRECT)
4201 		bcopy(redirect_infop, &proceed_targs->tst.ud.ud_redirect_info,
4202 		    sizeof (ibt_redirect_info_t));
4203 
4204 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*proceed_targs))
4205 
4206 	(void) taskq_dispatch(ibcm_taskq, ibcm_ud_proceed_via_taskq,
4207 	    proceed_targs, TQ_SLEEP);
4208 
4209 	return (IBT_SUCCESS);
4210 }
4211 
4212 /*
4213  * Function:
4214  * 	ibcm_ud_proceed_via_taskq
4215  *
4216  * Called from taskq, dispatched by ibt_cm_ud_proceed
4217  * Completes the cm state processing for ibt_cm_ud_proceed
4218  */
4219 void
4220 ibcm_ud_proceed_via_taskq(void *targs)
4221 {
4222 	ibcm_proceed_targs_t	*proceed_targs = (ibcm_proceed_targs_t *)targs;
4223 	ibcm_ud_state_data_t	*ud_statep = proceed_targs->tst.ud.ud_statep;
4224 	ibcm_ud_clnt_reply_info_t ud_clnt_info;
4225 	ibt_sidr_status_t	sidr_status;
4226 
4227 	IBTF_DPRINTF_L4(cmlog, "ibcm_ud_proceed_via_taskq(%p)", targs);
4228 
4229 	ud_clnt_info.ud_qpn  = proceed_targs->tst.ud.ud_qpn;
4230 	ud_clnt_info.ud_qkey  = proceed_targs->tst.ud.ud_qkey;
4231 	ud_clnt_info.priv_data = proceed_targs->priv_data;
4232 	ud_clnt_info.priv_data_len = proceed_targs->priv_data_len;
4233 	ud_clnt_info.redirect_infop = &proceed_targs->tst.ud.ud_redirect_info;
4234 
4235 	/* validate event and statep's state */
4236 	ibcm_process_sidr_req_cm_hdlr(ud_statep, proceed_targs->status,
4237 	    &ud_clnt_info, &sidr_status,
4238 	    (ibcm_sidr_rep_msg_t *)IBCM_OUT_MSGP(ud_statep->ud_stored_msg));
4239 
4240 	ibcm_post_sidr_rep_mad(ud_statep, sidr_status);
4241 
4242 	/* decr the statep ref cnt incremented in ibcm_process_sidr_req_msg */
4243 	mutex_enter(&ud_statep->ud_state_mutex);
4244 	IBCM_UD_REF_CNT_DECR(ud_statep);
4245 	mutex_exit(&ud_statep->ud_state_mutex);
4246 
4247 	kmem_free(targs, sizeof (ibcm_proceed_targs_t));
4248 }
4249 
4250 /*
4251  * Function:
4252  *	ibt_set_alt_path
4253  * Input:
4254  *	channel		Channel handle returned from ibt_alloc_rc_channel(9F).
4255  *
4256  *	mode		Execute in blocking or non blocking mode.
4257  *
4258  *	alt_path	A pointer to an ibt_alt_path_info_t as returned from an
4259  *			ibt_get_alt_path(9F) call that specifies the new
4260  *			alternate path.
4261  *
4262  *	priv_data       A pointer to a buffer specified by caller for the
4263  *			private data in the outgoing CM Load Alternate Path
4264  *			(LAP) message sent to the remote host. This can be NULL
4265  *			if no private data is available to communicate to the
4266  *			remote node.
4267  *
4268  *	priv_data_len   Length of valid data in priv_data, this should be less
4269  *			than or equal to IBT_LAP_PRIV_DATA_SZ.
4270  *
4271  * Output:
4272  *	ret_args	If called in blocking mode, points to a return argument
4273  *			structure of type ibt_ap_returns_t.
4274  *
4275  * Returns:
4276  *	IBT_SUCCESS on Success else appropriate error.
4277  * Description:
4278  *	Load the specified alternate path. Causes the CM to send an LAP message
4279  *	to the remote node.
4280  *	Can only be called on a previously opened RC channel.
4281  */
4282 ibt_status_t
4283 ibt_set_alt_path(ibt_channel_hdl_t channel, ibt_execution_mode_t mode,
4284     ibt_alt_path_info_t *alt_path, void *priv_data,
4285     ibt_priv_data_len_t priv_data_len, ibt_ap_returns_t *ret_args)
4286 {
4287 	ibmf_handle_t		ibmf_hdl;
4288 	ibt_status_t		status = IBT_SUCCESS;
4289 	ibcm_lap_msg_t		*lap_msgp;
4290 	ibcm_hca_info_t		*hcap;
4291 	ibcm_state_data_t	*statep;
4292 	uint8_t			port_no;
4293 	ib_lid_t		alternate_slid;
4294 	ibt_priv_data_len_t	len;
4295 	ib_lid_t		base_lid;
4296 	boolean_t		alt_grh;
4297 
4298 	IBTF_DPRINTF_L3(cmlog, "ibt_set_alt_path(%p, %x, %p, %p, %x, %p)",
4299 	    channel, mode, alt_path, priv_data, priv_data_len, ret_args);
4300 
4301 	/* validate channel */
4302 	if (IBCM_INVALID_CHANNEL(channel)) {
4303 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: invalid channel");
4304 		return (IBT_CHAN_HDL_INVALID);
4305 	}
4306 
4307 	if (ibtl_cm_get_chan_type(channel) != IBT_RC_SRV) {
4308 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4309 		    "Invalid Channel type: Applicable only to RC Channel");
4310 		return (IBT_CHAN_SRV_TYPE_INVALID);
4311 	}
4312 
4313 	if (mode == IBT_NONBLOCKING) {
4314 		if (ret_args != NULL) {
4315 			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4316 			    "ret_args should be NULL when called in "
4317 			    "non-blocking mode");
4318 			return (IBT_INVALID_PARAM);
4319 		}
4320 	} else if (mode == IBT_BLOCKING) {
4321 		if (ret_args == NULL) {
4322 			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4323 			    "ret_args should be Non-NULL when called in "
4324 			    "blocking mode");
4325 			return (IBT_INVALID_PARAM);
4326 		}
4327 		if (ret_args->ap_priv_data_len > IBT_APR_PRIV_DATA_SZ) {
4328 			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4329 			    "expected private data length is too large");
4330 			return (IBT_INVALID_PARAM);
4331 		}
4332 		if ((ret_args->ap_priv_data_len > 0) &&
4333 		    (ret_args->ap_priv_data == NULL)) {
4334 			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4335 			    "apr_priv_data_len > 0, but apr_priv_data NULL");
4336 			return (IBT_INVALID_PARAM);
4337 		}
4338 	} else { /* any other mode is not valid for ibt_set_alt_path */
4339 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4340 		    "invalid mode %x specified", mode);
4341 		return (IBT_INVALID_PARAM);
4342 	}
4343 
4344 	if ((port_no = alt_path->ap_alt_cep_path.cep_hca_port_num) == 0)
4345 		return (IBT_INVALID_PARAM);
4346 
4347 	/* get the statep */
4348 	IBCM_GET_CHAN_PRIVATE(channel, statep);
4349 	if (statep == NULL) {
4350 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: statep NULL");
4351 		return (IBT_CM_FAILURE);
4352 	}
4353 
4354 	mutex_enter(&statep->state_mutex);
4355 	IBCM_RELEASE_CHAN_PRIVATE(channel);
4356 	IBCM_REF_CNT_INCR(statep);
4357 	mutex_exit(&statep->state_mutex);
4358 
4359 	IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: statep %p", statep);
4360 
4361 	hcap = statep->hcap;
4362 
4363 	/* HCA must have been in active state. If not, it's a client bug */
4364 	if (!IBCM_ACCESS_HCA_OK(hcap))
4365 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: hca in error state");
4366 
4367 	ASSERT(statep->cm_handler != NULL);
4368 
4369 	/* Check Alternate port */
4370 	status = ibt_get_port_state_byguid(hcap->hca_guid, port_no, NULL,
4371 	    &base_lid);
4372 	if (status != IBT_SUCCESS) {
4373 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4374 		    "ibt_get_port_state_byguid status %d ", status);
4375 		mutex_enter(&statep->state_mutex);
4376 		IBCM_REF_CNT_DECR(statep);
4377 		mutex_exit(&statep->state_mutex);
4378 		return (status);
4379 	}
4380 
4381 	if ((hcap->hca_port_info[port_no - 1].port_ibmf_hdl == NULL) &&
4382 	    ((status = ibcm_hca_reinit_port(hcap, port_no - 1))
4383 	    != IBT_SUCCESS)) {
4384 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4385 		    "ibmf reg or callback setup failed during re-initialize");
4386 		mutex_enter(&statep->state_mutex);
4387 		IBCM_REF_CNT_DECR(statep);
4388 		mutex_exit(&statep->state_mutex);
4389 		return (status);
4390 	}
4391 
4392 	ibmf_hdl = statep->stored_reply_addr.ibmf_hdl;
4393 
4394 	alternate_slid = base_lid +
4395 	    alt_path->ap_alt_cep_path.cep_adds_vect.av_src_path;
4396 
4397 	IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: alternate SLID = %x",
4398 	    h2b16(alternate_slid));
4399 
4400 	ibcm_lapr_enter();	/* limit how many run simultaneously */
4401 
4402 	/* Allocate MAD for LAP */
4403 	if (statep->lapr_msg == NULL)
4404 		if ((status = ibcm_alloc_out_msg(ibmf_hdl, &statep->lapr_msg,
4405 		    MAD_METHOD_SEND)) != IBT_SUCCESS) {
4406 			ibcm_lapr_exit();
4407 			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4408 			    "chan 0x%p ibcm_alloc_out_msg failed", channel);
4409 			mutex_enter(&statep->state_mutex);
4410 			IBCM_REF_CNT_DECR(statep);
4411 			mutex_exit(&statep->state_mutex);
4412 			return (status);
4413 		}
4414 
4415 	mutex_enter(&statep->state_mutex);
4416 
4417 	IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: connection state is"
4418 	    " %x", statep->state);
4419 
4420 	/* Check state */
4421 	if ((statep->state != IBCM_STATE_ESTABLISHED) ||
4422 	    (statep->ap_state != IBCM_AP_STATE_IDLE)) {
4423 		IBCM_REF_CNT_DECR(statep);
4424 		mutex_exit(&statep->state_mutex);
4425 		(void) ibcm_free_out_msg(ibmf_hdl, &statep->lapr_msg);
4426 		ibcm_lapr_exit();
4427 		return (IBT_CHAN_STATE_INVALID);
4428 	} else {
4429 		/* Set to LAP Sent state */
4430 		statep->ap_state = IBCM_AP_STATE_LAP_SENT;
4431 		statep->ap_done = B_FALSE;
4432 		statep->remaining_retry_cnt = statep->max_cm_retries;
4433 		statep->timer_stored_state = statep->state;
4434 		statep->timer_stored_ap_state = statep->ap_state;
4435 		IBCM_REF_CNT_INCR(statep); /* for ibcm_post_lap_complete */
4436 	}
4437 
4438 	mutex_exit(&statep->state_mutex);
4439 
4440 	/* No more failure returns below */
4441 
4442 	/* Allocate MAD for LAP */
4443 	IBTF_DPRINTF_L5(cmlog, "ibt_set_alt_path:"
4444 	    " statep's mad addr = 0x%p", IBCM_OUT_HDRP(statep->lapr_msg));
4445 
4446 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*lap_msgp))
4447 
4448 	lap_msgp = (ibcm_lap_msg_t *)IBCM_OUT_MSGP(statep->lapr_msg);
4449 
4450 	lap_msgp->lap_alt_l_port_lid = h2b16(alternate_slid);
4451 	lap_msgp->lap_alt_r_port_lid =
4452 	    h2b16(alt_path->ap_alt_cep_path.cep_adds_vect.av_dlid);
4453 
4454 	/* Fill in remote port gid */
4455 	lap_msgp->lap_alt_r_port_gid.gid_prefix =
4456 	    h2b64(alt_path->ap_alt_cep_path.cep_adds_vect.av_dgid.gid_prefix);
4457 	lap_msgp->lap_alt_r_port_gid.gid_guid =
4458 	    h2b64(alt_path->ap_alt_cep_path.cep_adds_vect.av_dgid.gid_guid);
4459 
4460 	/* Fill in local port gid */
4461 	lap_msgp->lap_alt_l_port_gid.gid_prefix =
4462 	    h2b64(alt_path->ap_alt_cep_path.cep_adds_vect.av_sgid.gid_prefix);
4463 	lap_msgp->lap_alt_l_port_gid.gid_guid =
4464 	    h2b64(alt_path->ap_alt_cep_path.cep_adds_vect.av_sgid.gid_guid);
4465 
4466 	alt_grh = alt_path->ap_alt_cep_path.cep_adds_vect.av_send_grh;
4467 
4468 	/* alternate_flow_label, and alternate srate, alternate traffic class */
4469 	lap_msgp->lap_alt_srate_plus =
4470 	    alt_path->ap_alt_cep_path.cep_adds_vect.av_srate & 0x3f;
4471 	lap_msgp->lap_alt_flow_label_plus = h2b32(((alt_grh == B_TRUE) ?
4472 	    (alt_path->ap_alt_cep_path.cep_adds_vect.av_flow << 12) : 0) |
4473 	    alt_path->ap_alt_cep_path.cep_adds_vect.av_tclass);
4474 
4475 	/* Alternate hop limit, service level */
4476 	lap_msgp->lap_alt_hop_limit = (alt_grh == B_TRUE) ?
4477 	    alt_path->ap_alt_cep_path.cep_adds_vect.av_hop : 1;
4478 	lap_msgp->lap_alt_sl_plus =
4479 	    alt_path->ap_alt_cep_path.cep_adds_vect.av_srvl << 4 |
4480 	    ((alt_grh == B_FALSE) ? 0x8 : 0);
4481 
4482 	lap_msgp->lap_alt_local_acktime_plus = ibt_usec2ib(
4483 	    (2 * statep->rc_alt_pkt_lt) +
4484 	    ibt_ib2usec(hcap->hca_ack_delay)) << 3;
4485 
4486 	lap_msgp->lap_local_comm_id = h2b32(statep->local_comid);
4487 	lap_msgp->lap_remote_comm_id = h2b32(statep->remote_comid);
4488 
4489 	lap_msgp->lap_remote_qpn_eecn_plus =
4490 	    h2b32((statep->remote_qpn << 8) |
4491 	    ibt_usec2ib(ibcm_remote_response_time) << 3);
4492 
4493 	len = min(priv_data_len, IBT_LAP_PRIV_DATA_SZ);
4494 	if ((len > 0) && priv_data) {
4495 		bcopy(priv_data, lap_msgp->lap_private_data, len);
4496 	}
4497 
4498 	/* only rc_alt_pkt_lt and ap_return_data fields are initialized */
4499 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*statep))
4500 
4501 	statep->rc_alt_pkt_lt = ibt_ib2usec(alt_path->ap_alt_pkt_lt);
4502 
4503 	/* return_data is filled up in the state machine code */
4504 	statep->ap_return_data = ret_args;
4505 
4506 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*statep))
4507 
4508 	IBCM_OUT_HDRP(statep->lapr_msg)->AttributeID =
4509 	    h2b16(IBCM_INCOMING_LAP + IBCM_ATTR_BASE_ID);
4510 
4511 	IBCM_OUT_HDRP(statep->lapr_msg)->TransactionID =
4512 	    h2b64(ibcm_generate_tranid(IBCM_INCOMING_LAP, statep->local_comid,
4513 	    0));
4514 	IBTF_DPRINTF_L3(cmlog, "ibt_set_alt_path: statep %p, tid %llx",
4515 	    statep, IBCM_OUT_HDRP(statep->lapr_msg)->TransactionID);
4516 
4517 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*lap_msgp))
4518 
4519 	/* Send LAP */
4520 	ibcm_post_rc_mad(statep, statep->lapr_msg, ibcm_post_lap_complete,
4521 	    statep);
4522 
4523 	mutex_enter(&statep->state_mutex);
4524 
4525 	if (mode == IBT_BLOCKING) {
4526 		IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: blocking");
4527 
4528 		/* wait for APR */
4529 		while (statep->ap_done != B_TRUE) {
4530 			cv_wait(&statep->block_client_cv,
4531 			    &statep->state_mutex);
4532 		}
4533 
4534 		IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: done blocking");
4535 
4536 		/*
4537 		 * In the case that ibt_set_alt_path fails,
4538 		 * change retval to IBT_CM_FAILURE
4539 		 */
4540 		if (statep->ap_return_data->ap_status != IBT_CM_AP_LOADED)
4541 			status = IBT_CM_FAILURE;
4542 
4543 	}
4544 
4545 	/* decrement the ref-count before leaving here */
4546 	IBCM_REF_CNT_DECR(statep);
4547 
4548 	mutex_exit(&statep->state_mutex);
4549 
4550 	ibcm_lapr_exit();
4551 
4552 	/* If this message isn't seen then ibt_set_alt_path failed */
4553 	IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: done");
4554 
4555 	return (status);
4556 }
4557 
4558 
4559 #ifdef DEBUG
4560 
4561 /*
4562  * ibcm_query_classport_info:
4563  *	Query classportinfo
4564  *
4565  * INPUTS:
4566  *	channel		- Channel that is associated with a statep
4567  *
4568  * RETURN VALUE: NONE
4569  * This function is currently used to generate a valid get method classport
4570  * info, and test CM functionality. There is no ibtl client interface to
4571  * generate a classportinfo. It is possible that CM may use classportinfo
4572  * from other nodes in the future, and most of the code below could be re-used.
4573  */
4574 void
4575 ibcm_query_classport_info(ibt_channel_hdl_t channel)
4576 {
4577 	ibcm_state_data_t	*statep;
4578 	ibmf_msg_t		*msgp;
4579 
4580 	IBTF_DPRINTF_L3(cmlog, "ibcm_query_classport_info(%p)", channel);
4581 
4582 	/* validate channel, first */
4583 	if (IBCM_INVALID_CHANNEL(channel)) {
4584 		IBTF_DPRINTF_L2(cmlog, "ibcm_query_classport_info: "
4585 		    "invalid channel (%p)", channel);
4586 		return;
4587 	}
4588 
4589 	/* get the statep */
4590 	IBCM_GET_CHAN_PRIVATE(channel, statep);
4591 
4592 	/*
4593 	 * This can happen, if the statep is already gone by a DREQ from
4594 	 * the remote side
4595 	 */
4596 	if (statep == NULL) {
4597 		IBTF_DPRINTF_L2(cmlog, "ibcm_query_classport_info: "
4598 		    "statep NULL");
4599 		return;
4600 	}
4601 
4602 	mutex_enter(&statep->state_mutex);
4603 	IBCM_RELEASE_CHAN_PRIVATE(channel);
4604 	IBCM_REF_CNT_INCR(statep);
4605 	mutex_exit(&statep->state_mutex);
4606 
4607 	/* Debug/test code, so don't care about return status */
4608 	(void) ibcm_alloc_out_msg(statep->stored_reply_addr.ibmf_hdl, &msgp,
4609 	    MAD_METHOD_GET);
4610 
4611 	IBCM_OUT_HDRP(msgp)->TransactionID = h2b64(ibcm_generate_tranid(
4612 	    MAD_ATTR_ID_CLASSPORTINFO, statep->local_comid, 0));
4613 	IBCM_OUT_HDRP(msgp)->AttributeID = h2b16(MAD_ATTR_ID_CLASSPORTINFO);
4614 
4615 	(void) ibcm_post_mad(msgp, &statep->stored_reply_addr, NULL, NULL);
4616 
4617 	IBTF_DPRINTF_L3(cmlog, "ibcm_query_classport_info(%p) "
4618 	    "Get method MAD posted ", channel);
4619 
4620 	(void) ibcm_free_out_msg(statep->stored_reply_addr.ibmf_hdl, &msgp);
4621 
4622 	mutex_enter(&statep->state_mutex);
4623 	IBCM_REF_CNT_DECR(statep);
4624 	mutex_exit(&statep->state_mutex);
4625 }
4626 
4627 static void
4628 ibcm_print_reply_addr(ibt_channel_hdl_t channel, ibcm_mad_addr_t *cm_reply_addr)
4629 {
4630 	IBTF_DPRINTF_L4(cmlog, "ibcm_print_reply_addr: chan 0x%p, SLID %x, "
4631 	    "DLID %x", channel, cm_reply_addr->rcvd_addr.ia_local_lid,
4632 	    cm_reply_addr->rcvd_addr.ia_remote_lid);
4633 
4634 	IBTF_DPRINTF_L4(cmlog, "ibcm_print_reply_addr: QKEY %x, PKEY %x, "
4635 	    "RQPN %x SL %x", cm_reply_addr->rcvd_addr.ia_q_key,
4636 	    cm_reply_addr->rcvd_addr.ia_p_key,
4637 	    cm_reply_addr->rcvd_addr.ia_remote_qno,
4638 	    cm_reply_addr->rcvd_addr.ia_service_level);
4639 
4640 	IBTF_DPRINTF_L4(cmlog, "ibcm_print_reply_addr: CM SGID %llX:%llX ",
4641 	    cm_reply_addr->grh_hdr.ig_sender_gid.gid_prefix,
4642 	    cm_reply_addr->grh_hdr.ig_sender_gid.gid_guid);
4643 
4644 	IBTF_DPRINTF_L4(cmlog, "ibcm_print_reply_addr: CM DGID %llX:%llX",
4645 	    cm_reply_addr->grh_hdr.ig_recver_gid.gid_prefix,
4646 	    cm_reply_addr->grh_hdr.ig_recver_gid.gid_guid);
4647 
4648 	IBTF_DPRINTF_L4(cmlog, "ibcm_print_reply_addr: CM FL %x TC %x HL %x",
4649 	    cm_reply_addr->grh_hdr.ig_flow_label,
4650 	    cm_reply_addr->grh_hdr.ig_tclass,
4651 	    cm_reply_addr->grh_hdr.ig_hop_limit);
4652 }
4653 
4654 #endif
4655 
/*
 * For MCG List search
 *
 * One entry is kept per joined multicast group / port GID pair. Entries are
 * created and reference counted by ibcm_add_incr_mcg_entry() and released by
 * ibcm_del_decr_mcg_entry().
 */
typedef struct ibcm_mcg_list_s {
	struct ibcm_mcg_list_s	*ml_next;	/* next entry, NULL at end */
	ib_gid_t		ml_sgid;	/* PortGID from the SA response */
	ib_gid_t		ml_mgid;	/* MGID from the SA response */
	ib_pkey_t		ml_pkey;	/* P_Key from the join request */
	ib_qkey_t		ml_qkey;	/* Q_Key from the join request */
	uint_t			ml_refcnt;	/* joins recorded on this entry */
	uint8_t			ml_jstate;	/* OR of all recorded JoinStates */
} ibcm_mcg_list_t;

/* Head of the singly linked MCG list; new entries are pushed at the front. */
ibcm_mcg_list_t	*ibcm_mcglist = NULL;

_NOTE(MUTEX_PROTECTS_DATA(ibcm_mcglist_lock, ibcm_mcg_list_s))
_NOTE(MUTEX_PROTECTS_DATA(ibcm_mcglist_lock, ibcm_mcglist))
4671 
/*
 * Argument block built by ibt_join_mcg() and consumed (and freed) by
 * ibcm_process_join_mcg(), either directly or from the CM task queue.
 */
typedef struct ibcm_join_mcg_tqarg_s {
	ib_gid_t		rgid;		/* request GID from the caller */
	ibt_mcg_attr_t		mcg_attr;	/* copy of caller's attributes */
	ibt_mcg_info_t		*mcg_infop;	/* caller's result buffer */
	ibt_mcg_handler_t	func;		/* completion handler or NULL */
	void			*arg;		/* first argument for 'func' */
} ibcm_join_mcg_tqarg_t;

_NOTE(READ_ONLY_DATA(ibcm_join_mcg_tqarg_s))
4681 
4682 void
4683 ibcm_add_incr_mcg_entry(sa_mcmember_record_t *mcg_req,
4684     sa_mcmember_record_t *mcg_resp)
4685 {
4686 	ibcm_mcg_list_t	*new = NULL;
4687 	ibcm_mcg_list_t	*head = NULL;
4688 
4689 	IBTF_DPRINTF_L3(cmlog, "ibcm_add_incr_mcg_entry: MGID %llX:%llX"
4690 	    "\n SGID %llX:%llX, JState %X)", mcg_req->MGID.gid_prefix,
4691 	    mcg_req->MGID.gid_guid, mcg_req->PortGID.gid_prefix,
4692 	    mcg_req->PortGID.gid_guid, mcg_req->JoinState);
4693 
4694 	mutex_enter(&ibcm_mcglist_lock);
4695 	head = ibcm_mcglist;
4696 
4697 	while (head != NULL) {
4698 		if ((head->ml_mgid.gid_guid == mcg_resp->MGID.gid_guid) &&
4699 		    (head->ml_mgid.gid_prefix == mcg_resp->MGID.gid_prefix) &&
4700 		    (head->ml_sgid.gid_guid == mcg_resp->PortGID.gid_guid)) {
4701 			/* Increment the count */
4702 			head->ml_refcnt++;
4703 			/* OR the join_state value, we need this during leave */
4704 			head->ml_jstate |= mcg_req->JoinState;
4705 
4706 			IBTF_DPRINTF_L3(cmlog, "ibcm_add_incr_mcg_entry: Entry "
4707 			    "FOUND: refcnt %d JState %X", head->ml_refcnt,
4708 			    head->ml_jstate);
4709 
4710 			mutex_exit(&ibcm_mcglist_lock);
4711 			return;
4712 		}
4713 		head = head->ml_next;
4714 	}
4715 	mutex_exit(&ibcm_mcglist_lock);
4716 
4717 	IBTF_DPRINTF_L3(cmlog, "ibcm_add_incr_mcg_entry: Create NEW Entry ");
4718 
4719 	/* If we are here, either list is empty or match couldn't be found */
4720 	new = kmem_zalloc(sizeof (ibcm_mcg_list_t), KM_SLEEP);
4721 
4722 	mutex_enter(&ibcm_mcglist_lock);
4723 	/* Initialize the fields */
4724 	new->ml_sgid = mcg_resp->PortGID;
4725 	new->ml_mgid = mcg_resp->MGID;
4726 	new->ml_qkey = mcg_req->Q_Key;
4727 	new->ml_pkey = mcg_req->P_Key;
4728 	new->ml_refcnt = 1; /* As this is the first entry */
4729 	new->ml_jstate = mcg_req->JoinState;
4730 	new->ml_next = NULL;
4731 
4732 	new->ml_next = ibcm_mcglist;
4733 	ibcm_mcglist = new;
4734 	mutex_exit(&ibcm_mcglist_lock);
4735 }
4736 
4737 /*
4738  * ibcm_del_decr_mcg_entry
4739  *
4740  * Return value:
4741  * IBCM_SUCCESS		Entry found and ref_cnt is now zero. So go-ahead and
4742  * 			leave the MCG group. The return arg *jstate will have
4743  * 			a valid join_state value that needed to be used by
4744  * 			xxx_leave_mcg().
4745  * IBCM_LOOKUP_EXISTS	Entry found and ref_cnt is decremented but is NOT zero.
4746  * 			So do not leave the MCG group yet.
4747  * IBCM_LOOKUP_FAIL	Entry is NOT found.
4748  */
4749 ibcm_status_t
4750 ibcm_del_decr_mcg_entry(sa_mcmember_record_t *mcg_req, uint8_t *jstate)
4751 {
4752 	ibcm_mcg_list_t	*head, *prev;
4753 
4754 	IBTF_DPRINTF_L3(cmlog, "ibcm_del_decr_mcg_entry: MGID %llX:%llX"
4755 	    "\n SGID %llX:%llX, JState %X)", mcg_req->MGID.gid_prefix,
4756 	    mcg_req->MGID.gid_guid, mcg_req->PortGID.gid_prefix,
4757 	    mcg_req->PortGID.gid_guid, mcg_req->JoinState);
4758 
4759 	*jstate = 0;
4760 
4761 	mutex_enter(&ibcm_mcglist_lock);
4762 	head = ibcm_mcglist;
4763 	prev = NULL;
4764 
4765 	while (head != NULL) {
4766 		if ((head->ml_mgid.gid_guid == mcg_req->MGID.gid_guid) &&
4767 		    (head->ml_mgid.gid_prefix == mcg_req->MGID.gid_prefix) &&
4768 		    (head->ml_sgid.gid_guid == mcg_req->PortGID.gid_guid)) {
4769 			if (!(head->ml_jstate & mcg_req->JoinState)) {
4770 				IBTF_DPRINTF_L2(cmlog, "ibcm_del_decr_mcg_entry"
4771 				    ": JoinState mismatch %X %X)",
4772 				    head->ml_jstate, mcg_req->JoinState);
4773 			}
4774 			/* Decrement the count */
4775 			head->ml_refcnt--;
4776 
4777 			if (head->ml_refcnt == 0) {
4778 				*jstate = head->ml_jstate;
4779 
4780 				IBTF_DPRINTF_L3(cmlog, "ibcm_del_decr_mcg_entry"
4781 				    ": refcnt is ZERO, so delete the entry ");
4782 				if ((head == ibcm_mcglist) || (prev == NULL)) {
4783 					ibcm_mcglist = head->ml_next;
4784 				} else if (prev != NULL) {
4785 					prev->ml_next = head->ml_next;
4786 				}
4787 				mutex_exit(&ibcm_mcglist_lock);
4788 
4789 				kmem_free(head, sizeof (ibcm_mcg_list_t));
4790 				return (IBCM_SUCCESS);
4791 			}
4792 			mutex_exit(&ibcm_mcglist_lock);
4793 			return (IBCM_LOOKUP_EXISTS);
4794 		}
4795 		prev = head;
4796 		head = head->ml_next;
4797 	}
4798 	mutex_exit(&ibcm_mcglist_lock);
4799 
4800 	/*
4801 	 * If we are here, something went wrong, we don't have the entry
4802 	 * for that MCG being joined.
4803 	 */
4804 	IBTF_DPRINTF_L2(cmlog, "ibcm_del_decr_mcg_entry: Match NOT "
4805 	    "Found ");
4806 
4807 	return (IBCM_LOOKUP_FAIL);
4808 }
4809 
4810 
4811 /*
4812  * Function:
4813  *	ibt_join_mcg
4814  * Input:
4815  *	rgid		The request GID that defines the HCA port from which a
4816  *			contact to SA Access is performed to add the specified
4817  *			endport GID ((mcg_attr->mc_pgid) to a multicast group.
4818  *			If mcg_attr->mc_pgid is null, then this (rgid) will be
4819  *			treated as endport GID that is to be added to the
4820  *			multicast group.
4821  *
4822  *	mcg_attr	A pointer to an ibt_mcg_attr_t structure that defines
4823  *			the attributes of the desired multicast group to be
4824  *			created or joined.
4825  *
4826  *	func		NULL or a pointer to a function to call when
4827  *			ibt_join_mcg() completes. If 'func' is not NULL then
4828  *			ibt_join_mcg() will return as soon as possible after
4829  *			initiating the multicast group join/create process.
4830  *			'func' is then called when the process completes.
4831  *
4832  *	arg		Argument to the 'func'.
4833  *
4834  * Output:
4835  *	mcg_info_p	A pointer to the ibt_mcg_info_t structure, allocated
4836  *			by the caller, where the attributes of the created or
4837  *			joined multicast group are copied.
4838  * Returns:
4839  *	IBT_SUCCESS
4840  *	IBT_INVALID_PARAM
4841  *	IBT_MCG_RECORDS_NOT_FOUND
4842  *	IBT_INSUFF_RESOURCE
4843  * Description:
4844  *	Join a multicast group.  The first full member "join" causes the MCG
4845  *	to be created.
4846  */
4847 ibt_status_t
4848 ibt_join_mcg(ib_gid_t rgid, ibt_mcg_attr_t *mcg_attr,
4849     ibt_mcg_info_t *mcg_info_p, ibt_mcg_handler_t func, void  *arg)
4850 {
4851 	ibcm_join_mcg_tqarg_t	*mcg_tq;
4852 	int			flag = ((func == NULL) ? KM_SLEEP : KM_NOSLEEP);
4853 
4854 	IBTF_DPRINTF_L3(cmlog, "ibt_join_mcg(%llX:%llX, %p)", rgid.gid_prefix,
4855 	    rgid.gid_guid, mcg_attr);
4856 
4857 	if ((rgid.gid_prefix == 0) || (rgid.gid_guid == 0)) {
4858 		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: Request GID is required");
4859 		return (IBT_INVALID_PARAM);
4860 	}
4861 
4862 	if ((mcg_attr->mc_pkey == IB_PKEY_INVALID_LIMITED) ||
4863 	    (mcg_attr->mc_pkey == IB_PKEY_INVALID_FULL)) {
4864 		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: Invalid P_Key specified");
4865 		return (IBT_INVALID_PARAM);
4866 	}
4867 
4868 	if (mcg_attr->mc_join_state == 0) {
4869 		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: JoinState not specified");
4870 		return (IBT_INVALID_PARAM);
4871 	}
4872 
4873 	if (mcg_info_p == NULL) {
4874 		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: mcg_info_p is NULL");
4875 		return (IBT_INVALID_PARAM);
4876 	}
4877 
4878 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mcg_tq))
4879 
4880 	mcg_tq = kmem_alloc(sizeof (ibcm_join_mcg_tqarg_t), flag);
4881 	if (mcg_tq == NULL) {
4882 		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: "
4883 		    "Unable to allocate memory for local usage.");
4884 		return (IBT_INSUFF_KERNEL_RESOURCE);
4885 	}
4886 
4887 	mcg_tq->rgid = rgid;
4888 	bcopy(mcg_attr, &mcg_tq->mcg_attr, sizeof (ibt_mcg_attr_t));
4889 	mcg_tq->mcg_infop = mcg_info_p;
4890 	mcg_tq->func = func;
4891 	mcg_tq->arg = arg;
4892 
4893 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mcg_tq))
4894 
4895 	if (func != NULL) {	/* Non-Blocking */
4896 		IBTF_DPRINTF_L3(cmlog, "ibt_join_mcg: Non-Blocking Call");
4897 		if (taskq_dispatch(ibcm_taskq, ibcm_process_async_join_mcg,
4898 		    mcg_tq, TQ_NOSLEEP) == 0) {
4899 			IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: Failed to "
4900 			    "Dispatch the TaskQ");
4901 			kmem_free(mcg_tq, sizeof (ibcm_join_mcg_tqarg_t));
4902 			return (IBT_INSUFF_KERNEL_RESOURCE);
4903 		} else
4904 			return (IBT_SUCCESS);
4905 	} else {		/* Blocking */
4906 		return (ibcm_process_join_mcg(mcg_tq));
4907 	}
4908 }
4909 
/*
 * ibcm_process_async_join_mcg:
 *	Task queue entry point dispatched by ibt_join_mcg() for non-blocking
 *	joins. It runs ibcm_process_join_mcg() on 'tq_arg' and discards the
 *	returned status; that routine passes the status to the client's
 *	handler itself.
 */
static void
ibcm_process_async_join_mcg(void *tq_arg)
{
	(void) ibcm_process_join_mcg(tq_arg);
}
4915 
4916 static ibt_status_t
4917 ibcm_process_join_mcg(void *taskq_arg)
4918 {
4919 	sa_mcmember_record_t	mcg_req;
4920 	sa_mcmember_record_t	*mcg_resp;
4921 	ibmf_saa_access_args_t	access_args;
4922 	ibmf_saa_handle_t	saa_handle;
4923 	uint64_t		component_mask = 0;
4924 	ibt_status_t		retval;
4925 	ibtl_cm_hca_port_t	hca_port;
4926 	uint_t			num_records;
4927 	size_t			length;
4928 	ibcm_hca_info_t		*hcap;
4929 	ibcm_join_mcg_tqarg_t	*mcg_arg = (ibcm_join_mcg_tqarg_t *)taskq_arg;
4930 	ibt_mcg_info_t		*mcg_info_p = mcg_arg->mcg_infop;
4931 
4932 	IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg(%p)", mcg_arg);
4933 
4934 	retval = ibtl_cm_get_hca_port(mcg_arg->rgid, 0, &hca_port);
4935 	if (retval != IBT_SUCCESS) {
4936 		IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: Failed to get "
4937 		    "port info from specified RGID: status = %d", retval);
4938 		goto ibcm_join_mcg_exit1;
4939 	}
4940 
4941 	bzero(&mcg_req, sizeof (sa_mcmember_record_t));
4942 
4943 	if ((mcg_arg->mcg_attr.mc_pgid.gid_prefix == 0) ||
4944 	    (mcg_arg->mcg_attr.mc_pgid.gid_guid == 0)) {
4945 		IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg: "
4946 		    "Request GID is Port GID");
4947 		mcg_req.PortGID = mcg_arg->rgid;
4948 	} else {
4949 		mcg_req.PortGID = mcg_arg->mcg_attr.mc_pgid;
4950 	}
4951 	component_mask |= SA_MC_COMPMASK_PORTGID;
4952 
4953 	mcg_req.Q_Key = mcg_arg->mcg_attr.mc_qkey;
4954 	mcg_req.P_Key = mcg_arg->mcg_attr.mc_pkey;
4955 	mcg_req.JoinState = mcg_arg->mcg_attr.mc_join_state;
4956 	mcg_req.TClass = mcg_arg->mcg_attr.mc_tclass;
4957 	mcg_req.FlowLabel = mcg_arg->mcg_attr.mc_flow;
4958 	mcg_req.SL = mcg_arg->mcg_attr.mc_sl;
4959 
4960 	component_mask |= SA_MC_COMPMASK_QKEY | SA_MC_COMPMASK_PKEY |
4961 	    SA_MC_COMPMASK_JOINSTATE | SA_MC_COMPMASK_TCLASS |
4962 	    SA_MC_COMPMASK_FLOWLABEL | SA_MC_COMPMASK_SL;
4963 
4964 	/* If client has specified MGID, use it else SA will assign one. */
4965 	if ((mcg_arg->mcg_attr.mc_mgid.gid_prefix >> 56ULL & 0xFF) == 0xFF) {
4966 		mcg_req.MGID = mcg_arg->mcg_attr.mc_mgid;
4967 		component_mask |= SA_MC_COMPMASK_MGID;
4968 	}
4969 
4970 	IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg: ");
4971 	IBTF_DPRINTF_L3(cmlog, "PGID=%016llX:%016llX, ",
4972 	    mcg_req.PortGID.gid_prefix, mcg_req.PortGID.gid_guid);
4973 	IBTF_DPRINTF_L3(cmlog, "MGID=%016llX:%016llX",
4974 	    mcg_req.MGID.gid_prefix, mcg_req.MGID.gid_guid);
4975 	IBTF_DPRINTF_L3(cmlog, "JoinState = %X",
4976 	    mcg_arg->mcg_attr.mc_join_state);
4977 	IBTF_DPRINTF_L5(cmlog, "QKey %lX, PKey %lX",
4978 	    mcg_arg->mcg_attr.mc_qkey, mcg_arg->mcg_attr.mc_pkey);
4979 	IBTF_DPRINTF_L5(cmlog, "Scope %X, MLID %X",
4980 	    mcg_arg->mcg_attr.mc_scope, mcg_arg->mcg_attr.mc_mlid);
4981 
4982 	/* Is MTU specified. */
4983 	if (mcg_arg->mcg_attr.mc_mtu_req.r_mtu) {
4984 		mcg_req.MTU = mcg_arg->mcg_attr.mc_mtu_req.r_mtu;
4985 		mcg_req.MTUSelector = mcg_arg->mcg_attr.mc_mtu_req.r_selector;
4986 
4987 		component_mask |= SA_MC_COMPMASK_MTUSELECTOR |
4988 		    SA_MC_COMPMASK_MTU;
4989 	}
4990 
4991 	/* Is RATE specified. */
4992 	if (mcg_arg->mcg_attr.mc_rate_req.r_srate) {
4993 		mcg_req.Rate = mcg_arg->mcg_attr.mc_rate_req.r_srate;
4994 		mcg_req.RateSelector =
4995 		    mcg_arg->mcg_attr.mc_rate_req.r_selector;
4996 
4997 		component_mask |= SA_MC_COMPMASK_RATESELECTOR |
4998 		    SA_MC_COMPMASK_RATE;
4999 	}
5000 
5001 	/* Is Packet Life Time specified. */
5002 	if (mcg_arg->mcg_attr.mc_pkt_lt_req.p_pkt_lt) {
5003 		mcg_req.Rate = mcg_arg->mcg_attr.mc_pkt_lt_req.p_pkt_lt;
5004 		mcg_req.RateSelector =
5005 		    mcg_arg->mcg_attr.mc_pkt_lt_req.p_selector;
5006 
5007 		component_mask |= SA_MC_COMPMASK_PKTLTSELECTOR |
5008 		    SA_MC_COMPMASK_PKTLT;
5009 	}
5010 
5011 	if (mcg_arg->mcg_attr.mc_hop) {
5012 		mcg_req.HopLimit = mcg_arg->mcg_attr.mc_hop;
5013 		component_mask |= SA_MC_COMPMASK_HOPLIMIT;
5014 	}
5015 
5016 	if (mcg_arg->mcg_attr.mc_scope) {
5017 		mcg_req.Scope = mcg_arg->mcg_attr.mc_scope;
5018 		component_mask |= SA_MC_COMPMASK_SCOPE;
5019 	}
5020 
5021 	if (mcg_arg->mcg_attr.mc_mlid) {
5022 		mcg_req.MLID = mcg_arg->mcg_attr.mc_mlid;
5023 		component_mask |= SA_MC_COMPMASK_MLID;
5024 	}
5025 
5026 	/* Get SA Access Handle. */
5027 	hcap = ibcm_find_hca_entry(hca_port.hp_hca_guid);
5028 	if (hcap == NULL) {
5029 		IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: NO HCA found");
5030 
5031 		retval = IBT_HCA_BUSY_DETACHING;
5032 		goto ibcm_join_mcg_exit1;
5033 	}
5034 
5035 	saa_handle = ibcm_get_saa_handle(hcap, hca_port.hp_port);
5036 	if (saa_handle == NULL) {
5037 		IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: SA Handle NULL");
5038 
5039 		retval = IBT_HCA_PORT_NOT_ACTIVE;
5040 		goto ibcm_join_mcg_exit;
5041 	}
5042 
5043 	if ((mcg_arg->mcg_attr.mc_pgid.gid_prefix != 0) &&
5044 	    (mcg_arg->mcg_attr.mc_pgid.gid_guid != 0)) {
5045 		retval = ibtl_cm_get_hca_port(mcg_arg->mcg_attr.mc_pgid, 0,
5046 		    &hca_port);
5047 		if (retval != IBT_SUCCESS) {
5048 			IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: Failed "
5049 			    "to get PortInfo of specified PGID: status = %d",
5050 			    retval);
5051 			goto ibcm_join_mcg_exit1;
5052 		}
5053 	}
5054 
5055 	/* Contact SA Access */
5056 	access_args.sq_attr_id = SA_MCMEMBERRECORD_ATTRID;
5057 	access_args.sq_access_type = IBMF_SAA_UPDATE;
5058 	access_args.sq_component_mask = component_mask;
5059 	access_args.sq_template = &mcg_req;
5060 	access_args.sq_template_length = sizeof (sa_mcmember_record_t);
5061 	access_args.sq_callback = NULL;
5062 	access_args.sq_callback_arg = NULL;
5063 
5064 	retval = ibcm_contact_sa_access(saa_handle, &access_args, &length,
5065 	    (void **)&mcg_resp);
5066 	if (retval != IBT_SUCCESS) {
5067 		IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: "
5068 		    "SA Access Failed");
5069 		goto ibcm_join_mcg_exit;
5070 	}
5071 
5072 	num_records = length/sizeof (sa_mcmember_record_t);
5073 
5074 	IBTF_DPRINTF_L4(cmlog, "ibcm_process_join_mcg: "
5075 	    "Found %d MCMember Records", num_records);
5076 
5077 	/* Validate the returned number of records. */
5078 	if ((mcg_resp != NULL) && (num_records > 0)) {
5079 		/* Update the return values. */
5080 		mcg_info_p->mc_adds_vect.av_dgid = mcg_resp->MGID;
5081 		mcg_info_p->mc_adds_vect.av_sgid = mcg_resp->PortGID;
5082 		mcg_info_p->mc_adds_vect.av_srate = mcg_resp->Rate;
5083 		mcg_info_p->mc_adds_vect.av_srvl = mcg_resp->SL;
5084 		mcg_info_p->mc_adds_vect.av_flow = mcg_resp->FlowLabel;
5085 		mcg_info_p->mc_adds_vect.av_tclass = mcg_resp->TClass;
5086 		mcg_info_p->mc_adds_vect.av_hop = mcg_resp->HopLimit;
5087 		mcg_info_p->mc_adds_vect.av_send_grh = B_TRUE;
5088 		mcg_info_p->mc_adds_vect.av_dlid = mcg_resp->MLID;
5089 		mcg_info_p->mc_mtu = mcg_resp->MTU;
5090 		mcg_info_p->mc_qkey = mcg_resp->Q_Key;
5091 
5092 		retval = ibt_pkey2index_byguid(hca_port.hp_hca_guid,
5093 		    hca_port.hp_port, mcg_resp->P_Key, &mcg_info_p->mc_pkey_ix);
5094 		if (retval != IBT_SUCCESS) {
5095 			IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg: "
5096 			    "Pkey2Index Conversion failed<%d>", retval);
5097 			mcg_info_p->mc_pkey_ix = 0;
5098 		}
5099 
5100 		mcg_info_p->mc_scope = mcg_resp->Scope;
5101 		mcg_info_p->mc_pkt_lt = mcg_resp->PacketLifeTime;
5102 
5103 		mcg_info_p->mc_adds_vect.av_port_num = hca_port.hp_port;
5104 		mcg_info_p->mc_adds_vect.av_sgid_ix = hca_port.hp_sgid_ix;
5105 		mcg_info_p->mc_adds_vect.av_src_path = 0;
5106 
5107 		/* Add or Incr the matching MCG entry. */
5108 		ibcm_add_incr_mcg_entry(&mcg_req, mcg_resp);
5109 		/* Deallocate the memory allocated by SA for mcg_resp. */
5110 		kmem_free(mcg_resp, length);
5111 
5112 		retval = IBT_SUCCESS;
5113 	} else {
5114 		retval = IBT_MCG_RECORDS_NOT_FOUND;
5115 		IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg: "
5116 		    "MCG RECORDS NOT FOUND");
5117 	}
5118 
5119 ibcm_join_mcg_exit:
5120 	ibcm_dec_hca_acc_cnt(hcap);
5121 
5122 ibcm_join_mcg_exit1:
5123 	if (mcg_arg->func)
5124 		(*(mcg_arg->func))(mcg_arg->arg, retval, mcg_info_p);
5125 
5126 	kmem_free(mcg_arg, sizeof (ibcm_join_mcg_tqarg_t));
5127 
5128 	return (retval);
5129 }
5130 
5131 
5132 /*
5133  * Function:
5134  *	ibt_leave_mcg
5135  * Input:
5136  *	rgid		The request GID that defines the HCA port upon which
5137  *			to send the request to the Subnet Administrator, to
5138  *			remove the specified port (port_gid) from the multicast
5139  *			group.  If 'port_gid' is the Reserved GID (i.e.
5140  *			port_gid.gid_prefix = 0 and port_gid.gid_guid = 0),
5141  *			then the end-port associated with 'rgid' is removed
5142  *			from the multicast group.
5143  *
5144  *	mc_gid		A multicast group GID as returned from ibt_join_mcg()
5145  *			call.  This is optional, if not specified (i.e.
5146  *			mc_gid.gid_prefix has 0xFF in its upper 8 bits to
5147  *			identify this as being a multicast GID), then the
5148  *			port is removed from all the multicast groups of
5149  *			which it is a member.
5150  *
5151  *	port_gid	This is optional, if not the Reserved GID (gid_prefix
5152  *			and gid_guid not equal to 0), then this specifies the
5153  *			endport GID of the multicast group member being deleted
5154  *			from the group. If it is the Reserved GID (gid_prefix
5155  *			and gid_guid equal to 0) then the member endport GID is
5156  *			determined from 'rgid'.
5157  *
5158  *	mc_join_state	The Join State attribute used when the group was joined
5159  *			using ibt_join_mcg(). This Join State component must
5160  *			contains at least one bit set to 1 in the same position
5161  *			as that used during ibt_join_mcg(). i.e. the logical
5162  *			AND of the two JoinState components is not all zeros.
5163  *			This Join State component must not have some bits set
5164  *			which are not set using ibt_join_mcg().
5165  * Output:
5166  *	None.
5167  * Returns:
5168  *	IBT_SUCCESS
5169  *	IBT_INVALID_PARAM
5170  *	IBT_MC_GROUP_INVALID
5171  *	IBT_INSUFF_RESOURCE
5172  * Description:
5173  *	The port associated with the port GID shall be removed from the
5174  *	multicast group specified by MGID (mc_gid) or from all the multicast
5175  *	groups of which it is a member if the MGID (mc_gid) is not specified.
5176  *
5177  *	The last full member to leave causes the destruction of the Multicast
5178  *	Group.
5179  */
5180 ibt_status_t
5181 ibt_leave_mcg(ib_gid_t rgid, ib_gid_t mc_gid, ib_gid_t port_gid,
5182     uint8_t mc_join_state)
5183 {
5184 	sa_mcmember_record_t	mcg_req;
5185 	ibmf_saa_access_args_t	access_args;
5186 	ibmf_saa_handle_t	saa_handle;
5187 	uint64_t		component_mask = 0;
5188 	int			sa_retval;
5189 	ibt_status_t		retval;
5190 	ibcm_status_t		ret;
5191 	ibtl_cm_hca_port_t	hca_port;
5192 	size_t			length;
5193 	void			*results_p;
5194 	ibcm_hca_info_t		*hcap;
5195 	uint8_t			jstate = 0;
5196 
5197 	IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg(%llX:%llX, %llX:%llX)",
5198 	    rgid.gid_prefix, rgid.gid_guid, mc_gid.gid_prefix, mc_gid.gid_guid);
5199 
5200 	IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg(%llX:%llX, 0x%X)",
5201 	    port_gid.gid_prefix, port_gid.gid_guid, mc_join_state);
5202 
5203 	if ((rgid.gid_prefix == 0) || (rgid.gid_guid == 0)) {
5204 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: RequestGID is required");
5205 		return (IBT_INVALID_PARAM);
5206 	}
5207 
5208 	bzero(&mcg_req, sizeof (sa_mcmember_record_t));
5209 
5210 	IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: MGID: %llX%llX",
5211 	    mc_gid.gid_prefix, mc_gid.gid_guid);
5212 
5213 	/* Validate MGID */
5214 	if ((mc_gid.gid_prefix >> 56ULL & 0xFF) == 0xFF) {
5215 		mcg_req.MGID = mc_gid;
5216 		component_mask |= SA_MC_COMPMASK_MGID;
5217 	} else if ((mc_gid.gid_prefix != 0) || (mc_gid.gid_guid != 0)) {
5218 		IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: Invalid MGID specified");
5219 		return (IBT_MC_MGID_INVALID);
5220 	}
5221 
5222 	if ((port_gid.gid_prefix == 0) || (port_gid.gid_guid == 0)) {
5223 		mcg_req.PortGID = rgid;
5224 	} else {
5225 		IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: Performing PROXY Leave");
5226 		mcg_req.PortGID = port_gid;
5227 	}
5228 	component_mask |= SA_MC_COMPMASK_PORTGID;
5229 
5230 	IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: Port GID <%llX:%llX>",
5231 	    mcg_req.PortGID.gid_prefix, mcg_req.PortGID.gid_guid);
5232 
5233 	/* Join State */
5234 	mcg_req.JoinState = mc_join_state;
5235 	component_mask |= SA_MC_COMPMASK_JOINSTATE;
5236 
5237 	ret = ibcm_del_decr_mcg_entry(&mcg_req, &jstate);
5238 	if (ret == IBCM_LOOKUP_EXISTS) {
5239 		IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: Multiple JoinMCG record "
5240 		    " still exists, we shall leave for last leave_mcg call");
5241 		return (IBT_SUCCESS);
5242 	} else if (ret == IBCM_LOOKUP_FAIL) {
5243 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: No Record found, "
5244 		    "continue with leave_mcg call");
5245 	} else if ((ret == IBCM_SUCCESS) && (jstate != 0)) {
5246 		/*
5247 		 * Update with cached "jstate", as this will be OR'ed of
5248 		 * all ibt_join_mcg() calls for this record.
5249 		 */
5250 		mcg_req.JoinState = jstate;
5251 	}
5252 
5253 	retval = ibtl_cm_get_hca_port(rgid, 0, &hca_port);
5254 	if (retval != IBT_SUCCESS) {
5255 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: Failed to get port info "
5256 		    "from specified RGID : status = %d", retval);
5257 		return (retval);
5258 	}
5259 
5260 	/* Get SA Access Handle. */
5261 	hcap = ibcm_find_hca_entry(hca_port.hp_hca_guid);
5262 	if (hcap == NULL) {
5263 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: "
5264 		    "NO HCA found");
5265 		return (IBT_HCA_BUSY_DETACHING);
5266 	}
5267 
5268 	saa_handle = ibcm_get_saa_handle(hcap, hca_port.hp_port);
5269 	if (saa_handle == NULL) {
5270 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: saa_handle is NULL");
5271 		ibcm_dec_hca_acc_cnt(hcap);
5272 		return (IBT_HCA_PORT_NOT_ACTIVE);
5273 	}
5274 
5275 	/* Contact SA Access */
5276 	access_args.sq_attr_id = SA_MCMEMBERRECORD_ATTRID;
5277 	access_args.sq_access_type = IBMF_SAA_DELETE;
5278 	access_args.sq_component_mask = component_mask;
5279 	access_args.sq_template = &mcg_req;
5280 	access_args.sq_template_length = sizeof (sa_mcmember_record_t);
5281 	access_args.sq_callback = NULL;
5282 	access_args.sq_callback_arg = NULL;
5283 
5284 	ibcm_sa_access_enter();
5285 
5286 	sa_retval = ibmf_sa_access(saa_handle, &access_args, 0, &length,
5287 	    &results_p);
5288 	if (sa_retval != IBMF_SUCCESS) {
5289 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: SA access Failed: %d",
5290 		    sa_retval);
5291 		(void) ibcm_ibmf_analyze_error(sa_retval);
5292 		retval = IBT_MC_GROUP_INVALID;
5293 	}
5294 
5295 	ibcm_sa_access_exit();
5296 
5297 	ibcm_dec_hca_acc_cnt(hcap);
5298 
5299 	return (retval);
5300 }
5301 
5302 
5303 /*
5304  * Function:
5305  *	ibt_query_mcg
5306  * Input:
5307  *	rgid		The request GID that defines the HCA port upon which
5308  *			to send the request to the Subnet Administrator, to
5309  *			retrieve Multicast Records matching attributes as
5310  *			specified through 'mcg_attr' argument.
5311  *
5312  *	mcg_attr	NULL or a pointer to an ibt_mcg_attr_t structure that
5313  *			specifies MCG attributes that are to be matched.
5314  *			Attributes that are not required can be wild carded
5315  *			by specifying as '0'.
5316  *
5317  *	mcgs_max_num	The maximum number of matching multicast groups to
5318  *			return.  If zero, then all available matching multicast
5319  *			groups are returned.
5320  * Output:
5321  *	mcgs_info_p	The address of an ibt_mcg_info_t pointer, where
5322  *			multicast group information is returned. The actual
5323  *			number of entries filled in the array is returned in
5324  *			entries_p.
5325  *
5326  *	entries_p	The number of ibt_mcg_attr_t entries returned.
5327  * Returns:
5328  *	IBT_SUCCESS
5329  *	IBT_INVALID_PARAM
5330  *	IBT_MCG_RECORDS_NOT_FOUND
5331  * Description:
5332  *	Request information on multicast groups that match the parameters
5333  *	specified in mcg_attr. Information on each multicast group is returned
5334  *	to the caller in the form of an array of ibt_mcg_info_t.
5335  *	ibt_query_mcg() allocates the memory for this array and returns a
5336  *	pointer to the array (mcgs_p) and the number of entries in the array
5337  *	(entries_p). This memory should be freed by the client using
5338  *	ibt_free_mcg_info().
5339  */
5340 ibt_status_t
5341 ibt_query_mcg(ib_gid_t rgid, ibt_mcg_attr_t *mcg_attr, uint_t mcgs_max_num,
5342     ibt_mcg_info_t **mcgs_info_p, uint_t *entries_p)
5343 {
5344 	sa_mcmember_record_t	mcg_req;
5345 	sa_mcmember_record_t	*mcg_resp;
5346 	ibt_mcg_info_t		*mcg_infop;
5347 	ibmf_saa_access_args_t	access_args;
5348 	ibmf_saa_handle_t	saa_handle;
5349 	uint64_t		component_mask = 0;
5350 	ibt_status_t		retval;
5351 	ibtl_cm_hca_port_t	hport;
5352 	uint_t			num_records;
5353 	size_t			length;
5354 	void			*results_p;
5355 	ib_gid_t		port_gid;
5356 	ibcm_hca_info_t		*hcap;
5357 
5358 	IBTF_DPRINTF_L3(cmlog, "ibt_query_mcg(%p, %d)", mcg_attr, mcgs_max_num);
5359 
5360 	if ((entries_p == NULL) || (mcgs_info_p == NULL)) {
5361 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: "
5362 		    "entries_p or mcgs_info_p is NULL");
5363 		return (IBT_INVALID_PARAM);
5364 	}
5365 
5366 	if ((rgid.gid_prefix == 0) || (rgid.gid_guid == 0)) {
5367 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: RequestGID is required");
5368 		return (IBT_INVALID_PARAM);
5369 	}
5370 	IBTF_DPRINTF_L4(cmlog, "ibt_query_mcg: Request GID <%llX:%llX>",
5371 	    rgid.gid_prefix, rgid.gid_guid);
5372 
5373 	bzero(&mcg_req, sizeof (sa_mcmember_record_t));
5374 	port_gid.gid_prefix = port_gid.gid_guid = 0;
5375 
5376 	if (mcg_attr != NULL) {
5377 		port_gid = mcg_attr->mc_pgid;
5378 
5379 		if ((port_gid.gid_prefix != 0) && (port_gid.gid_guid != 0)) {
5380 			mcg_req.PortGID = mcg_attr->mc_pgid;
5381 			component_mask |= SA_MC_COMPMASK_PORTGID;
5382 
5383 			IBTF_DPRINTF_L4(cmlog, "ibt_query_mcg: PGID %llX:%llX",
5384 			    port_gid.gid_prefix, port_gid.gid_guid);
5385 		}
5386 
5387 		/* Is Q_Key specified. */
5388 		if (mcg_attr->mc_qkey != 0) {
5389 			mcg_req.Q_Key = mcg_attr->mc_qkey;
5390 			component_mask |= SA_MC_COMPMASK_QKEY;
5391 		}
5392 
5393 		/* Is P_Key specified. */
5394 		if (mcg_attr->mc_pkey != 0) {
5395 			mcg_req.P_Key = mcg_attr->mc_pkey;
5396 			component_mask |= SA_MC_COMPMASK_PKEY;
5397 		}
5398 
5399 		/* Is MGID specified. */
5400 		if ((mcg_attr->mc_mgid.gid_prefix >> 56ULL & 0xFF) == 0xFF) {
5401 			mcg_req.MGID = mcg_attr->mc_mgid;
5402 			component_mask |= SA_MC_COMPMASK_MGID;
5403 		}
5404 
5405 		/* Is MTU specified. */
5406 		if (mcg_attr->mc_mtu_req.r_mtu) {
5407 			mcg_req.MTU = mcg_attr->mc_mtu_req.r_mtu;
5408 			mcg_req.MTUSelector = mcg_attr->mc_mtu_req.r_selector;
5409 
5410 			component_mask |= SA_MC_COMPMASK_MTUSELECTOR |
5411 			    SA_MC_COMPMASK_MTU;
5412 		}
5413 
5414 		if (mcg_attr->mc_tclass) {
5415 			mcg_req.TClass = mcg_attr->mc_tclass;
5416 			component_mask |= SA_MC_COMPMASK_TCLASS;
5417 		}
5418 
5419 		/* Is RATE specified. */
5420 		if (mcg_attr->mc_rate_req.r_srate) {
5421 			mcg_req.Rate = mcg_attr->mc_rate_req.r_srate;
5422 			mcg_req.RateSelector = mcg_attr->mc_rate_req.r_selector;
5423 
5424 			component_mask |= SA_MC_COMPMASK_RATESELECTOR |
5425 			    SA_MC_COMPMASK_RATE;
5426 		}
5427 
5428 		/* Is Packet Life Time specified. */
5429 		if (mcg_attr->mc_pkt_lt_req.p_pkt_lt) {
5430 			mcg_req.Rate = mcg_attr->mc_pkt_lt_req.p_pkt_lt;
5431 			mcg_req.RateSelector =
5432 			    mcg_attr->mc_pkt_lt_req.p_selector;
5433 
5434 			component_mask |= SA_MC_COMPMASK_PKTLTSELECTOR |
5435 			    SA_MC_COMPMASK_PKTLT;
5436 		}
5437 
5438 		if (mcg_attr->mc_hop) {
5439 			mcg_req.HopLimit = mcg_attr->mc_hop;
5440 			component_mask |= SA_MC_COMPMASK_HOPLIMIT;
5441 		}
5442 
5443 		if (mcg_attr->mc_flow) {
5444 			mcg_req.FlowLabel = mcg_attr->mc_flow;
5445 			component_mask |= SA_MC_COMPMASK_FLOWLABEL;
5446 		}
5447 
5448 		if (mcg_attr->mc_sl) {
5449 			mcg_req.SL = mcg_attr->mc_sl;
5450 			component_mask |= SA_MC_COMPMASK_SL;
5451 		}
5452 
5453 		if (mcg_attr->mc_scope) {
5454 			mcg_req.Scope = mcg_attr->mc_scope;
5455 			component_mask |= SA_MC_COMPMASK_SCOPE;
5456 		}
5457 
5458 		if (mcg_attr->mc_join_state) {
5459 			mcg_req.JoinState = mcg_attr->mc_join_state;
5460 			component_mask |= SA_MC_COMPMASK_JOINSTATE;
5461 		}
5462 
5463 		if (mcg_attr->mc_mlid) {
5464 			mcg_req.MLID = mcg_attr->mc_mlid;
5465 			component_mask |= SA_MC_COMPMASK_MLID;
5466 		}
5467 	}
5468 
5469 	retval = ibtl_cm_get_hca_port(rgid, 0, &hport);
5470 	if (retval != IBT_SUCCESS) {
5471 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: Failed to get port info "
5472 		    "from specified RGID : status = %d", retval);
5473 		return (retval);
5474 	}
5475 
5476 	/* Get SA Access Handle. */
5477 	hcap = ibcm_find_hca_entry(hport.hp_hca_guid);
5478 	if (hcap == NULL) {
5479 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: NO HCA found");
5480 		return (IBT_HCA_BUSY_DETACHING);
5481 	}
5482 
5483 	saa_handle = ibcm_get_saa_handle(hcap, hport.hp_port);
5484 	if (saa_handle == NULL) {
5485 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: saa_handle is NULL");
5486 		ibcm_dec_hca_acc_cnt(hcap);
5487 		return (IBT_HCA_PORT_NOT_ACTIVE);
5488 	}
5489 
5490 	/* Contact SA Access */
5491 	access_args.sq_attr_id = SA_MCMEMBERRECORD_ATTRID;
5492 	access_args.sq_access_type = IBMF_SAA_RETRIEVE;
5493 	access_args.sq_component_mask = component_mask;
5494 	access_args.sq_template = &mcg_req;
5495 	access_args.sq_template_length = sizeof (sa_mcmember_record_t);
5496 	access_args.sq_callback = NULL;
5497 	access_args.sq_callback_arg = NULL;
5498 
5499 	retval = ibcm_contact_sa_access(saa_handle, &access_args, &length,
5500 	    &results_p);
5501 	if (retval != IBT_SUCCESS) {
5502 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: SA access Failed");
5503 		ibcm_dec_hca_acc_cnt(hcap);
5504 		return (retval);
5505 	}
5506 
5507 	num_records = length/sizeof (sa_mcmember_record_t);
5508 
5509 	IBTF_DPRINTF_L4(cmlog, "ibt_query_mcg: Found %d MCMember Records",
5510 	    num_records);
5511 
5512 	/* Validate the returned number of records. */
5513 	if ((results_p != NULL) && (num_records > 0)) {
5514 		uint_t	i;
5515 
5516 		/*
5517 		 * If mcgs_max_num is zero, then return all records else
5518 		 * return only requested number of records
5519 		 */
5520 		if ((mcgs_max_num != 0) && (num_records > mcgs_max_num)) {
5521 			/* we are interested in only mcgs_max_num records */
5522 			num_records = mcgs_max_num;
5523 		}
5524 
5525 		/*
5526 		 * The SGID returned in "mcg_info_p" buffer should be PortGID,
5527 		 * (mcg_attr->mc_pgid), if 'mcg_attr->mc_pgid' was specified,
5528 		 * else RequestGID (rgid) should be returned.
5529 		 */
5530 		if ((port_gid.gid_prefix != 0) && (port_gid.gid_guid != 0)) {
5531 
5532 			/* Get sgid_ix and port number of 'port_gid' */
5533 			retval = ibtl_cm_get_hca_port(port_gid, 0, &hport);
5534 			if (retval != IBT_SUCCESS) {
5535 				IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: "
5536 				    "Failed to Get Portinfo for PortGID :"
5537 				    "status = %d", retval);
5538 				return (retval);
5539 			}
5540 		} else {
5541 			/*
5542 			 * The sgid_ix and port number related to RequestGID
5543 			 * are already obtained at the beginning.
5544 			 */
5545 			port_gid = rgid;
5546 		}
5547 
5548 		/*
5549 		 * Allocate memory for return buffer, to be freed in
5550 		 * ibt_free_mcg_info().
5551 		 */
5552 		mcg_infop = kmem_alloc((num_records * sizeof (ibt_mcg_info_t)),
5553 		    KM_SLEEP);
5554 
5555 		*mcgs_info_p = mcg_infop;
5556 		*entries_p = num_records;
5557 
5558 		/* Update the return values. */
5559 		for (i = 0; i < num_records; i++) {
5560 
5561 			mcg_resp = (sa_mcmember_record_t *)((uchar_t *)
5562 			    results_p + i * sizeof (sa_mcmember_record_t));
5563 
5564 			mcg_infop[i].mc_adds_vect.av_dgid = mcg_resp->MGID;
5565 			mcg_infop[i].mc_adds_vect.av_sgid = port_gid;
5566 			mcg_infop[i].mc_adds_vect.av_srate = mcg_resp->Rate;
5567 			mcg_infop[i].mc_adds_vect.av_srvl = mcg_resp->SL;
5568 			mcg_infop[i].mc_adds_vect.av_flow = mcg_resp->FlowLabel;
5569 			mcg_infop[i].mc_adds_vect.av_tclass = mcg_resp->TClass;
5570 			mcg_infop[i].mc_adds_vect.av_hop = mcg_resp->HopLimit;
5571 			mcg_infop[i].mc_adds_vect.av_port_num = hport.hp_port;
5572 			mcg_infop[i].mc_adds_vect.av_send_grh = B_TRUE;
5573 			mcg_infop[i].mc_adds_vect.av_dlid = mcg_resp->MLID;
5574 			mcg_infop[i].mc_adds_vect.av_sgid_ix = hport.hp_sgid_ix;
5575 			mcg_infop[i].mc_adds_vect.av_src_path = 0;
5576 			mcg_infop[i].mc_mtu = mcg_resp->MTU;
5577 			mcg_infop[i].mc_qkey = mcg_resp->Q_Key;
5578 			mcg_infop[i].mc_scope = mcg_resp->Scope;
5579 			mcg_infop[i].mc_pkt_lt = mcg_resp->PacketLifeTime;
5580 
5581 			if (ibt_pkey2index_byguid(hport.hp_hca_guid,
5582 			    hport.hp_port, mcg_resp->P_Key,
5583 			    &mcg_infop[i].mc_pkey_ix) != IBT_SUCCESS) {
5584 				IBTF_DPRINTF_L3(cmlog, "ibt_query_mcg: "
5585 				    "Pkey2Index Conversion failed");
5586 				mcg_infop[i].mc_pkey_ix = 0;
5587 			}
5588 		}
5589 
5590 		/*
5591 		 * Deallocate the memory allocated by SA for results_p.
5592 		 */
5593 		kmem_free(results_p, length);
5594 		retval = IBT_SUCCESS;
5595 
5596 		IBTF_DPRINTF_L3(cmlog, "ibt_query_mcg: returning %d MCGRecords",
5597 		    num_records);
5598 
5599 	} else {
5600 		retval = IBT_MCG_RECORDS_NOT_FOUND;
5601 		*entries_p = 0;
5602 
5603 		IBTF_DPRINTF_L3(cmlog, "ibt_query_mcg: MCG RECORDS NOT FOUND");
5604 	}
5605 
5606 	ibcm_dec_hca_acc_cnt(hcap);
5607 
5608 	return (retval);
5609 }
5610 
5611 
5612 /*
5613  * ibt_free_mcg_info()
5614  *	Free the memory allocated by successful ibt_query_mcg()
5615  *
5616  *	mcgs_info	Pointer returned by ibt_query_mcg().
5617  *
5618  *	entries		The number of ibt_mcg_info_t entries to free.
5619  */
5620 void
5621 ibt_free_mcg_info(ibt_mcg_info_t *mcgs_info, uint_t entries)
5622 {
5623 	IBTF_DPRINTF_L3(cmlog, "ibt_free_mcg_info: "
5624 	    "Free <%d> entries from 0x%p", entries, mcgs_info);
5625 
5626 	if ((mcgs_info != NULL) && (entries > 0))
5627 		kmem_free(mcgs_info, entries * sizeof (ibt_mcg_info_t));
5628 	else
5629 		IBTF_DPRINTF_L2(cmlog, "ibt_free_mcg_info: "
5630 		    "ERROR: NULL buf pointer or length specified.");
5631 }
5632 
5633 
5634 /*
5635  * Function:
5636  *	ibt_gid_to_node_info()
5637  * Input:
5638  *	gid		Identifies the IB Node and port for which to obtain
5639  *			Node information.
5640  * Output:
5641  *	node_info_p	A pointer to an ibt_node_info_t structure (allocated
5642  *			by the caller) in which to return the node information.
5643  * Returns:
5644  *	IBT_SUCCESS
5645  *	IBT_INVALID_PARAM
5646  *	IBT_NODE_RECORDS_NOT_FOUND
5647  *	IBT_NO_HCAS_AVAILABLE
5648  * Description:
5649  *	Retrieve Node Information for the specified GID.
5650  */
5651 ibt_status_t
5652 ibt_gid_to_node_info(ib_gid_t gid, ibt_node_info_t *node_info_p)
5653 {
5654 	sa_node_record_t	nr_req, *nr_resp;
5655 	ibmf_saa_handle_t	saa_handle;
5656 	ibt_status_t		retval;
5657 	ibcm_hca_info_t		*hcap;
5658 	ibtl_cm_hca_port_t	hport;
5659 	int			i, j;
5660 	uint_t			num_rec;
5661 	ib_guid_t		*guid_array = NULL;
5662 	sa_path_record_t	*path;
5663 	size_t			len;
5664 	uint8_t			npaths;
5665 	uint32_t		num_hcas = 0;
5666 	ib_lid_t		node_lid;
5667 	boolean_t		local_node = B_FALSE;
5668 	void			*res_p;
5669 	uint8_t			num_ports = 0;
5670 
5671 
5672 	IBTF_DPRINTF_L4(cmlog, "ibt_gid_to_node_info(%llX:%llX, %p)",
5673 	    gid.gid_prefix, gid.gid_guid, node_info_p);
5674 
5675 	if ((gid.gid_prefix == 0) || (gid.gid_guid == 0)) {
5676 		IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: GID is required");
5677 		return (IBT_INVALID_PARAM);
5678 	}
5679 
5680 	if (node_info_p == NULL) {
5681 		IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: "
5682 		    "Return Buf (node_info_p) is NULL.");
5683 		return (IBT_INVALID_PARAM);
5684 	}
5685 
5686 	/*
5687 	 * If 'gid' is on local node, then get node lid (i.e. base lid of the
5688 	 * associated port) info via ibtl_cm_get_hca_port() call.
5689 	 */
5690 	bzero(&hport, sizeof (ibtl_cm_hca_port_t));
5691 	if (ibtl_cm_get_hca_port(gid, 0, &hport) == IBT_SUCCESS) {
5692 
5693 		hcap = ibcm_find_hca_entry(hport.hp_hca_guid);
5694 		if (hcap == NULL) {
5695 			IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
5696 			    "HCA(%llX) info not found", hport.hp_hca_guid);
5697 			return (IBT_NO_HCAS_AVAILABLE);
5698 		}
5699 		num_ports = 1;
5700 		num_hcas = 1;
5701 		node_lid = hport.hp_base_lid;
5702 		local_node = B_TRUE;
5703 		IBTF_DPRINTF_L4(cmlog, "ibt_gid_to_node_info: Local Node: "
5704 		    "LID = 0x%X", node_lid);
5705 	} else {
5706 		/* Get the number of HCAs and their GUIDs */
5707 		num_hcas = ibt_get_hca_list(&guid_array);
5708 		IBTF_DPRINTF_L4(cmlog, "ibt_gid_to_node_info: ibt_get_hca_list "
5709 		    "returned %d hcas", num_hcas);
5710 
5711 		if (num_hcas == 0) {
5712 			IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: "
5713 			    "NO HCA's Found on this system");
5714 			return (IBT_NO_HCAS_AVAILABLE);
5715 		}
5716 	}
5717 
5718 	for (i = 0; i < num_hcas; i++) {
5719 		if (local_node == B_FALSE) {
5720 			hcap = ibcm_find_hca_entry(guid_array[i]);
5721 			if (hcap == NULL) {
5722 				IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
5723 				    "HCA(%llX) info not found", guid_array[i]);
5724 				retval = IBT_NO_HCAS_AVAILABLE;
5725 				continue;
5726 			}
5727 			num_ports = hcap->hca_num_ports;
5728 		}
5729 
5730 		for (j = 0; j < num_ports; j++) {
5731 			uint8_t		port = 0;
5732 
5733 			if (local_node == B_TRUE)
5734 				port = hport.hp_port;
5735 			else
5736 				port = j + 1;
5737 
5738 			/* Get SA Access Handle. */
5739 			saa_handle = ibcm_get_saa_handle(hcap, port);
5740 			if (saa_handle == NULL) {
5741 				IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
5742 				    "Port %d of HCA (%llX) is NOT ACTIVE",
5743 				    port, hport.hp_hca_guid);
5744 				retval = IBT_NODE_RECORDS_NOT_FOUND;
5745 				continue;
5746 			}
5747 
5748 			if (local_node == B_FALSE) {
5749 				ib_gid_t	sgid;
5750 				int		sa_ret;
5751 
5752 				/*
5753 				 * Check whether 'gid' and this port has same
5754 				 * subnet prefix. If not, then there is no use
5755 				 * in searching from this port.
5756 				 */
5757 				sgid = hcap->hca_port_info[j].port_sgid0;
5758 				if (gid.gid_prefix != sgid.gid_prefix) {
5759 					IBTF_DPRINTF_L3(cmlog,
5760 					    "ibt_gid_to_node_info:Sn_Prefix of "
5761 					    "GID(%llX) and Port's(%llX) differ",
5762 					    gid.gid_prefix, sgid.gid_prefix);
5763 					retval = IBT_NODE_RECORDS_NOT_FOUND;
5764 					continue;
5765 				}
5766 
5767 				/*
5768 				 * First Get Path Records for the specified DGID
5769 				 * from this port (SGID). From Path Records,
5770 				 * note down DLID, then use this DLID as Input
5771 				 * attribute to get NodeRecords from SA Access.
5772 				 */
5773 				npaths = 1;
5774 				path = NULL;
5775 
5776 				sa_ret = ibmf_saa_gid_to_pathrecords(saa_handle,
5777 				    sgid, gid, 0, 0, B_TRUE, &npaths, 0, &len,
5778 				    &path);
5779 				if (sa_ret != IBMF_SUCCESS) {
5780 					IBTF_DPRINTF_L2(cmlog,
5781 					    "ibt_gid_to_node_info: "
5782 					    "ibmf_saa_gid_to_pathrecords() "
5783 					    "returned error: %d ", sa_ret);
5784 					retval =
5785 					    ibcm_ibmf_analyze_error(sa_ret);
5786 					continue;
5787 				} else if ((npaths == 0) || (path == NULL)) {
5788 					IBTF_DPRINTF_L3(cmlog,
5789 					    "ibt_gid_to_node_info: failed (%d) "
5790 					    "to get path records for the DGID "
5791 					    "0x%llX from SGID 0x%llX", sa_ret,
5792 					    gid.gid_guid, sgid.gid_guid);
5793 					retval = IBT_NODE_RECORDS_NOT_FOUND;
5794 					continue;
5795 				}
5796 				node_lid = path->DLID;	/* LID */
5797 
5798 				IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
5799 				    "Remote Node: LID = 0x%X", node_lid);
5800 
5801 				/* Free SA_Access memory for path record. */
5802 				kmem_free(path, len);
5803 			}
5804 
5805 			/* Retrieve Node Records from SA Access. */
5806 			bzero(&nr_req, sizeof (sa_node_record_t));
5807 
5808 			nr_req.LID = node_lid;	/* LID */
5809 
5810 			retval = ibcm_get_node_rec(saa_handle, &nr_req,
5811 			    SA_NODEINFO_COMPMASK_NODELID, &res_p, &len);
5812 			if (retval == IBT_NODE_RECORDS_NOT_FOUND) {
5813 				IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: "
5814 				    "failed (%d) to get Node records", retval);
5815 				continue;
5816 			} else if (retval != IBT_SUCCESS) {
5817 				IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: "
5818 				    "failed (%d) to get Node records", retval);
5819 				ibcm_dec_hca_acc_cnt(hcap);
5820 				goto gid_to_ni_exit;
5821 			}
5822 
5823 			num_rec = len/sizeof (sa_node_record_t);
5824 			nr_resp = (sa_node_record_t *)(uchar_t *)res_p;
5825 
5826 			/* Validate the returned number of records. */
5827 			if ((nr_resp != NULL) && (num_rec > 0)) {
5828 
5829 				IBCM_DUMP_NODE_REC(nr_resp);
5830 
5831 				_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(
5832 				    *node_info_p))
5833 
5834 				node_info_p->n_sys_img_guid =
5835 				    nr_resp->NodeInfo.SystemImageGUID;
5836 				node_info_p->n_node_guid =
5837 				    nr_resp->NodeInfo.NodeGUID;
5838 				node_info_p->n_port_guid =
5839 				    nr_resp->NodeInfo.PortGUID;
5840 				node_info_p->n_dev_id =
5841 				    nr_resp->NodeInfo.DeviceID;
5842 				node_info_p->n_revision =
5843 				    nr_resp->NodeInfo.Revision;
5844 				node_info_p->n_vendor_id =
5845 				    nr_resp->NodeInfo.VendorID;
5846 				node_info_p->n_num_ports =
5847 				    nr_resp->NodeInfo.NumPorts;
5848 				node_info_p->n_port_num =
5849 				    nr_resp->NodeInfo.LocalPortNum;
5850 				node_info_p->n_node_type =
5851 				    nr_resp->NodeInfo.NodeType;
5852 				(void) strncpy(node_info_p->n_description,
5853 				    (char *)&nr_resp->NodeDescription, 64);
5854 
5855 				_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(
5856 				    *node_info_p))
5857 
5858 				/*
5859 				 * Deallocate the memory allocated by SA for
5860 				 * 'nr_resp'.
5861 				 */
5862 				ibcm_dec_hca_acc_cnt(hcap);
5863 				kmem_free(nr_resp, len);
5864 				retval = IBT_SUCCESS;
5865 
5866 				goto gid_to_ni_exit;
5867 			} else {
5868 				retval = IBT_NODE_RECORDS_NOT_FOUND;
5869 				IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
5870 				    "Node Records NOT found - PortGUID %016llX",
5871 				    gid.gid_guid);
5872 			}
5873 		}
5874 		ibcm_dec_hca_acc_cnt(hcap);
5875 
5876 		if (local_node == B_TRUE)
5877 			break;
5878 	}
5879 
5880 gid_to_ni_exit:
5881 	if (guid_array)
5882 		ibt_free_hca_list(guid_array, num_hcas);
5883 
5884 	IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: done. Status %d", retval);
5885 
5886 	return (retval);
5887 }
5888 
5889 
5890 ibt_status_t
5891 ibcm_get_node_rec(ibmf_saa_handle_t saa_handle, sa_node_record_t *nr_req,
5892     uint64_t component_mask, void *result_p, size_t *len)
5893 {
5894 	ibmf_saa_access_args_t  args;
5895 	size_t			length;
5896 	ibt_status_t		retval;
5897 
5898 	args.sq_attr_id = SA_NODERECORD_ATTRID;
5899 	args.sq_template = nr_req;
5900 	args.sq_access_type = IBMF_SAA_RETRIEVE;
5901 	args.sq_template_length = sizeof (sa_node_record_t);
5902 	args.sq_component_mask = component_mask;
5903 	args.sq_callback = NULL;
5904 	args.sq_callback_arg = NULL;
5905 
5906 	retval = ibcm_contact_sa_access(saa_handle, &args, &length, result_p);
5907 	if (retval != IBT_SUCCESS) {
5908 		IBTF_DPRINTF_L2(cmlog, "ibcm_get_node_rec: SA Call Failed");
5909 		return (retval);
5910 	}
5911 
5912 	*len = length;
5913 
5914 	/* Validate the returned number of records. */
5915 	if ((result_p != NULL) && (length > 0)) {
5916 		IBTF_DPRINTF_L3(cmlog, "ibcm_get_node_rec: Node Records FOUND");
5917 
5918 		/* Got it, done!. */
5919 		return (IBT_SUCCESS);
5920 	} else {
5921 		IBTF_DPRINTF_L2(cmlog, "ibcm_get_node_rec: Node Rec NOT found");
5922 		return (IBT_NODE_RECORDS_NOT_FOUND);
5923 	}
5924 }
5925 
5926 
5927 /*
5928  * Function:
5929  *	ibt_get_companion_port_gids()
5930  * Description:
5931  *	Get list of GID's available on a companion port(s) of the specified
5932  *	GID or list of GIDs available on a specified Node GUID/SystemImage GUID.
5933  */
5934 ibt_status_t
5935 ibt_get_companion_port_gids(ib_gid_t gid, ib_guid_t hca_guid,
5936     ib_guid_t sysimg_guid, ib_gid_t **gids_p, uint_t *num_gids_p)
5937 {
5938 	sa_node_record_t	nr_req, *nr_resp;
5939 	void			*res_p;
5940 	ibmf_saa_handle_t	saa_handle;
5941 	int			sa_ret;
5942 	ibt_status_t		retval = IBT_SUCCESS;
5943 	ibcm_hca_info_t		*hcap;
5944 	ibtl_cm_hca_port_t	hport;
5945 	int			i, j;
5946 	uint_t			num_rec;
5947 	ib_guid_t		*guid_array = NULL;
5948 	sa_path_record_t	*path;
5949 	size_t			len;
5950 	uint8_t			npaths;
5951 	uint32_t		num_hcas = 0;
5952 	boolean_t		local_node = B_FALSE;
5953 	boolean_t		local_hca = B_FALSE;
5954 	ib_guid_t		h_guid = hca_guid;
5955 	ib_gid_t		*gidp = NULL, *t_gidp = NULL;
5956 	int			multi_hca_loop = 0;
5957 
5958 	IBTF_DPRINTF_L4(cmlog, "ibt_get_companion_port_gids(%llX:%llX, %llX, "
5959 	    "%llX)", gid.gid_prefix, gid.gid_guid, hca_guid, sysimg_guid);
5960 
5961 	if (((gid.gid_prefix == 0) || (gid.gid_guid == 0)) && (hca_guid == 0) &&
5962 	    (sysimg_guid == 0)) {
5963 		IBTF_DPRINTF_L2(cmlog, "ibt_get_companion_port_gids: "
5964 		    "Null Input attribute specified.");
5965 		return (IBT_INVALID_PARAM);
5966 	}
5967 
5968 	if ((num_gids_p == NULL) || (gids_p == NULL)) {
5969 		IBTF_DPRINTF_L2(cmlog, "ibt_get_companion_port_gids: "
5970 		    "num_gids_p or gids_p is NULL");
5971 		return (IBT_INVALID_PARAM);
5972 	}
5973 
5974 	*num_gids_p = 0;
5975 
5976 	/* Get the number of HCAs and their GUIDs */
5977 	if ((num_hcas = ibt_get_hca_list(&guid_array)) == 0) {
5978 		IBTF_DPRINTF_L2(cmlog, "ibt_get_companion_port_gids: "
5979 		    "NO HCA's Found on this system");
5980 		return (IBT_NO_HCAS_AVAILABLE);
5981 	}
5982 
5983 	IBTF_DPRINTF_L4(cmlog, "ibt_get_companion_port_gids: "
5984 	    "ibt_get_hca_list() returned %d hcas", num_hcas);
5985 
5986 	/*
5987 	 * If 'gid' is on local node, then get node lid (i.e. base lid of the
5988 	 * associated port) info via ibtl_cm_get_hca_port() call.
5989 	 */
5990 	bzero(&hport, sizeof (ibtl_cm_hca_port_t));
5991 	if ((gid.gid_prefix != 0) && (gid.gid_guid != 0) &&
5992 	    (ibtl_cm_get_hca_port(gid, 0, &hport) == IBT_SUCCESS)) {
5993 
5994 		if ((hca_guid != 0) && (hca_guid != hport.hp_hca_guid)) {
5995 			IBTF_DPRINTF_L2(cmlog, "ibt_get_companion_port_gids: "
5996 			    "Invalid GID<->HCAGUID combination specified.");
5997 			retval = IBT_INVALID_PARAM;
5998 			goto get_comp_pgid_exit;
5999 		}
6000 		h_guid = hport.hp_hca_guid;
6001 		local_node = B_TRUE;
6002 
6003 		IBTF_DPRINTF_L4(cmlog, "ibt_get_companion_port_gids: "
6004 		    "Local Node: HCA (0x%llX)", h_guid);
6005 	} else if (h_guid) {	/* Is specified HCA GUID - local? */
6006 		for (i = 0; i < num_hcas; i++) {
6007 			if (h_guid == guid_array[i]) {
6008 				local_hca = B_TRUE;
6009 				break;
6010 			}
6011 		}
6012 	} else if (sysimg_guid) { /* Is specified SystemImage GUID - local? */
6013 		for (i = 0; i < num_hcas; i++) {
6014 			ibt_status_t	ret;
6015 			ibt_hca_attr_t	hca_attr;
6016 
6017 			ret = ibt_query_hca_byguid(guid_array[i], &hca_attr);
6018 			if (ret != IBT_SUCCESS) {
6019 				IBTF_DPRINTF_L2(cmlog,
6020 				    "ibt_get_companion_port_gids: HCA(%llX) "
6021 				    "info not found", guid_array[i]);
6022 				retval = IBT_NO_HCAS_AVAILABLE;
6023 				continue;
6024 			}
6025 			if (hca_attr.hca_si_guid == sysimg_guid) {
6026 				if ((hca_guid != 0) &&
6027 				    (hca_guid != hca_attr.hca_node_guid)) {
6028 					IBTF_DPRINTF_L2(cmlog,
6029 					    "ibt_get_companion_port_gids: "
6030 					    "Invalid SysImg<->HCA GUID "
6031 					    "combination specified.");
6032 					retval = IBT_INVALID_PARAM;
6033 					goto get_comp_pgid_exit;
6034 				}
6035 				local_hca = B_TRUE;
6036 				h_guid = hca_attr.hca_node_guid;
6037 				break;
6038 			}
6039 		}
6040 	}
6041 
6042 	if ((local_node == B_TRUE) || (local_hca == B_TRUE)) {
6043 		retval = ibtl_cm_get_local_comp_gids(h_guid, gid, gids_p,
6044 		    num_gids_p);
6045 		goto get_comp_pgid_exit;
6046 	}
6047 
6048 get_comp_for_multihca:
6049 	/* We will be here, if request is for remote node */
6050 	for (i = 0; i < num_hcas; i++) {
6051 		int		multism;
6052 		uint_t		count = 0;
6053 		int		multi_sm_loop = 0;
6054 		uint_t		k = 0, l;
6055 
6056 		hcap = ibcm_find_hca_entry(guid_array[i]);
6057 		if (hcap == NULL) {
6058 			IBTF_DPRINTF_L3(cmlog, "ibt_get_companion_port_gids: "
6059 			    "HCA(%llX) info not found", guid_array[i]);
6060 			retval = IBT_NO_HCAS_AVAILABLE;
6061 			continue;
6062 		}
6063 
6064 		/* 1 - MultiSM, 0 - Single SM */
6065 		multism = ibtl_cm_is_multi_sm(guid_array[i]);
6066 
6067 		for (j = 0; j < hcap->hca_num_ports; j++) {
6068 			ib_gid_t	sgid;
6069 			uint64_t	c_mask = 0;
6070 			ib_guid_t	pg;
6071 			uint_t		port = j;
6072 
6073 get_comp_for_multism:
6074 			IBTF_DPRINTF_L3(cmlog, "ibt_get_companion_port_gids: "
6075 			    "Port %d, HCA %llX, MultiSM= %d, Loop=%d",
6076 			    port + 1, h_guid, multism, multi_sm_loop);
6077 
6078 			/* Get SA Access Handle. */
6079 			saa_handle = ibcm_get_saa_handle(hcap, port + 1);
6080 			if (saa_handle == NULL) {
6081 				IBTF_DPRINTF_L2(cmlog,
6082 				    "ibt_get_companion_port_gids: "
6083 				    "Port (%d)  - NOT ACTIVE", port + 1);
6084 				retval = IBT_GIDS_NOT_FOUND;
6085 				continue;
6086 			}
6087 
6088 			/*
6089 			 * Check whether 'gid' and this port has same subnet
6090 			 * prefix. If not, then there is no use in searching
6091 			 * from this port.
6092 			 */
6093 			sgid = hcap->hca_port_info[port].port_sgid0;
6094 			if ((h_guid == 0) && (gid.gid_prefix != 0) &&
6095 			    (multi_sm_loop == 0) &&
6096 			    (gid.gid_prefix != sgid.gid_prefix)) {
6097 				IBTF_DPRINTF_L2(cmlog,
6098 				    "ibt_get_companion_port_gids: SnPrefix of "
6099 				    "GID(%llX) and Port SN_Pfx(%llX) differ",
6100 				    gid.gid_prefix, sgid.gid_prefix);
6101 				retval = IBT_GIDS_NOT_FOUND;
6102 				continue;
6103 			}
6104 
6105 			/*
6106 			 * If HCA GUID or System Image GUID is specified, then
6107 			 * we can achieve our goal sooner!.
6108 			 */
6109 			if ((h_guid == 0) && (sysimg_guid == 0)) {
6110 				/* So only GID info is provided. */
6111 
6112 				/*
6113 				 * First Get Path Records for the specified DGID
6114 				 * from this port (SGID). From Path Records,
6115 				 * note down DLID, then use this DLID as Input
6116 				 * attribute to get NodeRecords.
6117 				 */
6118 				npaths = 1;
6119 				path = NULL;
6120 
6121 				sa_ret = ibmf_saa_gid_to_pathrecords(saa_handle,
6122 				    sgid, gid, 0, 0, B_TRUE, &npaths, 0, &len,
6123 				    &path);
6124 				if (sa_ret != IBMF_SUCCESS) {
6125 					IBTF_DPRINTF_L2(cmlog,
6126 					    "ibt_get_companion_port_gids: "
6127 					    "ibmf_saa_gid_to_pathrecords() "
6128 					    "returned error: %d ", sa_ret);
6129 					retval =
6130 					    ibcm_ibmf_analyze_error(sa_ret);
6131 					ibcm_dec_hca_acc_cnt(hcap);
6132 					goto get_comp_pgid_exit;
6133 				} else if ((npaths == 0) || (path == NULL)) {
6134 					IBTF_DPRINTF_L2(cmlog,
6135 					    "ibt_get_companion_port_gids: "
6136 					    "failed (%d) to get path records "
6137 					    "for the DGID (0x%llX) from SGID "
6138 					    "(0x%llX)", sa_ret, gid.gid_guid,
6139 					    sgid.gid_guid);
6140 					retval = IBT_GIDS_NOT_FOUND;
6141 					continue;
6142 				}
6143 
6144 				bzero(&nr_req, sizeof (sa_node_record_t));
6145 				nr_req.LID = path->DLID;	/* LID */
6146 
6147 				IBTF_DPRINTF_L3(cmlog,
6148 				    "ibt_get_companion_port_gids: "
6149 				    "Remote Node: LID = 0x%X", nr_req.LID);
6150 
6151 				/* Free SA_Access memory for path record. */
6152 				kmem_free(path, len);
6153 
6154 				IBTF_DPRINTF_L3(cmlog,
6155 				    "ibt_get_companion_port_gids: SAA Call: "
6156 				    "based on LID ");
6157 
6158 				retval = ibcm_get_node_rec(saa_handle, &nr_req,
6159 				    SA_NODEINFO_COMPMASK_NODELID, &res_p, &len);
6160 				if (retval == IBT_NODE_RECORDS_NOT_FOUND) {
6161 					IBTF_DPRINTF_L2(cmlog,
6162 					    "ibt_get_companion_port_gids: "
6163 					    "failed (%d) to get Node records",
6164 					    retval);
6165 					continue;
6166 				} else if (retval != IBT_SUCCESS) {
6167 					IBTF_DPRINTF_L2(cmlog,
6168 					    "ibt_get_companion_port_gids: "
6169 					    "failed (%d) to get Node records",
6170 					    retval);
6171 					ibcm_dec_hca_acc_cnt(hcap);
6172 					goto get_comp_pgid_exit;
6173 				}
6174 
6175 				nr_resp = (sa_node_record_t *)(uchar_t *)res_p;
6176 				/* Note down HCA GUID info. */
6177 				h_guid = nr_resp->NodeInfo.NodeGUID;
6178 
6179 				IBTF_DPRINTF_L3(cmlog,
6180 				    "ibt_get_companion_port_gids: "
6181 				    "Remote HCA GUID: 0x%llX", h_guid);
6182 
6183 				IBCM_DUMP_NODE_REC(nr_resp);
6184 
6185 				kmem_free(res_p, len);
6186 			}
6187 
6188 			bzero(&nr_req, sizeof (sa_node_record_t));
6189 			if (h_guid != 0) {
6190 				nr_req.NodeInfo.NodeGUID = h_guid;
6191 				c_mask = SA_NODEINFO_COMPMASK_NODEGUID;
6192 			}
6193 
6194 			if (sysimg_guid != 0) {
6195 				nr_req.NodeInfo.SystemImageGUID = sysimg_guid;
6196 				c_mask |= SA_NODEINFO_COMPMASK_SYSIMAGEGUID;
6197 			}
6198 
6199 			IBTF_DPRINTF_L3(cmlog, "ibt_get_companion_port_gids: "
6200 			    "SAA Call: CMASK= 0x%llX", c_mask);
6201 
6202 			retval = ibcm_get_node_rec(saa_handle, &nr_req, c_mask,
6203 			    &res_p, &len);
6204 			if (retval == IBT_NODE_RECORDS_NOT_FOUND) {
6205 				IBTF_DPRINTF_L3(cmlog,
6206 				    "ibt_get_companion_port_gids: "
6207 				    "failed (%d) to get Node records", retval);
6208 				continue;
6209 			} else if (retval != IBT_SUCCESS) {
6210 				IBTF_DPRINTF_L2(cmlog,
6211 				    "ibt_get_companion_port_gids: Error: (%d) "
6212 				    "while getting Node records", retval);
6213 				ibcm_dec_hca_acc_cnt(hcap);
6214 				goto get_comp_pgid_exit;
6215 			}
6216 
6217 			num_rec = len/sizeof (sa_node_record_t);
6218 
6219 			/* We will be here, only if we found some NodeRec */
6220 			if (gid.gid_prefix && gid.gid_guid) {
6221 				nr_resp = (sa_node_record_t *)res_p;
6222 				for (l = 0; l < num_rec; l++, nr_resp++) {
6223 					pg = nr_resp->NodeInfo.PortGUID;
6224 					if (gid.gid_guid != pg)
6225 						count++;
6226 				}
6227 			} else {
6228 				count = num_rec;
6229 			}
6230 
6231 			if (count != 0) {
6232 				if (multi_sm_loop == 1) {
6233 					count += k;
6234 					t_gidp = kmem_zalloc(count *
6235 					    sizeof (ib_gid_t), KM_SLEEP);
6236 
6237 					if ((k != 0) && (gidp != NULL)) {
6238 						bcopy(gidp, t_gidp,
6239 						    k * sizeof (ib_gid_t));
6240 						kmem_free(gidp,
6241 						    k * sizeof (ib_gid_t));
6242 					}
6243 					gidp = t_gidp;
6244 				} else {
6245 					gidp = kmem_zalloc(count *
6246 					    sizeof (ib_gid_t), KM_SLEEP);
6247 				}
6248 				*num_gids_p = count;
6249 				*gids_p = gidp;
6250 
6251 				nr_resp = (sa_node_record_t *)res_p;
6252 				for (l = 0; l < num_rec; l++, nr_resp++) {
6253 					IBCM_DUMP_NODE_REC(nr_resp);
6254 
6255 					pg = nr_resp->NodeInfo.PortGUID;
6256 					IBTF_DPRINTF_L4(cmlog,
6257 					    "ibt_get_companion_port_gids: "
6258 					    "PortGID %llX", pg);
6259 
6260 					if (pg != gid.gid_guid) {
6261 						gidp[k].gid_prefix =
6262 						    sgid.gid_prefix;
6263 						gidp[k].gid_guid = pg;
6264 
6265 						IBTF_DPRINTF_L3(cmlog,
6266 						    "ibt_get_companion_pgids: "
6267 						    "GID[%d] = %llX:%llX", k,
6268 						    gidp[k].gid_prefix,
6269 						    gidp[k].gid_guid);
6270 
6271 						k++;
6272 						if (k == count)
6273 							break;
6274 					}
6275 				}
6276 				retval = IBT_SUCCESS;	/* done!. */
6277 				kmem_free(res_p, len);
6278 				ibcm_dec_hca_acc_cnt(hcap);
6279 				goto get_comp_pgid_exit;
6280 			} else {
6281 				IBTF_DPRINTF_L2(cmlog,
6282 				    "ibt_get_companion_port_gids: "
6283 				    "Companion PortGIDs not available");
6284 				retval = IBT_GIDS_NOT_FOUND;
6285 			}
6286 			/* Deallocate the memory for 'res_p'. */
6287 			kmem_free(res_p, len);
6288 
6289 			/*
6290 			 * If we are on MultiSM setup, then we need to lookout
6291 			 * from that subnet port too.
6292 			 */
6293 			if (multism) {
6294 				/* break if already searched both the subnet */
6295 				if (multi_sm_loop == 1)
6296 					break;
6297 
6298 				port = (j == 0) ? 1 : 0;
6299 				multi_sm_loop = 1;
6300 				goto get_comp_for_multism;
6301 			} else {
6302 				break;
6303 			}
6304 		}
6305 		ibcm_dec_hca_acc_cnt(hcap);
6306 
6307 		/*
6308 		 * We may be on dual HCA with dual SM configured system.  And
6309 		 * the input attr GID was visible from second HCA. So in order
6310 		 * to get the companion portgid we need to re-look from the
6311 		 * first HCA ports.
6312 		 */
6313 		if ((num_hcas > 1) && (i > 0) && (h_guid != 0) &&
6314 		    (multi_hca_loop != 1)) {
6315 			multi_hca_loop = 1;
6316 			goto get_comp_for_multihca;
6317 		}
6318 	}
6319 	if (*num_gids_p == 0)
6320 		retval = IBT_GIDS_NOT_FOUND;
6321 
6322 get_comp_pgid_exit:
6323 	if (guid_array)
6324 		ibt_free_hca_list(guid_array, num_hcas);
6325 
6326 	if ((retval != IBT_SUCCESS) && (*num_gids_p != 0)) {
6327 		retval = IBT_SUCCESS;
6328 	}
6329 
6330 	IBTF_DPRINTF_L3(cmlog, "ibt_get_companion_port_gids: done. Status %d, "
6331 	    "Found %d GIDs", retval, *num_gids_p);
6332 
6333 	return (retval);
6334 }
6335 
6336 /* RDMA IP CM Support routines */
6337 ibt_status_t
6338 ibt_get_src_ip(ib_gid_t gid, ib_pkey_t pkey, ibt_ip_addr_t *src_ip)
6339 {
6340 	ibcm_arp_ip_t		*ipp;
6341 	ibcm_arp_ibd_insts_t	ibds;
6342 	int			i;
6343 	boolean_t		found = B_FALSE;
6344 	ibt_status_t		retval = IBT_SUCCESS;
6345 
6346 	IBTF_DPRINTF_L4(cmlog, "ibt_get_src_ip(%llX:%llX, %X, %p)",
6347 	    gid.gid_prefix, gid.gid_guid, pkey, src_ip);
6348 
6349 	if (gid.gid_prefix == 0 || gid.gid_guid == 0) {
6350 		IBTF_DPRINTF_L3(cmlog, "ibt_get_src_ip: Invalid GID.");
6351 		return (IBT_INVALID_PARAM);
6352 	}
6353 
6354 	if (src_ip == NULL) {
6355 		IBTF_DPRINTF_L3(cmlog, "ibt_get_src_ip: ERROR: src_ip NULL");
6356 		return (IBT_INVALID_PARAM);
6357 	}
6358 
6359 	bzero(&ibds, sizeof (ibcm_arp_ibd_insts_t));
6360 	ibds.ibcm_arp_ibd_alloc = IBCM_ARP_IBD_INSTANCES;
6361 	ibds.ibcm_arp_ibd_cnt = 0;
6362 	ibds.ibcm_arp_ip = (ibcm_arp_ip_t *)kmem_zalloc(
6363 	    ibds.ibcm_arp_ibd_alloc * sizeof (ibcm_arp_ip_t), KM_SLEEP);
6364 
6365 	retval = ibcm_arp_get_ibds(&ibds, AF_UNSPEC);
6366 	if (retval != IBT_SUCCESS) {
6367 		IBTF_DPRINTF_L2(cmlog, "ibt_get_src_ip: ibcm_arp_get_ibds "
6368 		    "failed to get IBD Instances: ret 0x%x", retval);
6369 		goto get_src_ip_end;
6370 	}
6371 
6372 	for (i = 0, ipp = ibds.ibcm_arp_ip; i < ibds.ibcm_arp_ibd_cnt;
6373 	    i++, ipp++) {
6374 		if (ipp->ip_inet_family == AF_UNSPEC)
6375 			continue;
6376 		if (ipp->ip_port_gid.gid_prefix == gid.gid_prefix &&
6377 		    ipp->ip_port_gid.gid_guid == gid.gid_guid) {
6378 			if (pkey) {
6379 				if (ipp->ip_pkey == pkey) {
6380 					found = B_TRUE;
6381 					break;
6382 				} else
6383 					continue;
6384 			}
6385 			found = B_TRUE;
6386 			break;
6387 		}
6388 	}
6389 
6390 	if (found == B_FALSE) {
6391 		retval = IBT_SRC_IP_NOT_FOUND;
6392 	} else {
6393 		src_ip->family = ipp->ip_inet_family;
6394 		if (src_ip->family == AF_INET) {
6395 			bcopy(&ipp->ip_cm_sin.sin_addr, &src_ip->un.ip4addr,
6396 			    sizeof (in_addr_t));
6397 		} else if (src_ip->family == AF_INET6) {
6398 			bcopy(&ipp->ip_cm_sin6.sin6_addr, &src_ip->un.ip6addr,
6399 			    sizeof (in6_addr_t));
6400 		}
6401 		IBCM_PRINT_IP("ibt_get_src_ip", src_ip);
6402 	}
6403 
6404 get_src_ip_end:
6405 	if (ibds.ibcm_arp_ip)
6406 		kmem_free(ibds.ibcm_arp_ip,
6407 		    ibds.ibcm_arp_ibd_alloc * sizeof (ibcm_arp_ip_t));
6408 
6409 	return (retval);
6410 }
6411 
6412 ib_svc_id_t
6413 ibt_get_ip_sid(uint8_t protocol_num, in_port_t dst_port)
6414 {
6415 	ib_svc_id_t	sid;
6416 
6417 	IBTF_DPRINTF_L4(cmlog, "ibt_get_ip_sid(%X, %lX)", protocol_num,
6418 	    dst_port);
6419 
6420 	/*
6421 	 * If protocol_num is non-zero, then formulate the SID and return it.
6422 	 * If protocol_num is zero, then we need to assign a locally generated
6423 	 * IP SID with IB_SID_IPADDR_PREFIX.
6424 	 */
6425 	if (protocol_num) {
6426 		sid = IB_SID_IPADDR_PREFIX | protocol_num << 16 | dst_port;
6427 	} else {
6428 		sid = ibcm_alloc_ip_sid();
6429 	}
6430 
6431 	IBTF_DPRINTF_L3(cmlog, "ibt_get_ip_sid: SID: 0x%016llX", sid);
6432 	return (sid);
6433 }
6434 
6435 ibt_status_t
6436 ibt_release_ip_sid(ib_svc_id_t ip_sid)
6437 {
6438 	IBTF_DPRINTF_L4(cmlog, "ibt_release_ip_sid(%llX)", ip_sid);
6439 
6440 	if (((ip_sid & IB_SID_IPADDR_PREFIX_MASK) != 0) ||
6441 	    (!(ip_sid & IB_SID_IPADDR_PREFIX))) {
6442 		IBTF_DPRINTF_L2(cmlog, "ibt_release_ip_sid(0x%016llX): ERROR: "
6443 		    "Called for Non-RDMA IP SID", ip_sid);
6444 		return (IBT_INVALID_PARAM);
6445 	}
6446 
6447 	/*
6448 	 * If protocol_num in ip_sid are all ZEROs, then this SID is allocated
6449 	 * by IBTF. If not, then the specified ip_sid is invalid.
6450 	 */
6451 	if (ip_sid & IB_SID_IPADDR_IPNUM_MASK) {
6452 		IBTF_DPRINTF_L2(cmlog, "ibt_release_ip_sid(0x%016llX): ERROR: "
6453 		    "Called for Non-IBTF assigned RDMA IP SID", ip_sid);
6454 		return (IBT_INVALID_PARAM);
6455 	}
6456 
6457 	ibcm_free_ip_sid(ip_sid);
6458 
6459 	return (IBT_SUCCESS);
6460 }
6461 
6462 
6463 uint8_t
6464 ibt_get_ip_protocol_num(ib_svc_id_t sid)
6465 {
6466 	return ((sid & IB_SID_IPADDR_IPNUM_MASK) >> 16);
6467 }
6468 
6469 in_port_t
6470 ibt_get_ip_dst_port(ib_svc_id_t sid)
6471 {
6472 	return (sid & IB_SID_IPADDR_PORTNUM_MASK);
6473 }
6474 
6475 _NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibt_ip_cm_info_t))
6476 _NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibcm_ip_pvtdata_t))
6477 
6478 ibt_status_t
6479 ibt_format_ip_private_data(ibt_ip_cm_info_t *ip_cm_info,
6480     ibt_priv_data_len_t priv_data_len, void *priv_data_p)
6481 {
6482 	ibcm_ip_pvtdata_t	ip_data;
6483 
6484 	IBTF_DPRINTF_L4(cmlog, "ibt_format_ip_private_data(%p, %d, %p)",
6485 	    ip_cm_info, priv_data_len, priv_data_p);
6486 
6487 	if ((ip_cm_info == NULL) || (priv_data_p == NULL) ||
6488 	    (priv_data_len < IBT_IP_HDR_PRIV_DATA_SZ)) {
6489 		IBTF_DPRINTF_L2(cmlog, "ibt_format_ip_private_data: ERROR "
6490 		    "Invalid Inputs.");
6491 		return (IBT_INVALID_PARAM);
6492 	}
6493 
6494 	bzero(&ip_data, sizeof (ibcm_ip_pvtdata_t));
6495 	ip_data.ip_srcport = ip_cm_info->src_port; /* Source Port */
6496 
6497 	IBCM_PRINT_IP("format_ip_pvt: src", &ip_cm_info->src_addr);
6498 	IBCM_PRINT_IP("format_ip_pvt: dst", &ip_cm_info->dst_addr);
6499 	/* IPV = 0x4, if IP-Addr are IPv4 format, else 0x6 for IPv6 */
6500 	if (ip_cm_info->src_addr.family == AF_INET) {
6501 		ip_data.ip_ipv = IBT_CM_IP_IPV_V4;
6502 		ip_data.ip_srcv4 = ip_cm_info->src_addr.un.ip4addr;
6503 		ip_data.ip_dstv4 = ip_cm_info->dst_addr.un.ip4addr;
6504 	} else if (ip_cm_info->src_addr.family == AF_INET6) {
6505 		ip_data.ip_ipv = IBT_CM_IP_IPV_V6;
6506 		bcopy(&ip_cm_info->src_addr.un.ip6addr,
6507 		    &ip_data.ip_srcv6, sizeof (in6_addr_t));
6508 		bcopy(&ip_cm_info->dst_addr.un.ip6addr,
6509 		    &ip_data.ip_dstv6, sizeof (in6_addr_t));
6510 	} else {
6511 		IBTF_DPRINTF_L2(cmlog, "ibt_format_ip_private_data: ERROR "
6512 		    "IP Addr needs to be either AF_INET or AF_INET6 family.");
6513 		return (IBT_INVALID_PARAM);
6514 	}
6515 
6516 	ip_data.ip_MajV = IBT_CM_IP_MAJ_VER;
6517 	ip_data.ip_MinV = IBT_CM_IP_MIN_VER;
6518 
6519 	bcopy(&ip_data, priv_data_p, IBT_IP_HDR_PRIV_DATA_SZ);
6520 
6521 	return (IBT_SUCCESS);
6522 }
6523 
6524 
6525 ibt_status_t
6526 ibt_get_ip_data(ibt_priv_data_len_t priv_data_len, void *priv_data,
6527     ibt_ip_cm_info_t *ip_cm_infop)
6528 {
6529 	ibcm_ip_pvtdata_t	ip_data;
6530 
6531 	IBTF_DPRINTF_L4(cmlog, "ibt_get_ip_data(%d, %p, %p)",
6532 	    priv_data_len, priv_data, ip_cm_infop);
6533 
6534 	if ((ip_cm_infop == NULL) || (priv_data == NULL) ||
6535 	    (priv_data_len < IBT_IP_HDR_PRIV_DATA_SZ)) {
6536 		IBTF_DPRINTF_L2(cmlog, "ibt_get_ip_data: ERROR Invalid Inputs");
6537 		return (IBT_INVALID_PARAM);
6538 	}
6539 
6540 	bcopy(priv_data, &ip_data, IBT_IP_HDR_PRIV_DATA_SZ);
6541 	ip_cm_infop->src_port = ip_data.ip_srcport; /* Source Port */
6542 
6543 	/* IPV = 0x4, if IP Address are IPv4 format, else 0x6 for IPv6 */
6544 	if (ip_data.ip_ipv == IBT_CM_IP_IPV_V4) {
6545 		/* Copy IPv4 Addr */
6546 		ip_cm_infop->src_addr.family = ip_cm_infop->dst_addr.family =
6547 		    AF_INET;
6548 		ip_cm_infop->src_addr.un.ip4addr = ip_data.ip_srcv4;
6549 		ip_cm_infop->dst_addr.un.ip4addr = ip_data.ip_dstv4;
6550 	} else if (ip_data.ip_ipv == IBT_CM_IP_IPV_V6) {
6551 		/* Copy IPv6 Addr */
6552 		ip_cm_infop->src_addr.family = ip_cm_infop->dst_addr.family =
6553 		    AF_INET6;
6554 		bcopy(&ip_data.ip_srcv6, &ip_cm_infop->src_addr.un.ip6addr,
6555 		    sizeof (in6_addr_t));
6556 		bcopy(&ip_data.ip_dstv6, &ip_cm_infop->dst_addr.un.ip6addr,
6557 		    sizeof (in6_addr_t));
6558 	} else {
6559 		IBTF_DPRINTF_L2(cmlog, "ibt_get_ip_data: ERROR: IP Addr needs"
6560 		    " to be either AF_INET or AF_INET6 family.");
6561 		return (IBT_INVALID_PARAM);
6562 	}
6563 	IBCM_PRINT_IP("ibt_get_ip_data: src", &ip_cm_infop->src_addr);
6564 	IBCM_PRINT_IP("ibt_get_ip_data: dst", &ip_cm_infop->dst_addr);
6565 
6566 	return (IBT_SUCCESS);
6567 }
6568 
6569 
6570 /* Routines for warlock */
6571 
6572 /* ARGSUSED */
6573 static void
6574 ibcm_dummy_mcg_handler(void *arg, ibt_status_t retval, ibt_mcg_info_t *minfo)
6575 {
6576 	ibcm_join_mcg_tqarg_t	dummy_mcg;
6577 
6578 	dummy_mcg.func = ibcm_dummy_mcg_handler;
6579 
6580 	IBTF_DPRINTF_L5(cmlog, "ibcm_dummy_mcg_handler: "
6581 	    "dummy_mcg.func %p", dummy_mcg.func);
6582 }
6583 
6584 
6585 /* ARGSUSED */
6586 static void
6587 ibcm_dummy_recycle_rc_handler(ibt_status_t retval, void *arg)
6588 {
6589 	ibcm_taskq_recycle_arg_t	dummy_rc_recycle;
6590 
6591 	dummy_rc_recycle.func = ibcm_dummy_recycle_rc_handler;
6592 
6593 	IBTF_DPRINTF_L5(cmlog, "ibcm_dummy_recycle_rc_handler: "
6594 	    "dummy_rc_recycle.func %p", dummy_rc_recycle.func);
6595 }
6596 
6597 
6598 /* ARGSUSED */
6599 static ibt_cm_status_t
6600 ibcm_dummy_ud_handler(void *priv, ibt_cm_ud_event_t *event,
6601     ibt_cm_ud_return_args_t *ret_args,
6602     void *priv_data, ibt_priv_data_len_t len)
6603 {
6604 	/*
6605 	 * Let warlock see that ibcm_local_handler_s::actual_cm_handler
6606 	 * points to this routine.
6607 	 */
6608 	ibcm_local_handler_t	p;
6609 	ibcm_ud_state_data_t	dummy_ud;
6610 
6611 	p.actual_cm_handler = ibcm_dummy_ud_handler;
6612 	dummy_ud.ud_cm_handler = ibcm_dummy_ud_handler;
6613 
6614 	IBTF_DPRINTF_L5(cmlog, "ibcm_dummy_ud_handler: p.actual_cm_handler %p"
6615 	    "dummy_ud.ud_cm_handler %p", p.actual_cm_handler,
6616 	    dummy_ud.ud_cm_handler);
6617 	/*
6618 	 * Call all routines that the client's callback routine could call.
6619 	 */
6620 
6621 	return (IBT_CM_ACCEPT);
6622 }
6623 
6624 /* ARGSUSED */
6625 static ibt_cm_status_t
6626 ibcm_dummy_rc_handler(void *priv, ibt_cm_event_t *event,
6627     ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
6628 {
6629 	ibcm_state_data_t	dummy_rc;
6630 
6631 	dummy_rc.cm_handler = ibcm_dummy_rc_handler;
6632 
6633 	IBTF_DPRINTF_L5(cmlog, "ibcm_dummy_rc_handler: "
6634 	    "dummy_ud.ud_cm_handler %p", dummy_rc.cm_handler);
6635 	/*
6636 	 * Call all routines that the client's callback routine could call.
6637 	 */
6638 
6639 	return (IBT_CM_ACCEPT);
6640 }
6641