xref: /illumos-gate/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_impl.c (revision 03494a9880d80f834bec10a1e8f0a2f8f7c97bf4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * ibcm_impl.c
28  *
29  * contains internal functions of IB CM module.
30  *
31  * TBD:
32  * 1. HCA CATASTROPHIC/RECOVERED not handled yet
33  */
34 
35 #include <sys/ib/mgt/ibcm/ibcm_impl.h>
36 #include <sys/disp.h>
37 
38 
39 /* function prototypes */
40 static ibcm_status_t	ibcm_init(void);
41 static ibcm_status_t	ibcm_fini(void);
42 
43 /* Routines to initialize and destory CM global locks and CVs */
44 static void		ibcm_init_locks(void);
45 static void		ibcm_fini_locks(void);
46 
47 /* Routines that initialize/teardown CM's global hca structures */
48 static void		ibcm_init_hcas();
49 static ibcm_status_t	ibcm_fini_hcas();
50 
51 static void		ibcm_init_classportinfo();
52 static void		ibcm_stop_timeout_thread();
53 
54 /* Routines that handle HCA attach/detach asyncs */
55 static void		ibcm_hca_attach(ib_guid_t);
56 static ibcm_status_t	ibcm_hca_detach(ibcm_hca_info_t *);
57 
58 /* Routines that initialize the HCA's port related fields */
59 static ibt_status_t	ibcm_hca_init_port(ibcm_hca_info_t *hcap,
60 			    uint8_t port_index);
61 static ibcm_status_t	ibcm_hca_fini_port(ibcm_hca_info_t *hcap,
62 			    uint8_t port_index);
63 
64 static void ibcm_rc_flow_control_init(void);
65 static void ibcm_rc_flow_control_fini(void);
66 
67 /*
68  * Routines that check if hca's avl trees and sidr lists are free of any
69  * active client resources ie., RC or UD state structures in certain states
70  */
71 static ibcm_status_t	ibcm_check_avl_clean(ibcm_hca_info_t *hcap);
72 static ibcm_status_t	ibcm_check_sidr_clean(ibcm_hca_info_t *hcap);
73 
74 /* Add a new hca structure to CM's global hca list */
75 static ibcm_hca_info_t	*ibcm_add_hca_entry(ib_guid_t hcaguid, uint_t nports);
76 
77 static void		ibcm_comm_est_handler(ibt_async_event_t *);
78 void			ibcm_async_handler(void *, ibt_hca_hdl_t,
79 			    ibt_async_code_t, ibt_async_event_t *);
80 
81 /* Global variables */
82 char			cmlog[] = "ibcm";	/* for debug log messages */
83 ibt_clnt_hdl_t		ibcm_ibt_handle;	/* IBT handle */
84 kmutex_t		ibcm_svc_info_lock;	/* list lock */
85 kcondvar_t		ibcm_svc_info_cv;	/* cv for deregister */
86 kmutex_t		ibcm_recv_mutex;
87 avl_tree_t		ibcm_svc_avl_tree;
88 taskq_t			*ibcm_taskq = NULL;
89 int			taskq_dispatch_fail_cnt;
90 
91 kmutex_t		ibcm_trace_mutex;	/* Trace mutex */
92 kmutex_t		ibcm_trace_print_mutex;	/* Trace print mutex */
93 int			ibcm_conn_max_trcnt = IBCM_MAX_CONN_TRCNT;
94 
95 int			ibcm_enable_trace = 2;	/* Trace level 4 by default */
96 int			ibcm_dtrace = 0; /* conditionally enable more dtrace */
97 
98 _NOTE(MUTEX_PROTECTS_DATA(ibcm_svc_info_lock, ibcm_svc_info_s::{svc_bind_list
99     svc_ref_cnt svc_to_delete}))
100 
101 _NOTE(MUTEX_PROTECTS_DATA(ibcm_svc_info_lock, ibcm_svc_bind_s::{sbind_link}))
102 
103 _NOTE(MUTEX_PROTECTS_DATA(ibcm_trace_mutex, ibcm_conn_trace_s))
104 
105 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_conn_trace_s))
106 
107 _NOTE(MUTEX_PROTECTS_DATA(ibcm_trace_print_mutex, ibcm_debug_buf))
108 
109 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_debug_buf))
110 
111 /*
112  * Initial state is INIT. All hca dr's return success immediately in this
113  * state, without adding or deleting any hca's to CM.
114  */
115 ibcm_finit_state_t	ibcm_finit_state = IBCM_FINIT_INIT;
116 
117 /* mutex and cv to manage hca's reference and resource count(s) */
118 kmutex_t		ibcm_global_hca_lock;
119 kcondvar_t		ibcm_global_hca_cv;
120 
121 /* mutex and cv to sa session open */
122 kmutex_t		ibcm_sa_open_lock;
123 kcondvar_t		ibcm_sa_open_cv;
124 int			ibcm_sa_timeout_delay = 1;		/* in ticks */
125 _NOTE(MUTEX_PROTECTS_DATA(ibcm_sa_open_lock,
126     ibcm_port_info_s::{port_ibmf_saa_hdl port_saa_open_in_progress}))
127 
128 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_port_info_s::{port_ibmf_saa_hdl}))
129 
130 /* serialize sm notice callbacks */
131 kmutex_t		ibcm_sm_notice_serialize_lock;
132 
133 _NOTE(LOCK_ORDER(ibcm_sm_notice_serialize_lock ibcm_global_hca_lock))
134 
135 _NOTE(MUTEX_PROTECTS_DATA(ibcm_global_hca_lock, ibcm_hca_info_s::{hca_state
136     hca_svc_cnt hca_acc_cnt hca_res_cnt hca_next}))
137 
138 _NOTE(MUTEX_PROTECTS_DATA(ibcm_global_hca_lock,
139     ibcm_port_info_s::{port_ibmf_hdl}))
140 
141 _NOTE(MUTEX_PROTECTS_DATA(ibcm_sm_notice_serialize_lock,
142     ibcm_port_info_s::{port_event_status}))
143 
144 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_hca_info_s::{hca_state}))
145 _NOTE(DATA_READABLE_WITHOUT_LOCK(
146     ibcm_hca_info_s::{hca_port_info.port_ibmf_hdl}))
147 
148 /* mutex for CM's qp list management */
149 kmutex_t		ibcm_qp_list_lock;
150 
151 _NOTE(MUTEX_PROTECTS_DATA(ibcm_qp_list_lock, ibcm_port_info_s::{port_qplist}))
152 _NOTE(MUTEX_PROTECTS_DATA(ibcm_qp_list_lock, ibcm_qp_list_s))
153 _NOTE(MUTEX_PROTECTS_DATA(ibcm_qp_list_lock, ibcm_qp_list_s))
154 
155 kcondvar_t		ibcm_timeout_list_cv;
156 kcondvar_t		ibcm_timeout_thread_done_cv;
157 kt_did_t		ibcm_timeout_thread_did;
158 ibcm_state_data_t	*ibcm_timeout_list_hdr, *ibcm_timeout_list_tail;
159 ibcm_ud_state_data_t	*ibcm_ud_timeout_list_hdr, *ibcm_ud_timeout_list_tail;
160 kmutex_t		ibcm_timeout_list_lock;
161 uint8_t			ibcm_timeout_list_flags = 0;
162 pri_t			ibcm_timeout_thread_pri = MINCLSYSPRI;
163 
164 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock,
165     ibcm_state_data_s::timeout_next))
166 
167 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock,
168     ibcm_ud_state_data_s::ud_timeout_next))
169 
170 /*
171  * Flow control logic for open_rc_channel uses the following.
172  */
173 
/*
 * Tracks and throttles concurrent open_rc_channel work.  Queued state
 * structures are linked through head/tail (see ibcm_open_task below).
 */
struct ibcm_open_s {
	kmutex_t		mutex;		/* protects this structure */
	kcondvar_t		cv;		/* signalled for deferred exits */
	uint8_t			task_running;	/* nonzero: drain task active */
	uint_t			queued;		/* entries on head/tail list */
	uint_t			exit_deferred;
	uint_t			in_progress;	/* opens currently in flight */
	uint_t			in_progress_max; /* cap on in_progress */
	uint_t			sends;
	uint_t			sends_max;
	uint_t			sends_lowat;	/* low watermark for sends */
	uint_t			sends_hiwat;	/* high watermark for sends */
	ibcm_state_data_t	*tail;
	ibcm_state_data_t	head;		/* list anchor (not a pointer) */
} ibcm_open;
189 
190 static void ibcm_open_task(void *);
191 
192 /*
193  * Flow control logic for SA access and close_rc_channel calls follows.
194  */
195 
196 int ibcm_close_simul_max	= 12;
197 int ibcm_lapr_simul_max		= 12;
198 int ibcm_saa_simul_max		= 8;
199 
/* One chunk of waiters blocked on a flow-control condition variable */
typedef struct ibcm_flow1_s {
	struct ibcm_flow1_s	*link;		/* next chunk in the list */
	kcondvar_t		cv;		/* waiters block here */
	uint8_t			waiters;	/* 1 to IBCM_FLOW_SIMUL_MAX */
} ibcm_flow1_t;
205 
/*
 * Per-category flow control (SA access, close, LAP/APR); limits the
 * number of simultaneously outstanding requests to simul_max.
 */
typedef struct ibcm_flow_s {
	ibcm_flow1_t		*list;	/* chunks of blocked waiters */
	uint_t			simul;	/* #requests currently outstanding */
	uint_t			simul_max;	/* admission limit */
	uint_t			waiters_per_chunk;
	uint_t			lowat;
	uint_t			lowat_default;
	/* statistics */
	uint_t			total;
} ibcm_flow_t;
216 
217 ibcm_flow_t ibcm_saa_flow;
218 ibcm_flow_t ibcm_close_flow;
219 ibcm_flow_t ibcm_lapr_flow;
220 
/* NONBLOCKING close requests are queued on the list anchored here */
struct ibcm_close_s {
	kmutex_t		mutex;		/* protects head/tail list */
	ibcm_state_data_t	*tail;
	ibcm_state_data_t	head;		/* list anchor (not a pointer) */
} ibcm_close;
227 
/* CM's client registration info, passed to ibt_attach() in ibcm_init() */
static ibt_clnt_modinfo_t ibcm_ibt_modinfo = {	/* Client's modinfop */
	IBTI_V_CURR,		/* IBTI interface version */
	IBT_CM,			/* client class */
	ibcm_async_handler,	/* async event handler */
	NULL,
	"IBCM"			/* client name */
};
235 
236 /* IBCM's list of HCAs registered with it */
237 static ibcm_hca_info_t	*ibcm_hca_listp = NULL;	/* CM's HCA list */
238 
/*
 * Array of CM state call table functions; presumably indexed by the
 * incoming CM MAD attribute (REQ, MRA, REJ, REP, RTU, DREQ, DREP,
 * SIDR_REQ, SIDR_REP, LAP, APR) -- confirm against the dispatch code.
 */
ibcm_state_handler_t	ibcm_sm_funcs_tbl[] = {
	ibcm_process_req_msg,
	ibcm_process_mra_msg,
	ibcm_process_rej_msg,
	ibcm_process_rep_msg,
	ibcm_process_rtu_msg,
	ibcm_process_dreq_msg,
	ibcm_process_drep_msg,
	ibcm_process_sidr_req_msg,
	ibcm_process_sidr_rep_msg,
	ibcm_process_lap_msg,
	ibcm_process_apr_msg
};
253 
254 /* the following globals are CM tunables */
255 ibt_rnr_nak_time_t	ibcm_default_rnr_nak_time = IBT_RNR_NAK_655ms;
256 
257 uint32_t	ibcm_max_retries = IBCM_MAX_RETRIES;
258 clock_t		ibcm_local_processing_time = IBCM_LOCAL_RESPONSE_TIME;
259 clock_t		ibcm_remote_response_time = IBCM_REMOTE_RESPONSE_TIME;
260 ib_time_t	ibcm_max_sidr_rep_proctime = IBCM_MAX_SIDR_PROCESS_TIME;
261 ib_time_t	ibcm_max_sidr_pktlife_time = IBCM_MAX_SIDR_PKT_LIFE_TIME;
262 
263 ib_time_t	ibcm_max_sidr_rep_store_time = 18;
264 uint32_t	ibcm_wait_for_acc_cnt_timeout = 500000;	/* 500 ms */
265 uint32_t	ibcm_wait_for_res_cnt_timeout = 500000;	/* 500 ms */
266 
267 ib_time_t	ibcm_max_ib_pkt_lt = IBCM_MAX_IB_PKT_LT;
268 ib_time_t	ibcm_max_ib_mad_pkt_lt = IBCM_MAX_IB_MAD_PKT_LT;
269 
270 /*
271  * This delay accounts for time involved in various activities as follows :
272  *
273  * IBMF delays for posting the MADs in non-blocking mode
274  * IBMF delays for receiving the MADs and delivering to CM
275  * CM delays in processing the MADs before invoking client handlers,
276  * Any other delays associated with HCA driver in processing the MADs and
277  * 	other subsystems that CM may invoke (ex : SA, HCA driver)
278  */
279 uint32_t	ibcm_sw_delay	= 1000;	/* 1000us / 1ms */
280 uint32_t	ibcm_max_sa_retries = IBCM_MAX_SA_RETRIES + 1;
281 
282 /*	approx boot time */
283 uint32_t	ibcm_adj_btime = 4;	/* 4 seconds */
284 
285 /*
286  * The information in ibcm_clpinfo is kept in wireformat and is setup at
287  * init time, and used read-only after that
288  */
289 ibcm_classportinfo_msg_t	ibcm_clpinfo;
290 
291 char	*event_str[] = {
292 	"NEVER SEE THIS             ",
293 	"SESSION_ID                 ",
294 	"CHAN_HDL                   ",
295 	"LOCAL_COMID/HCA/PORT       ",
296 	"LOCAL_QPN                  ",
297 	"REMOTE_COMID/HCA           ",
298 	"REMOTE_QPN                 ",
299 	"BASE_TIME                  ",
300 	"INCOMING_REQ               ",
301 	"INCOMING_REP               ",
302 	"INCOMING_RTU               ",
303 	"INCOMING_COMEST            ",
304 	"INCOMING_MRA               ",
305 	"INCOMING_REJ               ",
306 	"INCOMING_LAP               ",
307 	"INCOMING_APR               ",
308 	"INCOMING_DREQ              ",
309 	"INCOMING_DREP              ",
310 	"OUTGOING_REQ               ",
311 	"OUTGOING_REP               ",
312 	"OUTGOING_RTU               ",
313 	"OUTGOING_LAP               ",
314 	"OUTGOING_APR               ",
315 	"OUTGOING_MRA               ",
316 	"OUTGOING_REJ               ",
317 	"OUTGOING_DREQ              ",
318 	"OUTGOING_DREP              ",
319 	"REQ_POST_COMPLETE          ",
320 	"REP_POST_COMPLETE          ",
321 	"RTU_POST_COMPLETE          ",
322 	"MRA_POST_COMPLETE          ",
323 	"REJ_POST_COMPLETE          ",
324 	"LAP_POST_COMPLETE          ",
325 	"APR_POST_COMPLETE          ",
326 	"DREQ_POST_COMPLETE         ",
327 	"DREP_POST_COMPLETE         ",
328 	"TIMEOUT_REP                ",
329 	"CALLED_REQ_RCVD_EVENT      ",
330 	"RET_REQ_RCVD_EVENT         ",
331 	"CALLED_REP_RCVD_EVENT      ",
332 	"RET_REP_RCVD_EVENT         ",
333 	"CALLED_CONN_EST_EVENT      ",
334 	"RET_CONN_EST_EVENT         ",
335 	"CALLED_CONN_FAIL_EVENT     ",
336 	"RET_CONN_FAIL_EVENT        ",
337 	"CALLED_CONN_CLOSE_EVENT    ",
338 	"RET_CONN_CLOSE_EVENT       ",
339 	"INIT_INIT                  ",
340 	"INIT_INIT_FAIL             ",
341 	"INIT_RTR                   ",
342 	"INIT_RTR_FAIL              ",
343 	"RTR_RTS                    ",
344 	"RTR_RTS_FAIL               ",
345 	"RTS_RTS                    ",
346 	"RTS_RTS_FAIL               ",
347 	"TO_ERROR                   ",
348 	"ERROR_FAIL                 ",
349 	"SET_ALT                    ",
350 	"SET_ALT_FAIL               ",
351 	"STALE_DETECT               ",
352 	"OUTGOING_REQ_RETRY         ",
353 	"OUTGOING_REP_RETRY         ",
354 	"OUTGOING_LAP_RETRY         ",
355 	"OUTGOING_MRA_RETRY         ",
356 	"OUTGOING_DREQ_RETRY        ",
357 	"NEVER SEE THIS             "
358 };
359 
360 char	ibcm_debug_buf[IBCM_DEBUG_BUF_SIZE];
361 
362 _NOTE(SCHEME_PROTECTS_DATA("used in a localized function consistently",
363     ibcm_debug_buf))
364 _NOTE(READ_ONLY_DATA(ibcm_taskq))
365 
366 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_timeout_list_flags))
367 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_timeout_list_hdr))
368 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_ud_timeout_list_hdr))
369 
370 #ifdef DEBUG
371 int		ibcm_test_mode = 0;	/* set to 1, if running tests */
372 #endif
373 
374 
/* Module Driver Info */
static struct modlmisc ibcm_modlmisc = {
	&mod_miscops,			/* misc (non-device) module ops */
	"IB Communication Manager"	/* module description string */
};
380 
/* Module Linkage */
static struct modlinkage ibcm_modlinkage = {
	MODREV_1,	/* module linkage revision */
	&ibcm_modlmisc,	/* the misc module info above */
	NULL		/* list terminator */
};
387 
388 
389 int
390 _init(void)
391 {
392 	int		rval;
393 	ibcm_status_t	status;
394 
395 	status = ibcm_init();
396 	if (status != IBCM_SUCCESS) {
397 		IBTF_DPRINTF_L2(cmlog, "_init: ibcm failed %d", status);
398 		return (EINVAL);
399 	}
400 
401 	rval = mod_install(&ibcm_modlinkage);
402 	if (rval != 0) {
403 		IBTF_DPRINTF_L2(cmlog, "_init: ibcm mod_install failed %d",
404 		    rval);
405 		(void) ibcm_fini();
406 	}
407 
408 	IBTF_DPRINTF_L5(cmlog, "_init: ibcm successful");
409 	return (rval);
410 
411 }
412 
413 
414 int
415 _info(struct modinfo *modinfop)
416 {
417 	return (mod_info(&ibcm_modlinkage, modinfop));
418 }
419 
420 
421 int
422 _fini(void)
423 {
424 	int status;
425 
426 	if (ibcm_fini() != IBCM_SUCCESS)
427 		return (EBUSY);
428 
429 	if ((status = mod_remove(&ibcm_modlinkage)) != 0) {
430 		IBTF_DPRINTF_L2(cmlog, "_fini: ibcm mod_remove failed %d",
431 		    status);
432 		return (status);
433 	}
434 
435 	IBTF_DPRINTF_L5(cmlog, "_fini: ibcm successful");
436 
437 	return (status);
438 }
439 
440 /* Initializes all global mutex and CV in cm module */
441 static void
442 ibcm_init_locks()
443 {
444 
445 	/* Verify CM MAD sizes */
446 #ifdef DEBUG
447 
448 	if (ibcm_test_mode > 1) {
449 
450 		IBTF_DPRINTF_L1(cmlog, "REQ MAD SIZE %d",
451 		    sizeof (ibcm_req_msg_t));
452 		IBTF_DPRINTF_L1(cmlog, "REP MAD SIZE %d",
453 		    sizeof (ibcm_rep_msg_t));
454 		IBTF_DPRINTF_L1(cmlog, "RTU MAD SIZE %d",
455 		    sizeof (ibcm_rtu_msg_t));
456 		IBTF_DPRINTF_L1(cmlog, "MRA MAD SIZE %d",
457 		    sizeof (ibcm_mra_msg_t));
458 		IBTF_DPRINTF_L1(cmlog, "REJ MAD SIZE %d",
459 		    sizeof (ibcm_rej_msg_t));
460 		IBTF_DPRINTF_L1(cmlog, "LAP MAD SIZE %d",
461 		    sizeof (ibcm_lap_msg_t));
462 		IBTF_DPRINTF_L1(cmlog, "APR MAD SIZE %d",
463 		    sizeof (ibcm_apr_msg_t));
464 		IBTF_DPRINTF_L1(cmlog, "DREQ MAD SIZE %d",
465 		    sizeof (ibcm_dreq_msg_t));
466 		IBTF_DPRINTF_L1(cmlog, "DREP MAD SIZE %d",
467 		    sizeof (ibcm_drep_msg_t));
468 		IBTF_DPRINTF_L1(cmlog, "SIDR REQ MAD SIZE %d",
469 		    sizeof (ibcm_sidr_req_msg_t));
470 		IBTF_DPRINTF_L1(cmlog, "SIDR REP MAD SIZE %d",
471 		    sizeof (ibcm_sidr_rep_msg_t));
472 	}
473 
474 #endif
475 
476 	/* Create all global locks within cm module */
477 	mutex_init(&ibcm_svc_info_lock, NULL, MUTEX_DEFAULT, NULL);
478 	mutex_init(&ibcm_timeout_list_lock, NULL, MUTEX_DEFAULT, NULL);
479 	mutex_init(&ibcm_global_hca_lock, NULL, MUTEX_DEFAULT, NULL);
480 	mutex_init(&ibcm_sa_open_lock, NULL, MUTEX_DEFAULT, NULL);
481 	mutex_init(&ibcm_recv_mutex, NULL, MUTEX_DEFAULT, NULL);
482 	mutex_init(&ibcm_sm_notice_serialize_lock, NULL, MUTEX_DEFAULT, NULL);
483 	mutex_init(&ibcm_qp_list_lock, NULL, MUTEX_DEFAULT, NULL);
484 	mutex_init(&ibcm_trace_mutex, NULL, MUTEX_DEFAULT, NULL);
485 	mutex_init(&ibcm_trace_print_mutex, NULL, MUTEX_DEFAULT, NULL);
486 	cv_init(&ibcm_svc_info_cv, NULL, CV_DRIVER, NULL);
487 	cv_init(&ibcm_timeout_list_cv, NULL, CV_DRIVER, NULL);
488 	cv_init(&ibcm_timeout_thread_done_cv, NULL, CV_DRIVER, NULL);
489 	cv_init(&ibcm_global_hca_cv, NULL, CV_DRIVER, NULL);
490 	cv_init(&ibcm_sa_open_cv, NULL, CV_DRIVER, NULL);
491 	avl_create(&ibcm_svc_avl_tree, ibcm_svc_compare,
492 	    sizeof (ibcm_svc_info_t),
493 	    offsetof(struct ibcm_svc_info_s, svc_link));
494 
495 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_locks: done");
496 }
497 
498 /* Destroys all global mutex and CV in cm module */
499 static void
500 ibcm_fini_locks()
501 {
502 	/* Destroy all global locks within cm module */
503 	mutex_destroy(&ibcm_svc_info_lock);
504 	mutex_destroy(&ibcm_timeout_list_lock);
505 	mutex_destroy(&ibcm_global_hca_lock);
506 	mutex_destroy(&ibcm_sa_open_lock);
507 	mutex_destroy(&ibcm_recv_mutex);
508 	mutex_destroy(&ibcm_sm_notice_serialize_lock);
509 	mutex_destroy(&ibcm_qp_list_lock);
510 	mutex_destroy(&ibcm_trace_mutex);
511 	mutex_destroy(&ibcm_trace_print_mutex);
512 	cv_destroy(&ibcm_svc_info_cv);
513 	cv_destroy(&ibcm_timeout_list_cv);
514 	cv_destroy(&ibcm_timeout_thread_done_cv);
515 	cv_destroy(&ibcm_global_hca_cv);
516 	cv_destroy(&ibcm_sa_open_cv);
517 	avl_destroy(&ibcm_svc_avl_tree);
518 
519 	IBTF_DPRINTF_L5(cmlog, "ibcm_fini_locks: done");
520 }
521 
522 
523 /* Initialize CM's classport info */
524 static void
525 ibcm_init_classportinfo()
526 {
527 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_clpinfo));
528 
529 	ibcm_clpinfo.BaseVersion = IBCM_MAD_BASE_VERSION;
530 	ibcm_clpinfo.ClassVersion = IBCM_MAD_CLASS_VERSION;
531 
532 	/* For now, CM supports same capabilities at all ports */
533 	ibcm_clpinfo.CapabilityMask =
534 	    h2b16(IBCM_CPINFO_CAP_RC | IBCM_CPINFO_CAP_SIDR);
535 
536 	/* Bits 0-7 are all 0 for Communication Mgmt Class */
537 
538 	/* For now, CM has the same respvalue at all ports */
539 	ibcm_clpinfo.RespTimeValue_plus =
540 	    h2b32(ibt_usec2ib(ibcm_local_processing_time) & 0x1f);
541 
542 	/* For now, redirect fields are set to 0 */
543 	/* Trap fields are not applicable to CM, hence set to 0 */
544 
545 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_clpinfo));
546 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_classportinfo: done");
547 }
548 
/*
 * ibcm_init():
 * 	- call ibt_attach()
 * 	- create AVL trees
 *	- Attach HCA handlers that are already present before
 *	CM got loaded.
 *
 * Arguments:	NONE
 *
 * Return values:
 *	IBCM_SUCCESS - success
 *	IBCM_FAILURE - an init step failed; everything acquired by the
 *	preceding steps has been released before returning
 */
static ibcm_status_t
ibcm_init(void)
{
	ibt_status_t	status;
	kthread_t	*t;

	IBTF_DPRINTF_L3(cmlog, "ibcm_init:");

	ibcm_init_classportinfo();

	if (ibcm_init_ids() != IBCM_SUCCESS) {
		IBTF_DPRINTF_L1(cmlog, "ibcm_init: "
		    "fatal error: vmem_create() failed");
		return (IBCM_FAILURE);
	}
	ibcm_init_locks();

	if (ibcm_ar_init() != IBCM_SUCCESS) {
		IBTF_DPRINTF_L1(cmlog, "ibcm_init: "
		    "fatal error: ibcm_ar_init() failed");
		/* unwind the two successful init steps above */
		ibcm_fini_ids();
		ibcm_fini_locks();
		return (IBCM_FAILURE);
	}
	ibcm_rc_flow_control_init();

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_taskq))
	ibcm_taskq = system_taskq;
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_taskq))

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_flags))
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_thread_did))

	/* Start the timeout list processing thread */
	ibcm_timeout_list_flags = 0;
	t = thread_create(NULL, 0, ibcm_process_tlist, 0, 0, &p0, TS_RUN,
	    ibcm_timeout_thread_pri);
	/* remember the thread id so ibcm_stop_timeout_thread can join it */
	ibcm_timeout_thread_did = t->t_did;

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_flags))
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_thread_did))

	/*
	 * NOTE : if ibt_attach is done after ibcm_init_hcas, then some
	 * HCA DR events may be lost. CM could call re-init hca list
	 * again, but it is more complicated. Some HCA's DR's lost may
	 * be HCA detach, which makes hca list re-syncing and locking more
	 * complex
	 */
	status = ibt_attach(&ibcm_ibt_modinfo, NULL, NULL, &ibcm_ibt_handle);
	if (status != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_init(): ibt_attach failed %d",
		    status);
		/* tear down everything initialized so far */
		(void) ibcm_ar_fini();
		ibcm_stop_timeout_thread();
		ibcm_fini_ids();
		ibcm_fini_locks();
		ibcm_rc_flow_control_fini();
		return (IBCM_FAILURE);
	}

	/* Block all HCA attach/detach asyncs */
	mutex_enter(&ibcm_global_hca_lock);

	ibcm_init_hcas();
	/* INIT -> IDLE: HCA DR events are processed normally from now on */
	ibcm_finit_state = IBCM_FINIT_IDLE;

	ibcm_path_cache_init();

	/* Unblock any waiting HCA DR asyncs in CM */
	mutex_exit(&ibcm_global_hca_lock);

	IBTF_DPRINTF_L4(cmlog, "ibcm_init: done");
	return (IBCM_SUCCESS);
}
636 
637 /* Allocates and initializes the "per hca" global data in CM */
638 static void
639 ibcm_init_hcas()
640 {
641 	uint_t	num_hcas = 0;
642 	ib_guid_t *guid_array;
643 	int i;
644 
645 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_hcas:");
646 
647 	/* Get the number of HCAs */
648 	num_hcas = ibt_get_hca_list(&guid_array);
649 	IBTF_DPRINTF_L4(cmlog, "ibcm_init_hcas: ibt_get_hca_list() "
650 	    "returned %d hcas", num_hcas);
651 
652 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
653 
654 	for (i = 0; i < num_hcas; i++)
655 		ibcm_hca_attach(guid_array[i]);
656 
657 	if (num_hcas)
658 		ibt_free_hca_list(guid_array, num_hcas);
659 
660 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_hcas: done");
661 }
662 
663 
/*
 * ibcm_fini():
 * 	- Deregister w/ ibt
 * 	- Cleanup IBCM HCA listp
 * 	- Destroy mutexes
 *
 * Arguments:	NONE
 *
 * Return values:
 *	IBCM_SUCCESS - success
 *	IBCM_FAILURE - CM resources are still in use; CM state has been
 *	restored to operational, so the module must not be unloaded
 */
static ibcm_status_t
ibcm_fini(void)
{
	ibt_status_t	status;

	IBTF_DPRINTF_L3(cmlog, "ibcm_fini:");

	/*
	 * CM assumes that the all general clients got rid of all the
	 * established connections and service registrations, completed all
	 * pending SIDR operations before a call to ibcm_fini()
	 */

	if (ibcm_ar_fini() != IBCM_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: ibcm_ar_fini failed");
		return (IBCM_FAILURE);
	}

	/* cleanup the svcinfo list */
	mutex_enter(&ibcm_svc_info_lock);
	if (avl_first(&ibcm_svc_avl_tree) != NULL) {
		/* a client still has a service registered -- cannot unload */
		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: "
		    "ibcm_svc_avl_tree is not empty");
		mutex_exit(&ibcm_svc_info_lock);
		return (IBCM_FAILURE);
	}
	mutex_exit(&ibcm_svc_info_lock);

	/* disables any new hca attach/detaches */
	mutex_enter(&ibcm_global_hca_lock);

	ibcm_finit_state = IBCM_FINIT_BUSY;

	if (ibcm_fini_hcas() != IBCM_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: "
		    "some hca's still have client resources");

		/* First, re-initialize the hcas */
		ibcm_init_hcas();
		/* and then enable the HCA asyncs */
		ibcm_finit_state = IBCM_FINIT_IDLE;
		mutex_exit(&ibcm_global_hca_lock);
		/* re-arm the ar support that was shut down at the top */
		if (ibcm_ar_init() != IBCM_SUCCESS) {
			IBTF_DPRINTF_L1(cmlog, "ibcm_fini:ibcm_ar_init failed");
		}
		return (IBCM_FAILURE);
	}

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_hdr))
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_ud_timeout_list_hdr))

	/* all stateps are gone by now, so the timeout lists must be empty */
	ASSERT(ibcm_timeout_list_hdr == NULL);
	ASSERT(ibcm_ud_timeout_list_hdr == NULL);

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_hdr))
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_ud_timeout_list_hdr))

	/* Release any pending asyncs on ibcm_global_hca_lock */
	ibcm_finit_state = IBCM_FINIT_SUCCESS;
	mutex_exit(&ibcm_global_hca_lock);

	ibcm_stop_timeout_thread();

	/*
	 * Detach from IBTL. Waits until all pending asyncs are complete.
	 * Above cv_broadcast wakes up any waiting hca attach/detach asyncs
	 */
	status = ibt_detach(ibcm_ibt_handle);

	/* if detach fails, CM didn't free up some resources, so assert */
	if (status != IBT_SUCCESS)
		IBTF_DPRINTF_L1(cmlog, "ibcm_fini: ibt_detach failed %d",
		    status);

	ibcm_rc_flow_control_fini();

	ibcm_path_cache_fini();

	ibcm_fini_ids();
	ibcm_fini_locks();
	IBTF_DPRINTF_L3(cmlog, "ibcm_fini: done");
	return (IBCM_SUCCESS);
}
758 
759 /* This routine exit's the ibcm timeout thread  */
760 static void
761 ibcm_stop_timeout_thread()
762 {
763 	mutex_enter(&ibcm_timeout_list_lock);
764 
765 	/* Stop the timeout list processing thread */
766 	ibcm_timeout_list_flags =
767 	    ibcm_timeout_list_flags | IBCM_TIMEOUT_THREAD_EXIT;
768 
769 	/* Wake up, if the timeout thread is on a cv_wait */
770 	cv_signal(&ibcm_timeout_list_cv);
771 
772 	mutex_exit(&ibcm_timeout_list_lock);
773 	thread_join(ibcm_timeout_thread_did);
774 
775 	IBTF_DPRINTF_L5(cmlog, "ibcm_stop_timeout_thread: done");
776 }
777 
778 
779 /* Attempts to release all the hca's associated with CM */
780 static ibcm_status_t
781 ibcm_fini_hcas()
782 {
783 	ibcm_hca_info_t *hcap, *next;
784 
785 	IBTF_DPRINTF_L4(cmlog, "ibcm_fini_hcas:");
786 
787 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
788 
789 	hcap = ibcm_hca_listp;
790 	while (hcap != NULL) {
791 		next = hcap->hca_next;
792 		if (ibcm_hca_detach(hcap) != IBCM_SUCCESS) {
793 			ibcm_hca_listp = hcap;
794 			return (IBCM_FAILURE);
795 		}
796 		hcap = next;
797 	}
798 
799 	IBTF_DPRINTF_L4(cmlog, "ibcm_fini_hcas: SUCCEEDED");
800 	return (IBCM_SUCCESS);
801 }
802 
803 
804 /*
805  * ibcm_hca_attach():
806  *	Called as an asynchronous event to notify CM of an attach of HCA.
807  *	Here ibcm_hca_info_t is initialized and all fields are
808  *	filled in along with SA Access handles and IBMA handles.
809  *	Also called from ibcm_init to initialize ibcm_hca_info_t's for each
810  *	hca's
811  *
812  * Arguments: (WILL CHANGE BASED ON ASYNC EVENT CODE)
813  *	hca_guid	- HCA's guid
814  *
815  * Return values: NONE
816  */
817 static void
818 ibcm_hca_attach(ib_guid_t hcaguid)
819 {
820 	int			i;
821 	ibt_status_t		status;
822 	uint_t			nports = 0;
823 	ibcm_hca_info_t		*hcap;
824 	ibt_hca_attr_t		hca_attrs;
825 
826 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_attach: guid = 0x%llX", hcaguid);
827 
828 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
829 
830 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*hcap))
831 
832 	status = ibt_query_hca_byguid(hcaguid, &hca_attrs);
833 	if (status != IBT_SUCCESS) {
834 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
835 		    "ibt_query_hca_byguid failed = %d", status);
836 		return;
837 	}
838 	nports = hca_attrs.hca_nports;
839 
840 	IBTF_DPRINTF_L4(cmlog, "ibcm_hca_attach: num ports = %x", nports);
841 
842 	if ((hcap = ibcm_add_hca_entry(hcaguid, nports)) == NULL)
843 		return;
844 
845 	hcap->hca_guid = hcaguid;	/* Set GUID */
846 	hcap->hca_num_ports = nports;	/* Set number of ports */
847 
848 	if (ibcm_init_hca_ids(hcap) != IBCM_SUCCESS) {
849 		ibcm_delete_hca_entry(hcap);
850 		return;
851 	}
852 
853 	/* Store the static hca attribute data */
854 	hcap->hca_caps = hca_attrs.hca_flags;
855 	hcap->hca_vendor_id = hca_attrs.hca_vendor_id;
856 	hcap->hca_device_id = hca_attrs.hca_device_id;
857 	hcap->hca_ack_delay = hca_attrs.hca_local_ack_delay;
858 	hcap->hca_max_rdma_in_qp = hca_attrs.hca_max_rdma_in_qp;
859 	hcap->hca_max_rdma_out_qp = hca_attrs.hca_max_rdma_out_qp;
860 
861 	/* loop thru nports and initialize IBMF handles */
862 	for (i = 0; i < hcap->hca_num_ports; i++) {
863 		status = ibt_get_port_state_byguid(hcaguid, i + 1, NULL, NULL);
864 		if (status != IBT_SUCCESS) {
865 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
866 			    "port_num %d state DOWN", i + 1);
867 		}
868 
869 		hcap->hca_port_info[i].port_hcap = hcap;
870 		hcap->hca_port_info[i].port_num = i+1;
871 
872 		if (ibcm_hca_init_port(hcap, i) != IBT_SUCCESS)
873 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
874 			    "ibcm_hca_init_port failed %d port_num %d",
875 			    status, i+1);
876 	}
877 
878 	/* create the "active" CM AVL tree */
879 	avl_create(&hcap->hca_active_tree, ibcm_active_node_compare,
880 	    sizeof (ibcm_state_data_t),
881 	    offsetof(struct ibcm_state_data_s, avl_active_link));
882 
883 	/* create the "passive" CM AVL tree */
884 	avl_create(&hcap->hca_passive_tree, ibcm_passive_node_compare,
885 	    sizeof (ibcm_state_data_t),
886 	    offsetof(struct ibcm_state_data_s, avl_passive_link));
887 
888 	/* create the "passive comid" CM AVL tree */
889 	avl_create(&hcap->hca_passive_comid_tree,
890 	    ibcm_passive_comid_node_compare,
891 	    sizeof (ibcm_state_data_t),
892 	    offsetof(struct ibcm_state_data_s, avl_passive_comid_link));
893 
894 	/*
895 	 * Mark the state of the HCA to "attach" only at the end
896 	 * Now CM starts accepting incoming MADs and client API calls
897 	 */
898 	hcap->hca_state = IBCM_HCA_ACTIVE;
899 
900 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*hcap))
901 
902 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_attach: ATTACH Done");
903 }
904 
905 /*
906  * ibcm_hca_detach():
907  *	Called as an asynchronous event to notify CM of a detach of HCA.
908  *	Here ibcm_hca_info_t is freed up and all fields that
909  *	were initialized earlier are cleaned up
910  *
911  * Arguments: (WILL CHANGE BASED ON ASYNC EVENT CODE)
912  *	hca_guid    - HCA's guid
913  *
914  * Return values:
915  *	IBCM_SUCCESS	- able to detach HCA
916  *	IBCM_FAILURE	- failed to detach HCA
917  */
918 static ibcm_status_t
919 ibcm_hca_detach(ibcm_hca_info_t *hcap)
920 {
921 	int		port_index, i;
922 	ibcm_status_t	status = IBCM_SUCCESS;
923 	clock_t		absolute_time;
924 
925 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach: hcap = 0x%p guid = 0x%llX",
926 	    hcap, hcap->hca_guid);
927 
928 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
929 
930 	/*
931 	 * Declare hca is going away to all CM clients. Wait until the
932 	 * access count becomes zero.
933 	 */
934 	hcap->hca_state = IBCM_HCA_NOT_ACTIVE;
935 
936 	/* wait on response CV to 500mS */
937 	absolute_time = ddi_get_lbolt() +
938 	    drv_usectohz(ibcm_wait_for_acc_cnt_timeout);
939 
940 	while (hcap->hca_acc_cnt > 0)
941 		if (cv_timedwait(&ibcm_global_hca_cv, &ibcm_global_hca_lock,
942 		    absolute_time) == -1)
943 			break;
944 
945 	if (hcap->hca_acc_cnt != 0) {
946 		/* We got a timeout */
947 #ifdef DEBUG
948 		if (ibcm_test_mode > 0)
949 			IBTF_DPRINTF_L1(cmlog, "ibcm_hca_detach: Unexpected "
950 			    "abort due to timeout on acc_cnt %u",
951 			    hcap->hca_acc_cnt);
952 		else
953 #endif
954 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: Aborting due"
955 			    " to timeout on acc_cnt %u", hcap->hca_acc_cnt);
956 		hcap->hca_state = IBCM_HCA_ACTIVE;
957 		return (IBCM_FAILURE);
958 	}
959 
960 	/*
961 	 * First make sure, there are no active users of ibma handles,
962 	 * and then de-register handles.
963 	 */
964 
965 	/* make sure that there are no "Service"s registered w/ this HCA. */
966 	if (hcap->hca_svc_cnt != 0) {
967 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
968 		    "Active services still there %d", hcap->hca_svc_cnt);
969 		hcap->hca_state = IBCM_HCA_ACTIVE;
970 		return (IBCM_FAILURE);
971 	}
972 
973 	if (ibcm_check_sidr_clean(hcap) != IBCM_SUCCESS) {
974 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach:"
975 		    "There are active SIDR operations");
976 		hcap->hca_state = IBCM_HCA_ACTIVE;
977 		return (IBCM_FAILURE);
978 	}
979 
980 	if (ibcm_check_avl_clean(hcap) != IBCM_SUCCESS) {
981 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
982 		    "There are active RC connections");
983 		hcap->hca_state = IBCM_HCA_ACTIVE;
984 		return (IBCM_FAILURE);
985 	}
986 
987 	/*
988 	 * Now, wait until all rc and sidr stateps go away
989 	 * All these stateps must be short lived ones, waiting to be cleaned
990 	 * up after some timeout value, based on the current state.
991 	 */
992 	IBTF_DPRINTF_L5(cmlog, "ibcm_hca_detach:hca_guid = 0x%llX res_cnt = %d",
993 	    hcap->hca_guid, hcap->hca_res_cnt);
994 
995 	/* wait on response CV to 500mS */
996 	absolute_time = ddi_get_lbolt() +
997 	    drv_usectohz(ibcm_wait_for_res_cnt_timeout);
998 
999 	while (hcap->hca_res_cnt > 0)
1000 		if (cv_timedwait(&ibcm_global_hca_cv, &ibcm_global_hca_lock,
1001 		    absolute_time) == -1)
1002 			break;
1003 
1004 	if (hcap->hca_res_cnt != 0) {
1005 		/* We got a timeout waiting for hca_res_cnt to become 0 */
1006 #ifdef DEBUG
1007 		if (ibcm_test_mode > 0)
1008 			IBTF_DPRINTF_L1(cmlog, "ibcm_hca_detach: Unexpected "
1009 			    "abort due to timeout on res_cnt %d",
1010 			    hcap->hca_res_cnt);
1011 		else
1012 #endif
1013 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: Aborting due"
1014 			    " to timeout on res_cnt %d", hcap->hca_res_cnt);
1015 		hcap->hca_state = IBCM_HCA_ACTIVE;
1016 		return (IBCM_FAILURE);
1017 	}
1018 
1019 	/* Re-assert the while loop step above */
1020 	ASSERT(hcap->hca_sidr_list == NULL);
1021 	avl_destroy(&hcap->hca_active_tree);
1022 	avl_destroy(&hcap->hca_passive_tree);
1023 	avl_destroy(&hcap->hca_passive_comid_tree);
1024 
1025 	/*
1026 	 * Unregister all ports from IBMA
1027 	 * If there is a failure, re-initialize any free'd ibma handles. This
1028 	 * is required to receive the incoming mads
1029 	 */
1030 	status = IBCM_SUCCESS;
1031 	for (port_index = 0; port_index < hcap->hca_num_ports; port_index++) {
1032 		if ((status = ibcm_hca_fini_port(hcap, port_index)) !=
1033 		    IBCM_SUCCESS) {
1034 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
1035 			    "Failed to free IBMA Handle for port_num %d",
1036 			    port_index + 1);
1037 			break;
1038 		}
1039 	}
1040 
1041 	/* If detach fails, re-initialize ibma handles for incoming mads */
1042 	if (status != IBCM_SUCCESS)  {
1043 		for (i = 0; i < port_index; i++) {
1044 			if (ibcm_hca_init_port(hcap, i) != IBT_SUCCESS)
1045 				IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
1046 				    "Failed to re-allocate IBMA Handles for"
1047 				    " port_num %d", port_index + 1);
1048 		}
1049 		hcap->hca_state = IBCM_HCA_ACTIVE;
1050 		return (IBCM_FAILURE);
1051 	}
1052 
1053 	ibcm_fini_hca_ids(hcap);
1054 	ibcm_delete_hca_entry(hcap);
1055 
1056 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach: DETACH succeeded");
1057 	return (IBCM_SUCCESS);
1058 }
1059 
1060 /* Checks, if there are any active sidr state entries in the specified hca */
1061 static ibcm_status_t
1062 ibcm_check_sidr_clean(ibcm_hca_info_t *hcap)
1063 {
1064 	ibcm_ud_state_data_t	*usp;
1065 	uint32_t		transient_cnt = 0;
1066 
1067 	IBTF_DPRINTF_L5(cmlog, "ibcm_check_sidr_clean:");
1068 
1069 	rw_enter(&hcap->hca_sidr_list_lock, RW_WRITER);
1070 	usp = hcap->hca_sidr_list;	/* Point to the list */
1071 	while (usp != NULL) {
1072 		mutex_enter(&usp->ud_state_mutex);
1073 		if ((usp->ud_state != IBCM_STATE_SIDR_REP_SENT) &&
1074 		    (usp->ud_state != IBCM_STATE_TIMED_OUT) &&
1075 		    (usp->ud_state != IBCM_STATE_DELETE)) {
1076 
1077 			IBTF_DPRINTF_L3(cmlog, "ibcm_check_sidr_clean:"
1078 			    "usp = %p not in transient state = %d", usp,
1079 			    usp->ud_state);
1080 
1081 			mutex_exit(&usp->ud_state_mutex);
1082 			rw_exit(&hcap->hca_sidr_list_lock);
1083 			return (IBCM_FAILURE);
1084 		} else {
1085 			mutex_exit(&usp->ud_state_mutex);
1086 			++transient_cnt;
1087 		}
1088 
1089 		usp = usp->ud_nextp;
1090 	}
1091 	rw_exit(&hcap->hca_sidr_list_lock);
1092 
1093 	IBTF_DPRINTF_L4(cmlog, "ibcm_check_sidr_clean: transient_cnt %d",
1094 	    transient_cnt);
1095 
1096 	return (IBCM_SUCCESS);
1097 }
1098 
1099 /* Checks, if there are any active rc state entries, in the specified hca */
1100 static ibcm_status_t
1101 ibcm_check_avl_clean(ibcm_hca_info_t *hcap)
1102 
1103 {
1104 	ibcm_state_data_t	*sp;
1105 	avl_tree_t		*avl_tree;
1106 	uint32_t		transient_cnt = 0;
1107 
1108 	IBTF_DPRINTF_L5(cmlog, "ibcm_check_avl_clean:");
1109 	/*
1110 	 * Both the trees ie., active and passive must reference to all
1111 	 * statep's, so let's use one
1112 	 */
1113 	avl_tree = &hcap->hca_active_tree;
1114 
1115 	rw_enter(&hcap->hca_state_rwlock, RW_WRITER);
1116 
1117 	for (sp = avl_first(avl_tree); sp != NULL;
1118 	    sp = avl_walk(avl_tree, sp, AVL_AFTER)) {
1119 		mutex_enter(&sp->state_mutex);
1120 		if ((sp->state != IBCM_STATE_TIMEWAIT) &&
1121 		    (sp->state != IBCM_STATE_REJ_SENT) &&
1122 		    (sp->state != IBCM_STATE_DELETE)) {
1123 			IBTF_DPRINTF_L3(cmlog, "ibcm_check_avl_clean: "
1124 			    "sp = %p not in transient state = %d", sp,
1125 			    sp->state);
1126 			mutex_exit(&sp->state_mutex);
1127 			rw_exit(&hcap->hca_state_rwlock);
1128 			return (IBCM_FAILURE);
1129 		} else {
1130 			mutex_exit(&sp->state_mutex);
1131 			++transient_cnt;
1132 		}
1133 	}
1134 
1135 	rw_exit(&hcap->hca_state_rwlock);
1136 
1137 	IBTF_DPRINTF_L4(cmlog, "ibcm_check_avl_clean: transient_cnt %d",
1138 	    transient_cnt);
1139 
1140 	return (IBCM_SUCCESS);
1141 }
1142 
1143 /* Adds a new entry into CM's global hca list, if hca_guid is not there yet */
1144 static ibcm_hca_info_t *
1145 ibcm_add_hca_entry(ib_guid_t hcaguid, uint_t nports)
1146 {
1147 	ibcm_hca_info_t	*hcap;
1148 
1149 	IBTF_DPRINTF_L5(cmlog, "ibcm_add_hca_entry: guid = 0x%llX",
1150 	    hcaguid);
1151 
1152 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
1153 
1154 	/*
1155 	 * Check if this hca_guid already in the list
1156 	 * If yes, then ignore this and return NULL
1157 	 */
1158 
1159 	hcap = ibcm_hca_listp;
1160 
1161 	/* search for this HCA */
1162 	while (hcap != NULL) {
1163 		if (hcap->hca_guid == hcaguid) {
1164 			/* already exists */
1165 			IBTF_DPRINTF_L2(cmlog, "ibcm_add_hca_entry: "
1166 			    "hcap %p guid 0x%llX, entry already exists !!",
1167 			    hcap, hcap->hca_guid);
1168 			return (NULL);
1169 		}
1170 		hcap = hcap->hca_next;
1171 	}
1172 
1173 	/* Allocate storage for the new HCA entry found */
1174 	hcap = kmem_zalloc(sizeof (ibcm_hca_info_t) +
1175 	    (nports - 1) * sizeof (ibcm_port_info_t), KM_SLEEP);
1176 
1177 	/* initialize RW lock */
1178 	rw_init(&hcap->hca_state_rwlock, NULL, RW_DRIVER, NULL);
1179 	/* initialize SIDR list lock */
1180 	rw_init(&hcap->hca_sidr_list_lock, NULL, RW_DRIVER, NULL);
1181 	/* Insert "hcap" into the global HCA list maintained by CM */
1182 	hcap->hca_next = ibcm_hca_listp;
1183 	ibcm_hca_listp = hcap;
1184 
1185 	IBTF_DPRINTF_L5(cmlog, "ibcm_add_hca_entry: done hcap = 0x%p", hcap);
1186 
1187 	return (hcap);
1188 
1189 }
1190 
1191 /* deletes the given ibcm_hca_info_t from CM's global hca list */
1192 void
1193 ibcm_delete_hca_entry(ibcm_hca_info_t *hcap)
1194 {
1195 	ibcm_hca_info_t	*headp, *prevp = NULL;
1196 
1197 	/* ibcm_hca_global_lock is held */
1198 	IBTF_DPRINTF_L5(cmlog, "ibcm_delete_hca_entry: guid = 0x%llX "
1199 	    "hcap = 0x%p", hcap->hca_guid, hcap);
1200 
1201 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
1202 
1203 	headp = ibcm_hca_listp;
1204 	while (headp != NULL) {
1205 		if (headp == hcap) {
1206 			IBTF_DPRINTF_L3(cmlog, "ibcm_delete_hca_entry: "
1207 			    "deleting hcap %p hcaguid %llX", hcap,
1208 			    hcap->hca_guid);
1209 			if (prevp) {
1210 				prevp->hca_next = headp->hca_next;
1211 			} else {
1212 				prevp = headp->hca_next;
1213 				ibcm_hca_listp = prevp;
1214 			}
1215 			rw_destroy(&hcap->hca_state_rwlock);
1216 			rw_destroy(&hcap->hca_sidr_list_lock);
1217 			kmem_free(hcap, sizeof (ibcm_hca_info_t) +
1218 			    (hcap->hca_num_ports - 1) *
1219 			    sizeof (ibcm_port_info_t));
1220 			return;
1221 		}
1222 
1223 		prevp = headp;
1224 		headp = headp->hca_next;
1225 	}
1226 }
1227 
1228 /*
1229  * ibcm_find_hca_entry:
1230  *	Given a HCA's GUID find out ibcm_hca_info_t entry for that HCA
1231  *	This entry can be then used to access AVL tree/SIDR list etc.
1232  *	If entry exists and in HCA ATTACH state, then hca's ref cnt is
1233  *	incremented and entry returned. Else NULL returned.
1234  *
1235  *	All functions that use ibcm_find_hca_entry and get a non-NULL
1236  *	return values must call ibcm_dec_hca_acc_cnt to decrement the
1237  *	respective hca ref cnt. There shouldn't be any usage of
1238  *	ibcm_hca_info_t * returned from ibcm_find_hca_entry,
1239  *	after decrementing the hca_acc_cnt
1240  *
1241  * INPUTS:
1242  *	hca_guid	- HCA's guid
1243  *
1244  * RETURN VALUE:
1245  *	hcap		- if a match is found, else NULL
1246  */
1247 ibcm_hca_info_t *
1248 ibcm_find_hca_entry(ib_guid_t hca_guid)
1249 {
1250 	ibcm_hca_info_t *hcap;
1251 
1252 	IBTF_DPRINTF_L5(cmlog, "ibcm_find_hca_entry: guid = 0x%llX", hca_guid);
1253 
1254 	mutex_enter(&ibcm_global_hca_lock);
1255 
1256 	hcap = ibcm_hca_listp;
1257 	/* search for this HCA */
1258 	while (hcap != NULL) {
1259 		if (hcap->hca_guid == hca_guid)
1260 			break;
1261 		hcap = hcap->hca_next;
1262 	}
1263 
1264 	/* if no hcap for the hca_guid, return NULL */
1265 	if (hcap == NULL) {
1266 		mutex_exit(&ibcm_global_hca_lock);
1267 		return (NULL);
1268 	}
1269 
1270 	/* return hcap, only if it valid to use */
1271 	if (hcap->hca_state == IBCM_HCA_ACTIVE) {
1272 		++(hcap->hca_acc_cnt);
1273 
1274 		IBTF_DPRINTF_L5(cmlog, "ibcm_find_hca_entry: "
1275 		    "found hcap = 0x%p hca_acc_cnt %u", hcap,
1276 		    hcap->hca_acc_cnt);
1277 
1278 		mutex_exit(&ibcm_global_hca_lock);
1279 		return (hcap);
1280 	} else {
1281 		mutex_exit(&ibcm_global_hca_lock);
1282 
1283 		IBTF_DPRINTF_L2(cmlog, "ibcm_find_hca_entry: "
1284 		    "found hcap = 0x%p not in active state", hcap);
1285 		return (NULL);
1286 	}
1287 }
1288 
1289 /*
1290  * Searches for ibcm_hca_info_t entry based on hca_guid, but doesn't increment
1291  * the hca's reference count. This function is used, where the calling context
1292  * is attempting to delete hcap itself and hence acc_cnt cannot be incremented
1293  * OR assumes that valid hcap must be available in ibcm's global hca list.
1294  */
1295 ibcm_hca_info_t *
1296 ibcm_find_hcap_entry(ib_guid_t hca_guid)
1297 {
1298 	ibcm_hca_info_t *hcap;
1299 
1300 	IBTF_DPRINTF_L5(cmlog, "ibcm_find_hcap_entry: guid = 0x%llX", hca_guid);
1301 
1302 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
1303 
1304 	hcap = ibcm_hca_listp;
1305 	/* search for this HCA */
1306 	while (hcap != NULL) {
1307 		if (hcap->hca_guid == hca_guid)
1308 			break;
1309 		hcap = hcap->hca_next;
1310 	}
1311 
1312 	if (hcap == NULL)
1313 		IBTF_DPRINTF_L2(cmlog, "ibcm_find_hcap_entry: No hcap found for"
1314 		    " hca_guid 0x%llX", hca_guid);
1315 	else
1316 		IBTF_DPRINTF_L5(cmlog, "ibcm_find_hcap_entry: hcap found for"
1317 		    " hca_guid 0x%llX", hca_guid);
1318 
1319 	return (hcap);
1320 }
1321 
1322 /* increment the hca's temporary reference count */
1323 ibcm_status_t
1324 ibcm_inc_hca_acc_cnt(ibcm_hca_info_t *hcap)
1325 {
1326 	mutex_enter(&ibcm_global_hca_lock);
1327 	if (hcap->hca_state == IBCM_HCA_ACTIVE) {
1328 		++(hcap->hca_acc_cnt);
1329 		IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_acc_cnt: "
1330 		    "hcap = 0x%p  acc_cnt = %d ", hcap, hcap->hca_acc_cnt);
1331 		mutex_exit(&ibcm_global_hca_lock);
1332 		return (IBCM_SUCCESS);
1333 	} else {
1334 		IBTF_DPRINTF_L2(cmlog, "ibcm_inc_hca_acc_cnt: "
1335 		    "hcap INACTIVE 0x%p  acc_cnt = %d ", hcap,
1336 		    hcap->hca_acc_cnt);
1337 		mutex_exit(&ibcm_global_hca_lock);
1338 		return (IBCM_FAILURE);
1339 	}
1340 }
1341 
1342 /* decrement the hca's ref count, and wake up any waiting threads */
1343 void
1344 ibcm_dec_hca_acc_cnt(ibcm_hca_info_t *hcap)
1345 {
1346 	mutex_enter(&ibcm_global_hca_lock);
1347 	ASSERT(hcap->hca_acc_cnt > 0);
1348 	--(hcap->hca_acc_cnt);
1349 	IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_acc_cnt: hcap = 0x%p "
1350 	    "acc_cnt = %d", hcap, hcap->hca_acc_cnt);
1351 	if ((hcap->hca_state == IBCM_HCA_NOT_ACTIVE) &&
1352 	    (hcap->hca_acc_cnt == 0)) {
1353 		IBTF_DPRINTF_L3(cmlog, "ibcm_dec_hca_acc_cnt: "
1354 		    "cv_broadcast for hcap = 0x%p", hcap);
1355 		cv_broadcast(&ibcm_global_hca_cv);
1356 	}
1357 	mutex_exit(&ibcm_global_hca_lock);
1358 }
1359 
1360 /* increment the hca's resource count */
1361 void
1362 ibcm_inc_hca_res_cnt(ibcm_hca_info_t *hcap)
1363 
1364 {
1365 	mutex_enter(&ibcm_global_hca_lock);
1366 	++(hcap->hca_res_cnt);
1367 	IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_res_cnt: hcap = 0x%p "
1368 	    "ref_cnt = %d", hcap, hcap->hca_res_cnt);
1369 	mutex_exit(&ibcm_global_hca_lock);
1370 }
1371 
1372 /* decrement the hca's resource count, and wake up any waiting threads */
1373 void
1374 ibcm_dec_hca_res_cnt(ibcm_hca_info_t *hcap)
1375 {
1376 	mutex_enter(&ibcm_global_hca_lock);
1377 	ASSERT(hcap->hca_res_cnt > 0);
1378 	--(hcap->hca_res_cnt);
1379 	IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_res_cnt: hcap = 0x%p "
1380 	    "ref_cnt = %d", hcap, hcap->hca_res_cnt);
1381 	if ((hcap->hca_state == IBCM_HCA_NOT_ACTIVE) &&
1382 	    (hcap->hca_res_cnt == 0)) {
1383 		IBTF_DPRINTF_L3(cmlog, "ibcm_dec_hca_res_cnt: "
1384 		    "cv_broadcast for hcap = 0x%p", hcap);
1385 		cv_broadcast(&ibcm_global_hca_cv);
1386 	}
1387 	mutex_exit(&ibcm_global_hca_lock);
1388 }
1389 
1390 /* increment the hca's service count */
1391 void
1392 ibcm_inc_hca_svc_cnt(ibcm_hca_info_t *hcap)
1393 
1394 {
1395 	mutex_enter(&ibcm_global_hca_lock);
1396 	++(hcap->hca_svc_cnt);
1397 	IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_svc_cnt: hcap = 0x%p "
1398 	    "svc_cnt = %d", hcap, hcap->hca_svc_cnt);
1399 	mutex_exit(&ibcm_global_hca_lock);
1400 }
1401 
1402 /* decrement the hca's service count */
1403 void
1404 ibcm_dec_hca_svc_cnt(ibcm_hca_info_t *hcap)
1405 {
1406 	mutex_enter(&ibcm_global_hca_lock);
1407 	ASSERT(hcap->hca_svc_cnt > 0);
1408 	--(hcap->hca_svc_cnt);
1409 	IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_svc_cnt: hcap = 0x%p "
1410 	    "svc_cnt = %d", hcap, hcap->hca_svc_cnt);
1411 	mutex_exit(&ibcm_global_hca_lock);
1412 }
1413 
1414 /*
1415  * The following code manages three classes of requests that CM makes to
1416  * the fabric.  Those three classes are SA_ACCESS, REQ/REP/RTU, and DREQ/DREP.
1417  * The main issue is that the fabric can become very busy, and the CM
1418  * protocols rely on responses being made based on a predefined timeout
1419  * value.  By managing how many simultaneous sessions are allowed, there
1420  * is observed extremely high reliability of CM protocol succeeding when
1421  * it should.
1422  *
1423  * SA_ACCESS and DREQ/DREP are managed at the thread level, whereby the
1424  * thread blocks until there are less than some number of threads doing
1425  * similar requests.
1426  *
1427  * REQ/REP/RTU requests beyond a given limit are added to a list,
1428  * allowing the thread to return immediately to its caller in the
1429  * case where the "mode" is IBT_NONBLOCKING.  This is the mode used
1430  * by uDAPL and seems to be an important feature/behavior.
1431  */
1432 
1433 static int
1434 ibcm_ok_to_start(struct ibcm_open_s *openp)
1435 {
1436 	return (openp->sends < openp->sends_hiwat &&
1437 	    openp->in_progress < openp->in_progress_max);
1438 }
1439 
/*
 * Releases this statep's slot in the REQ flow-control accounting, once
 * its open_flow flag indicates a slot is held.  If the statep was in
 * progress (open_link == NULL) the in_progress count drops and the
 * tlist thread may be kicked to start queued opens; if it was still
 * queued, it is unlinked from the open list instead.
 */
void
ibcm_open_done(ibcm_state_data_t *statep)
{
	int run;
	ibcm_state_data_t **linkp, *tmp;

	ASSERT(MUTEX_HELD(&statep->state_mutex));
	if (statep->open_flow == 1) {
		statep->open_flow = 0;
		mutex_enter(&ibcm_open.mutex);
		if (statep->open_link == NULL) {
			/* REQ was in progress, not queued */
			ibcm_open.in_progress--;
			run = ibcm_ok_to_start(&ibcm_open);
		} else {
			/* still queued: unlink statep from the open list */
			ibcm_open.queued--;
			linkp = &ibcm_open.head.open_link;
			while (*linkp != statep)
				linkp = &((*linkp)->open_link);
			*linkp = statep->open_link;
			statep->open_link = NULL;
			/*
			 * If we remove what tail pointed to, we need
			 * to reassign tail (it is never NULL).
			 * tail points to head for the empty list.
			 */
			if (ibcm_open.tail == statep) {
				/* last element's open_link points at head */
				tmp = &ibcm_open.head;
				while (tmp->open_link != &ibcm_open.head)
					tmp = tmp->open_link;
				ibcm_open.tail = tmp;
			}
			run = 0;
		}
		mutex_exit(&ibcm_open.mutex);
		if (run)
			ibcm_run_tlist_thread();
	}
}
1478 
1479 /* dtrace */
1480 void
1481 ibcm_open_wait(hrtime_t delta)
1482 {
1483 	if (delta > 1000000)
1484 		IBTF_DPRINTF_L2(cmlog, "ibcm_open_wait: flow more %lld", delta);
1485 }
1486 
/*
 * Posts the REQ MAD stored in statep and drops a statep reference.
 * Also reports how long the REQ waited in flow control.
 */
void
ibcm_open_start(ibcm_state_data_t *statep)
{
	ibcm_insert_trace(statep, IBCM_TRACE_OUTGOING_REQ);

	/* measure (and possibly log) time spent waiting since post_time */
	mutex_enter(&statep->state_mutex);
	ibcm_open_wait(gethrtime() - statep->post_time);
	mutex_exit(&statep->state_mutex);

	/* send the stored REQ; completion runs ibcm_post_req_complete */
	ibcm_post_rc_mad(statep, statep->stored_msg, ibcm_post_req_complete,
	    statep);

	mutex_enter(&statep->state_mutex);
	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}
1503 
/*
 * Starts an outgoing open (REQ) immediately when nothing is queued and
 * flow control permits; otherwise appends statep to the open list for
 * the tlist thread to start later.
 */
void
ibcm_open_enqueue(ibcm_state_data_t *statep)
{
	int run;

	/* remember when this open was posted, for ibcm_open_wait() */
	mutex_enter(&statep->state_mutex);
	statep->post_time = gethrtime();
	mutex_exit(&statep->state_mutex);
	mutex_enter(&ibcm_open.mutex);
	if (ibcm_open.queued == 0 && ibcm_ok_to_start(&ibcm_open)) {
		/* nothing ahead of us and below the limits: start now */
		ibcm_open.in_progress++;
		mutex_exit(&ibcm_open.mutex);
		ibcm_open_start(statep);
	} else {
		/*
		 * Append at the tail (tail is never NULL; the last
		 * element's open_link points back at the list head).
		 */
		ibcm_open.queued++;
		statep->open_link = &ibcm_open.head;
		ibcm_open.tail->open_link = statep;
		ibcm_open.tail = statep;
		run = ibcm_ok_to_start(&ibcm_open);
		mutex_exit(&ibcm_open.mutex);
		if (run)
			ibcm_run_tlist_thread();
	}
}
1528 
/*
 * Removes the first queued statep from the open list and accounts it
 * as in progress.  Caller holds ibcm_open.mutex and has verified the
 * list is non-empty.
 */
ibcm_state_data_t *
ibcm_open_dequeue(void)
{
	ibcm_state_data_t *statep;

	ASSERT(MUTEX_HELD(&ibcm_open.mutex));
	ibcm_open.queued--;
	ibcm_open.in_progress++;
	/* unlink the head element */
	statep = ibcm_open.head.open_link;
	ibcm_open.head.open_link = statep->open_link;
	statep->open_link = NULL;
	/*
	 * If we remove what tail pointed to, we need
	 * to reassign tail (it is never NULL).
	 * tail points to head for the empty list.
	 */
	if (ibcm_open.tail == statep)
		ibcm_open.tail = &ibcm_open.head;
	return (statep);
}
1549 
1550 void
1551 ibcm_check_for_opens(void)
1552 {
1553 	ibcm_state_data_t 	*statep;
1554 
1555 	mutex_enter(&ibcm_open.mutex);
1556 
1557 	while (ibcm_open.queued > 0) {
1558 		if (ibcm_ok_to_start(&ibcm_open)) {
1559 			statep = ibcm_open_dequeue();
1560 			mutex_exit(&ibcm_open.mutex);
1561 
1562 			ibcm_open_start(statep);
1563 
1564 			mutex_enter(&ibcm_open.mutex);
1565 		} else {
1566 			break;
1567 		}
1568 	}
1569 	mutex_exit(&ibcm_open.mutex);
1570 }
1571 
1572 
1573 static void
1574 ibcm_flow_init(ibcm_flow_t *flow, uint_t simul_max)
1575 {
1576 	flow->list			= NULL;
1577 	flow->simul			= 0;
1578 	flow->waiters_per_chunk		= 4;
1579 	flow->simul_max			= simul_max;
1580 	flow->lowat			= simul_max - flow->waiters_per_chunk;
1581 	flow->lowat_default		= flow->lowat;
1582 	/* stats */
1583 	flow->total			= 0;
1584 }
1585 
/*
 * Sets up the REQ (open) and DREQ (close) flow-control state: the
 * shared mutexes, the per-class flow limits, and the two intrusive
 * lists (tail points at head when a list is empty).
 */
static void
ibcm_rc_flow_control_init(void)
{
	mutex_init(&ibcm_open.mutex, NULL, MUTEX_DEFAULT, NULL);
	mutex_enter(&ibcm_open.mutex);
	/* per-class concurrency caps; *_simul_max are defined elsewhere */
	ibcm_flow_init(&ibcm_close_flow, ibcm_close_simul_max);
	ibcm_flow_init(&ibcm_lapr_flow, ibcm_lapr_simul_max);
	ibcm_flow_init(&ibcm_saa_flow, ibcm_saa_simul_max);

	ibcm_open.queued 		= 0;
	ibcm_open.exit_deferred 	= 0;
	ibcm_open.in_progress 		= 0;
	ibcm_open.in_progress_max 	= 16;
	ibcm_open.sends 		= 0;
	ibcm_open.sends_max 		= 0;
	ibcm_open.sends_lowat 		= 8;
	ibcm_open.sends_hiwat 		= 16;
	ibcm_open.tail 			= &ibcm_open.head;
	ibcm_open.head.open_link 	= NULL;
	mutex_exit(&ibcm_open.mutex);

	mutex_init(&ibcm_close.mutex, NULL, MUTEX_DEFAULT, NULL);
	mutex_enter(&ibcm_close.mutex);
	ibcm_close.tail			= &ibcm_close.head;
	ibcm_close.head.close_link 	= NULL;
	mutex_exit(&ibcm_close.mutex);
}
1613 
/* Tears down the locks created by ibcm_rc_flow_control_init() */
static void
ibcm_rc_flow_control_fini(void)
{
	mutex_destroy(&ibcm_open.mutex);
	mutex_destroy(&ibcm_close.mutex);
}
1620 
/*
 * Returns a flow1 chunk with room for another waiter, allocating a new
 * chunk when the last one is full.  Called (and returns) with
 * ibcm_open.mutex held; the mutex is dropped around the KM_SLEEP
 * allocation, so the list is re-examined afterwards in case another
 * thread changed it meanwhile.
 */
static ibcm_flow1_t *
ibcm_flow_find(ibcm_flow_t *flow)
{
	ibcm_flow1_t *flow1;
	ibcm_flow1_t *f;

	f = flow->list;
	if (f) {	/* most likely code path */
		while (f->link != NULL)
			f = f->link;
		if (f->waiters < flow->waiters_per_chunk)
			return (f);
	}

	/* There was no flow1 list element ready for another waiter */
	mutex_exit(&ibcm_open.mutex);
	flow1 = kmem_alloc(sizeof (*flow1), KM_SLEEP);
	mutex_enter(&ibcm_open.mutex);

	/* re-check: the list may have changed while the mutex was dropped */
	f = flow->list;
	if (f) {
		while (f->link != NULL)
			f = f->link;
		if (f->waiters < flow->waiters_per_chunk) {
			/* someone else made room; discard our allocation */
			kmem_free(flow1, sizeof (*flow1));
			return (f);
		}
		f->link = flow1;
	} else {
		flow->list = flow1;
	}
	cv_init(&flow1->cv, NULL, CV_DRIVER, NULL);
	flow1->waiters = 0;
	flow1->link = NULL;
	return (flow1);
}
1657 
/*
 * Acquires a slot in the given flow class, blocking (cv_wait) when the
 * class is at its simultaneous-request limit or other threads are
 * already waiting.
 */
static void
ibcm_flow_enter(ibcm_flow_t *flow)
{
	mutex_enter(&ibcm_open.mutex);
	if (flow->list == NULL && flow->simul < flow->simul_max) {
		/* nobody waiting and below the cap: proceed immediately */
		flow->simul++;
		flow->total++;
		mutex_exit(&ibcm_open.mutex);
	} else {
		ibcm_flow1_t *flow1;

		flow1 = ibcm_flow_find(flow);
		flow1->waiters++;
		cv_wait(&flow1->cv, &ibcm_open.mutex);
		/* the last waiter to wake up frees the chunk */
		if (--flow1->waiters == 0) {
			cv_destroy(&flow1->cv);
			mutex_exit(&ibcm_open.mutex);
			kmem_free(flow1, sizeof (*flow1));
		} else
			mutex_exit(&ibcm_open.mutex);
	}
}
1680 
/*
 * Releases a slot in the given flow class.  When usage drops below the
 * low watermark, the first chunk of waiters is admitted all at once
 * (their count is folded into simul/total before the broadcast).
 */
static void
ibcm_flow_exit(ibcm_flow_t *flow)
{
	mutex_enter(&ibcm_open.mutex);
	if (--flow->simul < flow->lowat) {
		/* lowat creeps back up toward its default value */
		if (flow->lowat < flow->lowat_default)
			flow->lowat++;
		if (flow->list) {
			ibcm_flow1_t *flow1;

			flow1 = flow->list;
			flow->list = flow1->link;	/* unlink */
			flow1->link = NULL;		/* be clean */
			flow->total += flow1->waiters;
			flow->simul += flow1->waiters;
			cv_broadcast(&flow1->cv);
		}
	}
	mutex_exit(&ibcm_open.mutex);
}
1701 
1702 void
1703 ibcm_flow_inc(void)
1704 {
1705 	mutex_enter(&ibcm_open.mutex);
1706 	if (++ibcm_open.sends > ibcm_open.sends_max) {
1707 		ibcm_open.sends_max = ibcm_open.sends;
1708 		IBTF_DPRINTF_L2(cmlog, "ibcm_flow_inc: sends max = %d",
1709 		    ibcm_open.sends_max);
1710 	}
1711 	mutex_exit(&ibcm_open.mutex);
1712 }
1713 
1714 static void
1715 ibcm_check_send_cmpltn_time(hrtime_t delta, char *event_msg)
1716 {
1717 	if (delta > 4000000LL) {
1718 		IBTF_DPRINTF_L2(cmlog, "ibcm_check_send_cmpltn_time: "
1719 		    "%s: %lldns", event_msg, delta);
1720 	}
1721 }
1722 
/*
 * Accounts completion of a MAD send.  Once the outstanding send count
 * drops below the low watermark, queued opens may be started and one
 * deferred close-flow exit (see ibcm_close_exit()) is performed.
 */
void
ibcm_flow_dec(hrtime_t time, char *mad_type)
{
	int flow_exit = 0;
	int run = 0;

	if (ibcm_dtrace)
		ibcm_check_send_cmpltn_time(gethrtime() - time, mad_type);
	mutex_enter(&ibcm_open.mutex);
	ibcm_open.sends--;
	if (ibcm_open.sends < ibcm_open.sends_lowat) {
		run = ibcm_ok_to_start(&ibcm_open);
		/* pay back one close-flow exit deferred by ibcm_close_exit */
		if (ibcm_open.exit_deferred) {
			ibcm_open.exit_deferred--;
			flow_exit = 1;
		}
	}
	mutex_exit(&ibcm_open.mutex);
	if (flow_exit)
		ibcm_flow_exit(&ibcm_close_flow);
	if (run)
		ibcm_run_tlist_thread();
}
1746 
/*
 * Appends statep to the global close list (tail is never NULL; it
 * points at head for an empty list) and kicks the tlist thread, which
 * issues the close from ibcm_check_for_async_close().
 */
void
ibcm_close_enqueue(ibcm_state_data_t *statep)
{
	mutex_enter(&ibcm_close.mutex);
	statep->close_link = NULL;
	ibcm_close.tail->close_link = statep;
	ibcm_close.tail = statep;
	mutex_exit(&ibcm_close.mutex);
	ibcm_run_tlist_thread();
}
1757 
/*
 * Drains the close list, invoking ibcm_close_start() on each entry
 * with ibcm_close.mutex dropped (so the list can grow meanwhile).
 */
void
ibcm_check_for_async_close()
{
	ibcm_state_data_t 	*statep;

	mutex_enter(&ibcm_close.mutex);

	while (ibcm_close.head.close_link) {
		/* unlink the head element; fix tail if it was the last */
		statep = ibcm_close.head.close_link;
		ibcm_close.head.close_link = statep->close_link;
		statep->close_link = NULL;
		if (ibcm_close.tail == statep)
			ibcm_close.tail = &ibcm_close.head;
		mutex_exit(&ibcm_close.mutex);
		ibcm_close_start(statep);
		mutex_enter(&ibcm_close.mutex);
	}
	mutex_exit(&ibcm_close.mutex);
}
1777 
/* blocks until a close (DREQ/DREP) flow-control slot is available */
void
ibcm_close_enter(void)
{
	ibcm_flow_enter(&ibcm_close_flow);
}
1783 
1784 void
1785 ibcm_close_exit(void)
1786 {
1787 	int flow_exit;
1788 
1789 	mutex_enter(&ibcm_open.mutex);
1790 	if (ibcm_open.sends < ibcm_open.sends_lowat ||
1791 	    ibcm_open.exit_deferred >= 4)
1792 		flow_exit = 1;
1793 	else {
1794 		flow_exit = 0;
1795 		ibcm_open.exit_deferred++;
1796 	}
1797 	mutex_exit(&ibcm_open.mutex);
1798 	if (flow_exit)
1799 		ibcm_flow_exit(&ibcm_close_flow);
1800 }
1801 
1802 /*
1803  * This function needs to be called twice to finish our flow
1804  * control accounting when closing down a connection.  One
1805  * call has send_done set to 1, while the other has it set to 0.
1806  * Because of retries, this could get called more than once
1807  * with either 0 or 1, but additional calls have no effect.
1808  */
1809 void
1810 ibcm_close_done(ibcm_state_data_t *statep, int send_done)
1811 {
1812 	int flow_exit;
1813 
1814 	ASSERT(MUTEX_HELD(&statep->state_mutex));
1815 	if (statep->close_flow == 1) {
1816 		if (send_done)
1817 			statep->close_flow = 3;
1818 		else
1819 			statep->close_flow = 2;
1820 	} else if ((send_done && statep->close_flow == 2) ||
1821 	    (!send_done && statep->close_flow == 3)) {
1822 		statep->close_flow = 0;
1823 		mutex_enter(&ibcm_open.mutex);
1824 		if (ibcm_open.sends < ibcm_open.sends_lowat ||
1825 		    ibcm_open.exit_deferred >= 4)
1826 			flow_exit = 1;
1827 		else {
1828 			flow_exit = 0;
1829 			ibcm_open.exit_deferred++;
1830 		}
1831 		mutex_exit(&ibcm_open.mutex);
1832 		if (flow_exit)
1833 			ibcm_flow_exit(&ibcm_close_flow);
1834 	}
1835 }
1836 
/* blocks until a LAP/APR flow-control slot is available */
void
ibcm_lapr_enter(void)
{
	ibcm_flow_enter(&ibcm_lapr_flow);
}
1842 
/* releases a LAP/APR flow-control slot */
void
ibcm_lapr_exit(void)
{
	ibcm_flow_exit(&ibcm_lapr_flow);
}
1848 
/* blocks until an SA-access flow-control slot is available */
void
ibcm_sa_access_enter()
{
	ibcm_flow_enter(&ibcm_saa_flow);
}
1854 
/* releases an SA-access flow-control slot */
void
ibcm_sa_access_exit()
{
	ibcm_flow_exit(&ibcm_saa_flow);
}
1860 
/*
 * ibmf_saa subnet event callback: maps IBMF_SAA_EVENT_* codes to
 * IBT_SM_EVENT_* codes and forwards the event to
 * ibtl_cm_sm_notice_handler(), unless the HCA is being torn down.
 * Serialized by ibcm_sm_notice_serialize_lock.
 */
static void
ibcm_sm_notice_handler(ibmf_saa_handle_t saa_handle,
    ibmf_saa_subnet_event_t saa_event_code,
    ibmf_saa_event_details_t *saa_event_details,
    void *callback_arg)
{
	ibcm_port_info_t	*portp = (ibcm_port_info_t *)callback_arg;
	ibt_subnet_event_code_t code;
	ibt_subnet_event_t	event;
	uint8_t			event_status;

	IBTF_DPRINTF_L3(cmlog, "ibcm_sm_notice_handler: saa_hdl %p, code = %d",
	    saa_handle, saa_event_code);

	mutex_enter(&ibcm_sm_notice_serialize_lock);

	switch (saa_event_code) {
	case IBMF_SAA_EVENT_MCG_CREATED:
		code = IBT_SM_EVENT_MCG_CREATED;
		break;
	case IBMF_SAA_EVENT_MCG_DELETED:
		code = IBT_SM_EVENT_MCG_DELETED;
		break;
	case IBMF_SAA_EVENT_GID_AVAILABLE:
		code = IBT_SM_EVENT_GID_AVAIL;
		/* GID availability changes invalidate cached paths */
		ibcm_path_cache_purge();
		break;
	case IBMF_SAA_EVENT_GID_UNAVAILABLE:
		code = IBT_SM_EVENT_GID_UNAVAIL;
		ibcm_path_cache_purge();
		break;
	case IBMF_SAA_EVENT_SUBSCRIBER_STATUS_CHG:
		/* only SM-producer status transitions are reported */
		event_status =
		    saa_event_details->ie_producer_event_status_mask &
		    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM;
		if (event_status == (portp->port_event_status &
		    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM)) {
			mutex_exit(&ibcm_sm_notice_serialize_lock);
			return;	/* no change */
		}
		portp->port_event_status = event_status;
		if (event_status == IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM)
			code = IBT_SM_EVENT_AVAILABLE;
		else
			code = IBT_SM_EVENT_UNAVAILABLE;
		break;
	default:
		/* unrecognized event: nothing to forward */
		mutex_exit(&ibcm_sm_notice_serialize_lock);
		return;
	}

	mutex_enter(&ibcm_global_hca_lock);

	/* don't send the event if we're tearing down */
	if (!IBCM_ACCESS_HCA_OK(portp->port_hcap)) {
		mutex_exit(&ibcm_global_hca_lock);
		mutex_exit(&ibcm_sm_notice_serialize_lock);
		return;
	}

	/* hold the HCA (acc_cnt) across the upcall */
	++(portp->port_hcap->hca_acc_cnt);
	mutex_exit(&ibcm_global_hca_lock);

	event.sm_notice_gid = saa_event_details->ie_gid;
	ibtl_cm_sm_notice_handler(portp->port_sgid0, code, &event);

	mutex_exit(&ibcm_sm_notice_serialize_lock);

	ibcm_dec_hca_acc_cnt(portp->port_hcap);
}
1931 
/*
 * Registers (or, with a NULL handler, clears) a client's subnet notice
 * handler.  For every port whose SM-producer status is not currently
 * available, the client is immediately notified of the init failure
 * via ibtl_cm_sm_notice_init_failure().
 */
void
ibt_register_subnet_notices(ibt_clnt_hdl_t ibt_hdl,
    ibt_sm_notice_handler_t sm_notice_handler, void *private)
{
	ibcm_port_info_t	*portp;
	ibcm_hca_info_t		*hcap;
	uint8_t			port;
	int			num_failed_sgids;
	ibtl_cm_sm_init_fail_t	*ifail;
	ib_gid_t		*sgidp;

	IBTF_DPRINTF_L3(cmlog, "ibt_register_subnet_notices: ibt_hdl = %p",
	    ibt_hdl);

	mutex_enter(&ibcm_sm_notice_serialize_lock);

	ibtl_cm_set_sm_notice_handler(ibt_hdl, sm_notice_handler, private);
	if (sm_notice_handler == NULL) {
		mutex_exit(&ibcm_sm_notice_serialize_lock);
		return;
	}

	/* for each port, if service is not available, make a call */
	mutex_enter(&ibcm_global_hca_lock);
	/* first pass: count the ports without an available SM producer */
	num_failed_sgids = 0;
	hcap = ibcm_hca_listp;
	while (hcap != NULL) {
		portp = hcap->hca_port_info;
		for (port = 0; port < hcap->hca_num_ports; port++) {
			if (!(portp->port_event_status &
			    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM))
				num_failed_sgids++;
			portp++;
		}
		hcap = hcap->hca_next;
	}
	if (num_failed_sgids != 0) {
		/* second pass: collect the SGIDs of those ports */
		ifail = kmem_alloc(sizeof (*ifail) +
		    (num_failed_sgids - 1) * sizeof (ib_gid_t), KM_SLEEP);
		ifail->smf_num_sgids = num_failed_sgids;
		ifail->smf_ibt_hdl = ibt_hdl;
		sgidp = &ifail->smf_sgid[0];
		hcap = ibcm_hca_listp;
		while (hcap != NULL) {
			portp = hcap->hca_port_info;
			for (port = 0; port < hcap->hca_num_ports; port++) {
				if (!(portp->port_event_status &
				    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM))
					*sgidp++ = portp->port_sgid0;
				portp++;
			}
			hcap = hcap->hca_next;
		}
	}
	mutex_exit(&ibcm_global_hca_lock);

	if (num_failed_sgids != 0) {
		/* notify the client outside the global hca lock */
		ibtl_cm_sm_notice_init_failure(ifail);
		kmem_free(ifail, sizeof (*ifail) +
		    (num_failed_sgids - 1) * sizeof (ib_gid_t));
	}
	mutex_exit(&ibcm_sm_notice_serialize_lock);
}
1995 
/* The following is run from a taskq because we've seen the stack overflow. */
static void
ibcm_init_saa(void *arg)
{
	ibcm_port_info_t		*portp = (ibcm_port_info_t *)arg;
	int				status;
	ib_guid_t			port_guid;
	ibmf_saa_subnet_event_args_t	event_args;

	port_guid = portp->port_sgid0.gid_guid;

	IBTF_DPRINTF_L3(cmlog, "ibcm_init_saa: port guid %llX", port_guid);

	/* subscribe ibcm_sm_notice_handler() for subnet events */
	event_args.is_event_callback_arg = portp;
	event_args.is_event_callback = ibcm_sm_notice_handler;

	if ((status = ibmf_sa_session_open(port_guid, 0, &event_args,
	    IBMF_VERSION, 0, &portp->port_ibmf_saa_hdl)) != IBMF_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa: "
		    "ibmf_sa_session_open failed for port guid %llX "
		    "status = %d", port_guid, status);
	} else {
		IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa: "
		    "registered sa_hdl 0x%p for port guid %llX",
		    portp->port_ibmf_saa_hdl, port_guid);
	}

	/* wake anyone waiting on the in-progress open to complete */
	mutex_enter(&ibcm_sa_open_lock);
	portp->port_saa_open_in_progress = 0;
	cv_broadcast(&ibcm_sa_open_cv);
	mutex_exit(&ibcm_sa_open_lock);
}
2028 
/*
 * ibcm_init_saa_handle():
 *	Arrange for an ibmf SA session to be opened for the given port,
 *	unless one is already open or an open is already in progress.
 *	The ibmf_sa_session_open() itself runs asynchronously from a
 *	taskq (see ibcm_init_saa()); this function only dispatches it.
 *
 * Arguments:
 *	hcap	- pointer to CM's per-HCA info structure
 *	port	- port number (1-based)
 */
void
ibcm_init_saa_handle(ibcm_hca_info_t *hcap, uint8_t port)
{
	ibmf_saa_handle_t	saa_handle;
	uint8_t			port_index = port - 1;
	ibcm_port_info_t	*portp = &hcap->hca_port_info[port_index];
	ibt_status_t		ibt_status;

	/* Reject invalid port numbers (port 0 wraps to index 0xff) */
	if (port_index >= hcap->hca_num_ports)
		return;

	/* If another thread already claimed the open, leave it to them */
	mutex_enter(&ibcm_sa_open_lock);
	if (portp->port_saa_open_in_progress) {
		mutex_exit(&ibcm_sa_open_lock);
		return;
	}

	/* Session already open - nothing to do */
	saa_handle = portp->port_ibmf_saa_hdl;
	if (saa_handle != NULL) {
		mutex_exit(&ibcm_sa_open_lock);
		return;
	}

	/* Claim the open; ibcm_init_saa() clears this flag when done */
	portp->port_saa_open_in_progress = 1;
	mutex_exit(&ibcm_sa_open_lock);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(portp->port_event_status))

	/* The assumption is that we're getting event notifications */
	portp->port_event_status = IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM;

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(portp->port_event_status))

	/* Refresh port_sgid0; ibcm_init_saa() derives the port guid from it */
	ibt_status = ibt_get_port_state_byguid(portp->port_hcap->hca_guid,
	    portp->port_num, &portp->port_sgid0, NULL);
	if (ibt_status != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa_handle: "
		    "ibt_get_port_state_byguid failed for guid %llX "
		    "with status %d", portp->port_hcap->hca_guid, ibt_status);
		/* Can't determine the port's SGID; give up and wake waiters */
		mutex_enter(&ibcm_sa_open_lock);
		portp->port_saa_open_in_progress = 0;
		cv_broadcast(&ibcm_sa_open_cv);
		mutex_exit(&ibcm_sa_open_lock);
		return;
	}
	/* if the port is UP, try sa_session_open */
	(void) taskq_dispatch(ibcm_taskq, ibcm_init_saa, portp, TQ_SLEEP);
}
2077 
2078 
/*
 * ibcm_get_saa_handle():
 *	Return the port's ibmf SA session handle, opening a session (via
 *	the ibcm_init_saa() taskq) if one is not already open.  Blocks
 *	while an open is in progress.
 *
 * Arguments:
 *	hcap	- pointer to CM's per-HCA info structure
 *	port	- port number (1-based)
 *
 * Return values:
 *	The SA session handle, or NULL if the port number is invalid or
 *	the session could not be opened.
 */
ibmf_saa_handle_t
ibcm_get_saa_handle(ibcm_hca_info_t *hcap, uint8_t port)
{
	ibmf_saa_handle_t	saa_handle;
	uint8_t			port_index = port - 1;
	ibcm_port_info_t	*portp = &hcap->hca_port_info[port_index];
	ibt_status_t		ibt_status;

	/* Reject invalid port numbers (port 0 wraps to index 0xff) */
	if (port_index >= hcap->hca_num_ports)
		return (NULL);

	/* Wait out any open already in progress on this port */
	mutex_enter(&ibcm_sa_open_lock);
	while (portp->port_saa_open_in_progress) {
		cv_wait(&ibcm_sa_open_cv, &ibcm_sa_open_lock);
	}

	/* Session already open - return the existing handle */
	saa_handle = portp->port_ibmf_saa_hdl;
	if (saa_handle != NULL) {
		mutex_exit(&ibcm_sa_open_lock);
		return (saa_handle);
	}

	/* Claim the open; ibcm_init_saa() clears this flag when done */
	portp->port_saa_open_in_progress = 1;
	mutex_exit(&ibcm_sa_open_lock);

	/* Refresh port_sgid0; ibcm_init_saa() derives the port guid from it */
	ibt_status = ibt_get_port_state_byguid(portp->port_hcap->hca_guid,
	    portp->port_num, &portp->port_sgid0, NULL);
	if (ibt_status != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_get_saa_handle: "
		    "ibt_get_port_state_byguid failed for guid %llX "
		    "with status %d", portp->port_hcap->hca_guid, ibt_status);
		/* Can't determine the port's SGID; give up and wake waiters */
		mutex_enter(&ibcm_sa_open_lock);
		portp->port_saa_open_in_progress = 0;
		cv_broadcast(&ibcm_sa_open_cv);
		mutex_exit(&ibcm_sa_open_lock);
		return (NULL);
	}
	/* if the port is UP, try sa_session_open */
	(void) taskq_dispatch(ibcm_taskq, ibcm_init_saa, portp, TQ_SLEEP);

	/* Block until the taskq's open attempt completes, then re-read */
	mutex_enter(&ibcm_sa_open_lock);
	while (portp->port_saa_open_in_progress) {
		cv_wait(&ibcm_sa_open_cv, &ibcm_sa_open_lock);
	}
	/* NULL here means the taskq's ibmf_sa_session_open() failed */
	saa_handle = portp->port_ibmf_saa_hdl;
	mutex_exit(&ibcm_sa_open_lock);
	return (saa_handle);
}
2127 
2128 
2129 /*
2130  * ibcm_hca_init_port():
2131  * 	- Register port with IBMA
2132  *
2133  * Arguments:
2134  *	hcap		- HCA's guid
2135  *	port_index	- port number minus 1
2136  *
2137  * Return values:
2138  *	IBCM_SUCCESS - success
2139  */
2140 ibt_status_t
2141 ibcm_hca_init_port(ibcm_hca_info_t *hcap, uint8_t port_index)
2142 {
2143 	int			status;
2144 	ibmf_register_info_t	*ibmf_reg;
2145 
2146 	IBTF_DPRINTF_L4(cmlog, "ibcm_hca_init_port: hcap = 0x%p port_num %d",
2147 	    hcap, port_index + 1);
2148 
2149 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
2150 
2151 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(hcap->hca_port_info))
2152 
2153 	if (hcap->hca_port_info[port_index].port_ibmf_hdl == NULL) {
2154 		/* Register with IBMF */
2155 		ibmf_reg = &hcap->hca_port_info[port_index].port_ibmf_reg;
2156 		ibmf_reg->ir_ci_guid = hcap->hca_guid;
2157 		ibmf_reg->ir_port_num = port_index + 1;
2158 		ibmf_reg->ir_client_class = COMM_MGT_MANAGER_AGENT;
2159 
2160 		/*
2161 		 * register with management framework
2162 		 */
2163 		status = ibmf_register(ibmf_reg, IBMF_VERSION,
2164 		    IBMF_REG_FLAG_NO_OFFLOAD, NULL, NULL,
2165 		    &(hcap->hca_port_info[port_index].port_ibmf_hdl),
2166 		    &(hcap->hca_port_info[port_index].port_ibmf_caps));
2167 
2168 		if (status != IBMF_SUCCESS) {
2169 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_init_port: "
2170 			    "ibmf_register failed for port_num %x, "
2171 			    "status = %x", port_index + 1, status);
2172 			return (ibcm_ibmf_analyze_error(status));
2173 		}
2174 
2175 		hcap->hca_port_info[port_index].port_qp1.qp_cm =
2176 		    IBMF_QP_HANDLE_DEFAULT;
2177 		hcap->hca_port_info[port_index].port_qp1.qp_port =
2178 		    &(hcap->hca_port_info[port_index]);
2179 
2180 		/*
2181 		 * Register the read callback with IBMF.
2182 		 * Since we just did an ibmf_register, handle is
2183 		 * valid and ibcm_recv_cb() is valid so we can
2184 		 * safely assert for success of ibmf_setup_recv_cb()
2185 		 *
2186 		 * Depending on the "state" of the HCA,
2187 		 * CM may drop incoming packets
2188 		 */
2189 		status = ibmf_setup_async_cb(
2190 		    hcap->hca_port_info[port_index].port_ibmf_hdl,
2191 		    IBMF_QP_HANDLE_DEFAULT, ibcm_recv_cb,
2192 		    &(hcap->hca_port_info[port_index].port_qp1), 0);
2193 		ASSERT(status == IBMF_SUCCESS);
2194 
2195 		IBTF_DPRINTF_L5(cmlog, "ibcm_hca_init_port: "
2196 		    "IBMF hdl[%x] = 0x%p", port_index,
2197 		    hcap->hca_port_info[port_index].port_ibmf_hdl);
2198 
2199 		/* Attempt to get the saa_handle for this port */
2200 		ibcm_init_saa_handle(hcap, port_index + 1);
2201 	}
2202 
2203 	return (IBT_SUCCESS);
2204 }
2205 
2206 /*
2207  * useful, to re attempt to initialize port ibma handles from elsewhere in
2208  * cm code
2209  */
2210 ibt_status_t
2211 ibcm_hca_reinit_port(ibcm_hca_info_t *hcap, uint8_t port_index)
2212 {
2213 	ibt_status_t	status;
2214 
2215 	IBTF_DPRINTF_L5(cmlog, "ibcm_hca_reinit_port: hcap 0x%p port_num %d",
2216 	    hcap, port_index + 1);
2217 
2218 	mutex_enter(&ibcm_global_hca_lock);
2219 	status = ibcm_hca_init_port(hcap, port_index);
2220 	mutex_exit(&ibcm_global_hca_lock);
2221 	return (status);
2222 }
2223 
2224 
2225 /*
2226  * ibcm_hca_fini_port():
2227  * 	- Deregister port with IBMA
2228  *
2229  * Arguments:
2230  *	hcap		- HCA's guid
2231  *	port_index	- port number minus 1
2232  *
2233  * Return values:
2234  *	IBCM_SUCCESS - success
2235  */
2236 static ibcm_status_t
2237 ibcm_hca_fini_port(ibcm_hca_info_t *hcap, uint8_t port_index)
2238 {
2239 	int			ibmf_status;
2240 	ibcm_status_t		ibcm_status;
2241 
2242 	IBTF_DPRINTF_L4(cmlog, "ibcm_hca_fini_port: hcap = 0x%p port_num %d ",
2243 	    hcap, port_index + 1);
2244 
2245 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
2246 
2247 	if (hcap->hca_port_info[port_index].port_ibmf_saa_hdl != NULL) {
2248 		IBTF_DPRINTF_L5(cmlog, "ibcm_hca_fini_port: "
2249 		    "ibmf_sa_session_close IBMF SAA hdl %p",
2250 		    hcap->hca_port_info[port_index].port_ibmf_saa_hdl);
2251 
2252 		ibmf_status = ibmf_sa_session_close(
2253 		    &hcap->hca_port_info[port_index].port_ibmf_saa_hdl, 0);
2254 		if (ibmf_status != IBMF_SUCCESS) {
2255 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2256 			    "ibmf_sa_session_close of port %d returned %x",
2257 			    port_index + 1, ibmf_status);
2258 			return (IBCM_FAILURE);
2259 		}
2260 	}
2261 
2262 	if (hcap->hca_port_info[port_index].port_ibmf_hdl != NULL) {
2263 		IBTF_DPRINTF_L5(cmlog, "ibcm_hca_fini_port: "
2264 		    "ibmf_unregister IBMF Hdl %p",
2265 		    hcap->hca_port_info[port_index].port_ibmf_hdl);
2266 
2267 		/* clean-up all the ibmf qp's allocated on this port */
2268 		ibcm_status = ibcm_free_allqps(hcap, port_index + 1);
2269 
2270 		if (ibcm_status != IBCM_SUCCESS) {
2271 
2272 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2273 			    "ibcm_free_allqps failed for port_num %d",
2274 			    port_index + 1);
2275 			return (IBCM_FAILURE);
2276 		}
2277 
2278 		/* Tear down the receive callback */
2279 		ibmf_status = ibmf_tear_down_async_cb(
2280 		    hcap->hca_port_info[port_index].port_ibmf_hdl,
2281 		    IBMF_QP_HANDLE_DEFAULT, 0);
2282 
2283 		if (ibmf_status != IBMF_SUCCESS) {
2284 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2285 			    "ibmf_tear_down_async_cb failed %d port_num %d",
2286 			    ibmf_status, port_index + 1);
2287 			return (IBCM_FAILURE);
2288 		}
2289 
2290 		/* Now, unregister with IBMF */
2291 		ibmf_status = ibmf_unregister(
2292 		    &hcap->hca_port_info[port_index].port_ibmf_hdl, 0);
2293 		IBTF_DPRINTF_L4(cmlog, "ibcm_hca_fini_port: "
2294 		    "ibmf_unregister of port_num %x returned %x",
2295 		    port_index + 1, ibmf_status);
2296 
2297 		if (ibmf_status == IBMF_SUCCESS)
2298 			hcap->hca_port_info[port_index].port_ibmf_hdl = NULL;
2299 		else {
2300 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2301 			    "ibmf_unregister failed %d port_num %d",
2302 			    ibmf_status, port_index + 1);
2303 			return (IBCM_FAILURE);
2304 		}
2305 	}
2306 	return (IBCM_SUCCESS);
2307 }
2308 
2309 /*
2310  * ibcm_comm_est_handler():
2311  *	Check if the given channel is in ESTABLISHED state or not
2312  *
2313  * Arguments:
2314  *	eventp	- A pointer to an ibt_async_event_t struct
2315  *
2316  * Return values: NONE
2317  */
2318 static void
2319 ibcm_comm_est_handler(ibt_async_event_t *eventp)
2320 {
2321 	ibcm_state_data_t	*statep;
2322 
2323 	IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler:");
2324 
2325 	/* Both QP and EEC handles can't be NULL */
2326 	if (eventp->ev_chan_hdl == NULL) {
2327 		IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: "
2328 		    "both QP and EEC handles are NULL");
2329 		return;
2330 	}
2331 
2332 	/* get the "statep" from qp/eec handles */
2333 	IBCM_GET_CHAN_PRIVATE(eventp->ev_chan_hdl, statep);
2334 	if (statep == NULL) {
2335 		IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: statep is NULL");
2336 		return;
2337 	}
2338 
2339 	mutex_enter(&statep->state_mutex);
2340 
2341 	IBCM_RELEASE_CHAN_PRIVATE(eventp->ev_chan_hdl);
2342 
2343 	IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler: statep = %p", statep);
2344 
2345 	IBCM_REF_CNT_INCR(statep);
2346 
2347 	if ((statep->state == IBCM_STATE_REP_SENT) ||
2348 	    (statep->state == IBCM_STATE_MRA_REP_RCVD)) {
2349 		timeout_id_t	timer_val = statep->timerid;
2350 
2351 		statep->state = IBCM_STATE_TRANSIENT_ESTABLISHED;
2352 
2353 		if (timer_val) {
2354 			statep->timerid = 0;
2355 			mutex_exit(&statep->state_mutex);
2356 			(void) untimeout(timer_val);
2357 		} else
2358 			mutex_exit(&statep->state_mutex);
2359 
2360 		/* CM doesn't have RTU message here */
2361 		ibcm_cep_state_rtu(statep, NULL);
2362 
2363 	} else {
2364 		if (statep->state == IBCM_STATE_ESTABLISHED ||
2365 		    statep->state == IBCM_STATE_TRANSIENT_ESTABLISHED) {
2366 			IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler: "
2367 			    "Channel already in ESTABLISHED state");
2368 		} else {
2369 			/* An unexpected behavior from remote */
2370 			IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: "
2371 			    "Unexpected in state = %d", statep->state);
2372 		}
2373 		mutex_exit(&statep->state_mutex);
2374 
2375 		ibcm_insert_trace(statep, IBCM_TRACE_INCOMING_COMEST);
2376 	}
2377 
2378 	mutex_enter(&statep->state_mutex);
2379 	IBCM_REF_CNT_DECR(statep);
2380 	mutex_exit(&statep->state_mutex);
2381 }
2382 
2383 
2384 /*
2385  * ibcm_async_handler():
2386  *	CM's Async Handler
2387  *	(Handles ATTACH, DETACH, COM_EST events)
2388  *
2389  * Arguments:
2390  *	eventp	- A pointer to an ibt_async_event_t struct
2391  *
2392  * Return values: None
2393  *
2394  * NOTE : CM assumes that all HCA DR events are delivered sequentially
2395  * i.e., until ibcm_async_handler  completes for a given HCA DR, framework
2396  * shall not invoke ibcm_async_handler with another DR event for the same
2397  * HCA
2398  */
2399 /* ARGSUSED */
2400 void
2401 ibcm_async_handler(void *clnt_hdl, ibt_hca_hdl_t hca_hdl,
2402     ibt_async_code_t code, ibt_async_event_t *eventp)
2403 {
2404 	ibcm_hca_info_t		*hcap;
2405 	ibcm_port_up_t		*pup;
2406 
2407 	IBTF_DPRINTF_L3(cmlog, "ibcm_async_handler: "
2408 	    "clnt_hdl = %p, code = 0x%x, eventp = 0x%p",
2409 	    clnt_hdl, code, eventp);
2410 
2411 	mutex_enter(&ibcm_global_hca_lock);
2412 
2413 	/* If fini is going to complete successfully, then return */
2414 	if (ibcm_finit_state != IBCM_FINIT_IDLE) {
2415 
2416 		/*
2417 		 * This finit state implies one of the following:
2418 		 * Init either didn't start or didn't complete OR
2419 		 * Fini is about to return SUCCESS and release the global lock.
2420 		 * In all these cases, it is safe to ignore the async.
2421 		 */
2422 
2423 		IBTF_DPRINTF_L2(cmlog, "ibcm_async_handler: ignoring event %x, "
2424 		    "as either init didn't complete or fini about to succeed",
2425 		    code);
2426 		mutex_exit(&ibcm_global_hca_lock);
2427 		return;
2428 	}
2429 
2430 	switch (code) {
2431 	case IBT_EVENT_PORT_UP:
2432 		mutex_exit(&ibcm_global_hca_lock);
2433 		pup = kmem_alloc(sizeof (ibcm_port_up_t), KM_SLEEP);
2434 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pup))
2435 		pup->pup_hca_guid = eventp->ev_hca_guid;
2436 		pup->pup_port = eventp->ev_port;
2437 		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*pup))
2438 		(void) taskq_dispatch(ibcm_taskq,
2439 		    ibcm_service_record_rewrite_task, pup, TQ_SLEEP);
2440 		ibcm_path_cache_purge();
2441 		return;
2442 
2443 	case IBT_HCA_ATTACH_EVENT:
2444 
2445 		/* eventp->ev_hcaguid is the HCA GUID of interest */
2446 		ibcm_hca_attach(eventp->ev_hca_guid);
2447 		break;
2448 
2449 	case IBT_HCA_DETACH_EVENT:
2450 
2451 		/* eventp->ev_hca_guid is the HCA GUID of interest */
2452 		if ((hcap = ibcm_find_hcap_entry(eventp->ev_hca_guid)) ==
2453 		    NULL) {
2454 			IBTF_DPRINTF_L2(cmlog, "ibcm_async_handler:"
2455 			    " hca %llX doesn't exist", eventp->ev_hca_guid);
2456 			break;
2457 		}
2458 
2459 		(void) ibcm_hca_detach(hcap);
2460 		break;
2461 
2462 	case IBT_EVENT_COM_EST_QP:
2463 		/* eventp->ev_qp_hdl is the ibt_qp_hdl_t of interest */
2464 	case IBT_EVENT_COM_EST_EEC:
2465 		/* eventp->ev_eec_hdl is the ibt_eec_hdl_t of interest */
2466 		ibcm_comm_est_handler(eventp);
2467 		break;
2468 	default:
2469 		break;
2470 	}
2471 
2472 	/* Unblock, any blocked fini/init operations */
2473 	mutex_exit(&ibcm_global_hca_lock);
2474 }
2475