xref: /illumos-gate/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_impl.c (revision 448978d332dcc806dc5948167fb3ee68e28e31c7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * ibcm_impl.c
30  *
31  * contains internal functions of IB CM module.
32  *
33  * TBD:
34  * 1. HCA CATASTROPHIC/RECOVERED not handled yet
35  */
36 
37 #include <sys/ib/mgt/ibcm/ibcm_impl.h>
38 #include <sys/disp.h>
39 
40 
41 /* function prototypes */
42 static ibcm_status_t	ibcm_init(void);
43 static ibcm_status_t	ibcm_fini(void);
44 
45 /* Routines to initialize and destroy CM global locks and CVs */
46 static void		ibcm_init_locks(void);
47 static void		ibcm_fini_locks(void);
48 
49 /* Routines that initialize/teardown CM's global hca structures */
50 static void		ibcm_init_hcas();
51 static ibcm_status_t	ibcm_fini_hcas();
52 
53 static void		ibcm_init_classportinfo();
54 static void		ibcm_stop_timeout_thread();
55 
56 /* Routines that handle HCA attach/detach asyncs */
57 static void		ibcm_hca_attach(ib_guid_t);
58 static ibcm_status_t	ibcm_hca_detach(ibcm_hca_info_t *);
59 
60 /* Routines that initialize the HCA's port related fields */
61 static ibt_status_t	ibcm_hca_init_port(ibcm_hca_info_t *hcap,
62 			    uint8_t port_index);
63 static ibcm_status_t	ibcm_hca_fini_port(ibcm_hca_info_t *hcap,
64 			    uint8_t port_index);
65 
66 static void ibcm_rc_flow_control_init(void);
67 static void ibcm_rc_flow_control_fini(void);
68 
69 /*
70  * Routines that check if hca's avl trees and sidr lists are free of any
71  * active client resources ie., RC or UD state structures in certain states
72  */
73 static ibcm_status_t	ibcm_check_avl_clean(ibcm_hca_info_t *hcap);
74 static ibcm_status_t	ibcm_check_sidr_clean(ibcm_hca_info_t *hcap);
75 
76 /* Add a new hca structure to CM's global hca list */
77 static ibcm_hca_info_t	*ibcm_add_hca_entry(ib_guid_t hcaguid, uint_t nports);
78 
79 static void		ibcm_comm_est_handler(ibt_async_event_t *);
80 void			ibcm_async_handler(void *, ibt_hca_hdl_t,
81 			    ibt_async_code_t, ibt_async_event_t *);
82 
/* Global variables */
char			cmlog[] = "ibcm";	/* for debug log messages */
ibt_clnt_hdl_t		ibcm_ibt_handle;	/* IBT handle */
kmutex_t		ibcm_svc_info_lock;	/* list lock */
kcondvar_t		ibcm_svc_info_cv;	/* cv for deregister */
kmutex_t		ibcm_recv_mutex;	/* recv path; usage at call sites */
avl_tree_t		ibcm_svc_avl_tree;	/* ibcm_svc_info_t registrations */
taskq_t			*ibcm_taskq = NULL;	/* set to system_taskq in ibcm_init */
int			taskq_dispatch_fail_cnt; /* count of failed dispatches */

kmutex_t		ibcm_trace_mutex;	/* Trace mutex */
kmutex_t		ibcm_trace_print_mutex;	/* Trace print mutex */
int			ibcm_conn_max_trcnt = IBCM_MAX_CONN_TRCNT;

int			ibcm_enable_trace = 2;	/* trace level (default 2) */
int			ibcm_dtrace = 0; /* conditionally enable more dtrace */
99 
100 _NOTE(MUTEX_PROTECTS_DATA(ibcm_svc_info_lock, ibcm_svc_info_s::{svc_bind_list
101     svc_ref_cnt svc_to_delete}))
102 
103 _NOTE(MUTEX_PROTECTS_DATA(ibcm_svc_info_lock, ibcm_svc_bind_s::{sbind_link}))
104 
105 _NOTE(MUTEX_PROTECTS_DATA(ibcm_trace_mutex, ibcm_conn_trace_s))
106 
107 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_conn_trace_s))
108 
109 _NOTE(MUTEX_PROTECTS_DATA(ibcm_trace_print_mutex, ibcm_debug_buf))
110 
111 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_debug_buf))
112 
113 /*
114  * Initial state is INIT. All hca dr's return success immediately in this
115  * state, without adding or deleting any hca's to CM.
116  */
117 ibcm_finit_state_t	ibcm_finit_state = IBCM_FINIT_INIT;
118 
119 /* mutex and cv to manage hca's reference and resource count(s) */
120 kmutex_t		ibcm_global_hca_lock;
121 kcondvar_t		ibcm_global_hca_cv;
122 
123 /* mutex and cv to sa session open */
124 kmutex_t		ibcm_sa_open_lock;
125 kcondvar_t		ibcm_sa_open_cv;
126 int			ibcm_sa_timeout_delay = 1;		/* in ticks */
127 _NOTE(MUTEX_PROTECTS_DATA(ibcm_sa_open_lock,
128     ibcm_port_info_s::{port_ibmf_saa_hdl port_saa_open_in_progress}))
129 
130 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_port_info_s::{port_ibmf_saa_hdl}))
131 
132 /* serialize sm notice callbacks */
133 kmutex_t		ibcm_sm_notice_serialize_lock;
134 
135 _NOTE(LOCK_ORDER(ibcm_sm_notice_serialize_lock ibcm_global_hca_lock))
136 
137 _NOTE(MUTEX_PROTECTS_DATA(ibcm_global_hca_lock, ibcm_hca_info_s::{hca_state
138     hca_svc_cnt hca_acc_cnt hca_res_cnt hca_next}))
139 
140 _NOTE(MUTEX_PROTECTS_DATA(ibcm_global_hca_lock,
141     ibcm_port_info_s::{port_ibmf_hdl}))
142 
143 _NOTE(MUTEX_PROTECTS_DATA(ibcm_sm_notice_serialize_lock,
144     ibcm_port_info_s::{port_event_status}))
145 
146 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_hca_info_s::{hca_state}))
147 _NOTE(DATA_READABLE_WITHOUT_LOCK(
148     ibcm_hca_info_s::{hca_port_info.port_ibmf_hdl}))
149 
150 /* mutex for CM's qp list management */
151 kmutex_t		ibcm_qp_list_lock;
152 
153 _NOTE(MUTEX_PROTECTS_DATA(ibcm_qp_list_lock, ibcm_port_info_s::{port_qplist}))
154 _NOTE(MUTEX_PROTECTS_DATA(ibcm_qp_list_lock, ibcm_qp_list_s))
155 _NOTE(MUTEX_PROTECTS_DATA(ibcm_qp_list_lock, ibcm_qp_list_s))
156 
157 kcondvar_t		ibcm_timeout_list_cv;
158 kcondvar_t		ibcm_timeout_thread_done_cv;
159 kt_did_t		ibcm_timeout_thread_did;
160 ibcm_state_data_t	*ibcm_timeout_list_hdr, *ibcm_timeout_list_tail;
161 ibcm_ud_state_data_t	*ibcm_ud_timeout_list_hdr, *ibcm_ud_timeout_list_tail;
162 kmutex_t		ibcm_timeout_list_lock;
163 uint8_t			ibcm_timeout_list_flags = 0;
164 pri_t			ibcm_timeout_thread_pri = MINCLSYSPRI;
165 
166 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock,
167     ibcm_state_data_s::timeout_next))
168 
169 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock,
170     ibcm_ud_state_data_s::ud_timeout_next))
171 
172 /*
173  * Flow control logic for open_rc_channel uses the following.
174  */
175 
/*
 * Global flow-control state for open_rc_channel (see comment above).
 * NOTE(review): per-field semantics are inferred from names and the
 * water-mark pattern; ibcm_open_task() and other consumers are not in
 * this part of the file -- confirm against their use.
 */
struct ibcm_open_s {
	kmutex_t		mutex;		/* protects this structure */
	kcondvar_t		cv;
	uint8_t			task_running;
	uint_t			queued;
	uint_t			exit_deferred;
	uint_t			in_progress;
	uint_t			in_progress_max;
	uint_t			sends;
	uint_t			sends_max;
	uint_t			sends_lowat;	/* low water mark */
	uint_t			sends_hiwat;	/* high water mark */
	ibcm_state_data_t	*tail;
	ibcm_state_data_t	head;
} ibcm_open;
191 
192 static void ibcm_open_task(void *);
193 
194 /*
195  * Flow control logic for SA access and close_rc_channel calls follows.
196  */
197 
/* Tunables: max simultaneous operations of each flow-controlled kind */
int ibcm_close_simul_max	= 12;	/* close_rc_channel */
int ibcm_lapr_simul_max		= 12;	/* LAP/APR (alt path) */
int ibcm_saa_simul_max		= 8;	/* SA access */

/* One chunk of waiters blocked on a flow; chained via link */
typedef struct ibcm_flow1_s {
	struct ibcm_flow1_s	*link;
	kcondvar_t		cv;
	uint8_t			waiters;	/* 1 to IBCM_FLOW_SIMUL_MAX */
} ibcm_flow1_t;

/* Per-flow bookkeeping (list of waiter chunks plus counters) */
typedef struct ibcm_flow_s {
	ibcm_flow1_t		*list;
	uint_t			simul;	/* #requests currently outstanding */
	uint_t			simul_max;
	uint_t			waiters_per_chunk;
	uint_t			lowat;
	uint_t			lowat_default;
	/* statistics */
	uint_t			total;
} ibcm_flow_t;

/* The three flow-controlled request classes */
ibcm_flow_t ibcm_saa_flow;
ibcm_flow_t ibcm_close_flow;
ibcm_flow_t ibcm_lapr_flow;
222 
223 static ibt_clnt_modinfo_t ibcm_ibt_modinfo = {	/* Client's modinfop */
224 	IBTI_V2,
225 	IBT_CM,
226 	ibcm_async_handler,
227 	NULL,
228 	"IBCM"
229 };
230 
231 /* IBCM's list of HCAs registered with it */
232 static ibcm_hca_info_t	*ibcm_hca_listp = NULL;	/* CM's HCA list */
233 
/*
 * Array of CM state call table functions.
 * NOTE(review): entry order (REQ, MRA, REJ, REP, RTU, DREQ, DREP,
 * SIDR_REQ, SIDR_REP, LAP, APR) must match the index computed by the
 * MAD dispatch code, which is not in this part of the file -- confirm
 * against the attribute-ID decoding before reordering.
 */
ibcm_state_handler_t	ibcm_sm_funcs_tbl[] = {
	ibcm_process_req_msg,
	ibcm_process_mra_msg,
	ibcm_process_rej_msg,
	ibcm_process_rep_msg,
	ibcm_process_rtu_msg,
	ibcm_process_dreq_msg,
	ibcm_process_drep_msg,
	ibcm_process_sidr_req_msg,
	ibcm_process_sidr_rep_msg,
	ibcm_process_lap_msg,
	ibcm_process_apr_msg
};
248 
249 /* the following globals are CM tunables */
250 ibt_rnr_nak_time_t	ibcm_default_rnr_nak_time = IBT_RNR_NAK_655ms;
251 
252 uint32_t	ibcm_max_retries = IBCM_MAX_RETRIES;
253 clock_t		ibcm_local_processing_time = IBCM_LOCAL_RESPONSE_TIME;
254 clock_t		ibcm_remote_response_time = IBCM_REMOTE_RESPONSE_TIME;
255 ib_time_t	ibcm_max_sidr_rep_proctime = IBCM_MAX_SIDR_PROCESS_TIME;
256 ib_time_t	ibcm_max_sidr_pktlife_time = IBCM_MAX_SIDR_PKT_LIFE_TIME;
257 
258 ib_time_t	ibcm_max_sidr_rep_store_time = 18;
259 uint32_t	ibcm_wait_for_acc_cnt_timeout = 500000;	/* 500 ms */
260 uint32_t	ibcm_wait_for_res_cnt_timeout = 500000;	/* 500 ms */
261 
262 ib_time_t	ibcm_max_ib_pkt_lt = IBCM_MAX_IB_PKT_LT;
263 ib_time_t	ibcm_max_ib_mad_pkt_lt = IBCM_MAX_IB_MAD_PKT_LT;
264 
265 /*
266  * This delay accounts for time involved in various activities as follows :
267  *
268  * IBMF delays for posting the MADs in non-blocking mode
269  * IBMF delays for receiving the MADs and delivering to CM
270  * CM delays in processing the MADs before invoking client handlers,
271  * Any other delays associated with HCA driver in processing the MADs and
272  * 	other subsystems that CM may invoke (ex : SA, HCA driver)
273  */
274 uint32_t	ibcm_sw_delay	= 1000;	/* 1000us / 1ms */
275 uint32_t	ibcm_max_sa_retries = IBCM_MAX_SA_RETRIES + 1;
276 
277 /*	approx boot time */
278 uint32_t	ibcm_adj_btime = 4;	/* 4 seconds */
279 
280 /*
281  * The information in ibcm_clpinfo is kept in wireformat and is setup at
282  * init time, and used read-only after that
283  */
284 ibcm_classportinfo_msg_t	ibcm_clpinfo;
285 
286 char	*event_str[] = {
287 	"NEVER SEE THIS             ",
288 	"SESSION_ID                 ",
289 	"CHAN_HDL                   ",
290 	"LOCAL_COMID/HCA/PORT       ",
291 	"LOCAL_QPN                  ",
292 	"REMOTE_COMID/HCA           ",
293 	"REMOTE_QPN                 ",
294 	"BASE_TIME                  ",
295 	"INCOMING_REQ               ",
296 	"INCOMING_REP               ",
297 	"INCOMING_RTU               ",
298 	"INCOMING_COMEST            ",
299 	"INCOMING_MRA               ",
300 	"INCOMING_REJ               ",
301 	"INCOMING_LAP               ",
302 	"INCOMING_APR               ",
303 	"INCOMING_DREQ              ",
304 	"INCOMING_DREP              ",
305 	"OUTGOING_REQ               ",
306 	"OUTGOING_REP               ",
307 	"OUTGOING_RTU               ",
308 	"OUTGOING_LAP               ",
309 	"OUTGOING_APR               ",
310 	"OUTGOING_MRA               ",
311 	"OUTGOING_REJ               ",
312 	"OUTGOING_DREQ              ",
313 	"OUTGOING_DREP              ",
314 	"REQ_POST_COMPLETE          ",
315 	"REP_POST_COMPLETE          ",
316 	"RTU_POST_COMPLETE          ",
317 	"MRA_POST_COMPLETE          ",
318 	"REJ_POST_COMPLETE          ",
319 	"LAP_POST_COMPLETE          ",
320 	"APR_POST_COMPLETE          ",
321 	"DREQ_POST_COMPLETE         ",
322 	"DREP_POST_COMPLETE         ",
323 	"TIMEOUT_REP                ",
324 	"CALLED_REQ_RCVD_EVENT      ",
325 	"RET_REQ_RCVD_EVENT         ",
326 	"CALLED_REP_RCVD_EVENT      ",
327 	"RET_REP_RCVD_EVENT         ",
328 	"CALLED_CONN_EST_EVENT      ",
329 	"RET_CONN_EST_EVENT         ",
330 	"CALLED_CONN_FAIL_EVENT     ",
331 	"RET_CONN_FAIL_EVENT        ",
332 	"CALLED_CONN_CLOSE_EVENT    ",
333 	"RET_CONN_CLOSE_EVENT       ",
334 	"INIT_INIT                  ",
335 	"INIT_INIT_FAIL             ",
336 	"INIT_RTR                   ",
337 	"INIT_RTR_FAIL              ",
338 	"RTR_RTS                    ",
339 	"RTR_RTS_FAIL               ",
340 	"RTS_RTS                    ",
341 	"RTS_RTS_FAIL               ",
342 	"TO_ERROR                   ",
343 	"ERROR_FAIL                 ",
344 	"SET_ALT                    ",
345 	"SET_ALT_FAIL               ",
346 	"STALE_DETECT               ",
347 	"OUTGOING_REQ_RETRY         ",
348 	"OUTGOING_REP_RETRY         ",
349 	"OUTGOING_LAP_RETRY         ",
350 	"OUTGOING_MRA_RETRY         ",
351 	"OUTGOING_DREQ_RETRY        ",
352 	"NEVER SEE THIS             "
353 };
354 
355 char	ibcm_debug_buf[IBCM_DEBUG_BUF_SIZE];
356 
357 _NOTE(SCHEME_PROTECTS_DATA("used in a localized function consistently",
358     ibcm_debug_buf))
359 _NOTE(READ_ONLY_DATA(ibcm_taskq))
360 
361 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_timeout_list_flags))
362 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_timeout_list_hdr))
363 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_ud_timeout_list_hdr))
364 
365 #ifdef DEBUG
366 int		ibcm_test_mode = 0;	/* set to 1, if running tests */
367 #endif
368 
369 
370 /* Module Driver Info */
371 static struct modlmisc ibcm_modlmisc = {
372 	&mod_miscops,
373 	"IB Communication Manager %I%"
374 };
375 
376 /* Module Linkage */
377 static struct modlinkage ibcm_modlinkage = {
378 	MODREV_1,
379 	&ibcm_modlmisc,
380 	NULL
381 };
382 
383 
384 int
385 _init(void)
386 {
387 	int		rval;
388 	ibcm_status_t	status;
389 
390 	status = ibcm_init();
391 	if (status != IBCM_SUCCESS) {
392 		IBTF_DPRINTF_L2(cmlog, "_init: ibcm failed %d", status);
393 		return (EINVAL);
394 	}
395 
396 	rval = mod_install(&ibcm_modlinkage);
397 	if (rval != 0) {
398 		IBTF_DPRINTF_L2(cmlog, "_init: ibcm mod_install failed %d",
399 		    rval);
400 		(void) ibcm_fini();
401 	}
402 
403 	IBTF_DPRINTF_L5(cmlog, "_init: ibcm successful");
404 	return (rval);
405 
406 }
407 
408 
409 int
410 _info(struct modinfo *modinfop)
411 {
412 	return (mod_info(&ibcm_modlinkage, modinfop));
413 }
414 
415 
416 int
417 _fini(void)
418 {
419 	int status;
420 
421 	if (ibcm_fini() != IBCM_SUCCESS)
422 		return (EBUSY);
423 
424 	if ((status = mod_remove(&ibcm_modlinkage)) != 0) {
425 		IBTF_DPRINTF_L2(cmlog, "_fini: ibcm mod_remove failed %d",
426 		    status);
427 		return (status);
428 	}
429 
430 	IBTF_DPRINTF_L5(cmlog, "_fini: ibcm successful");
431 
432 	return (status);
433 }
434 
/*
 * Initializes all global mutexes, CVs and the service AVL tree in the
 * cm module.  Called once from ibcm_init(); ibcm_fini_locks() is the
 * exact mirror of this routine.
 */
static void
ibcm_init_locks()
{

	/* Verify CM MAD sizes (debug aid only; no functional effect) */
#ifdef DEBUG

	if (ibcm_test_mode > 1) {

		IBTF_DPRINTF_L1(cmlog, "REQ MAD SIZE %d",
		    sizeof (ibcm_req_msg_t));
		IBTF_DPRINTF_L1(cmlog, "REP MAD SIZE %d",
		    sizeof (ibcm_rep_msg_t));
		IBTF_DPRINTF_L1(cmlog, "RTU MAD SIZE %d",
		    sizeof (ibcm_rtu_msg_t));
		IBTF_DPRINTF_L1(cmlog, "MRA MAD SIZE %d",
		    sizeof (ibcm_mra_msg_t));
		IBTF_DPRINTF_L1(cmlog, "REJ MAD SIZE %d",
		    sizeof (ibcm_rej_msg_t));
		IBTF_DPRINTF_L1(cmlog, "LAP MAD SIZE %d",
		    sizeof (ibcm_lap_msg_t));
		IBTF_DPRINTF_L1(cmlog, "APR MAD SIZE %d",
		    sizeof (ibcm_apr_msg_t));
		IBTF_DPRINTF_L1(cmlog, "DREQ MAD SIZE %d",
		    sizeof (ibcm_dreq_msg_t));
		IBTF_DPRINTF_L1(cmlog, "DREP MAD SIZE %d",
		    sizeof (ibcm_drep_msg_t));
		IBTF_DPRINTF_L1(cmlog, "SIDR REQ MAD SIZE %d",
		    sizeof (ibcm_sidr_req_msg_t));
		IBTF_DPRINTF_L1(cmlog, "SIDR REP MAD SIZE %d",
		    sizeof (ibcm_sidr_rep_msg_t));
	}

#endif

	/* Create all global locks within cm module */
	mutex_init(&ibcm_svc_info_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_timeout_list_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_global_hca_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_sa_open_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_recv_mutex, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_sm_notice_serialize_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_qp_list_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_trace_mutex, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_trace_print_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&ibcm_svc_info_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ibcm_timeout_list_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ibcm_timeout_thread_done_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ibcm_global_hca_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ibcm_sa_open_cv, NULL, CV_DRIVER, NULL);
	/* Service registrations, keyed via ibcm_svc_compare() */
	avl_create(&ibcm_svc_avl_tree, ibcm_svc_compare,
	    sizeof (ibcm_svc_info_t),
	    offsetof(struct ibcm_svc_info_s, svc_link));

	IBTF_DPRINTF_L5(cmlog, "ibcm_init_locks: done");
}
492 
/*
 * Destroys all global mutexes, CVs and the service AVL tree in the cm
 * module.  Mirror of ibcm_init_locks(); callers must guarantee nothing
 * still uses these locks.
 */
static void
ibcm_fini_locks()
{
	/* Destroy all global locks within cm module */
	mutex_destroy(&ibcm_svc_info_lock);
	mutex_destroy(&ibcm_timeout_list_lock);
	mutex_destroy(&ibcm_global_hca_lock);
	mutex_destroy(&ibcm_sa_open_lock);
	mutex_destroy(&ibcm_recv_mutex);
	mutex_destroy(&ibcm_sm_notice_serialize_lock);
	mutex_destroy(&ibcm_qp_list_lock);
	mutex_destroy(&ibcm_trace_mutex);
	mutex_destroy(&ibcm_trace_print_mutex);
	cv_destroy(&ibcm_svc_info_cv);
	cv_destroy(&ibcm_timeout_list_cv);
	cv_destroy(&ibcm_timeout_thread_done_cv);
	cv_destroy(&ibcm_global_hca_cv);
	cv_destroy(&ibcm_sa_open_cv);
	avl_destroy(&ibcm_svc_avl_tree);

	IBTF_DPRINTF_L5(cmlog, "ibcm_fini_locks: done");
}
516 
517 
/*
 * Initialize CM's classport info.  ibcm_clpinfo is kept in wire format
 * (big-endian, hence the h2b16/h2b32 conversions) and is read-only
 * after this routine runs; see the comment at its definition.
 */
static void
ibcm_init_classportinfo()
{
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_clpinfo));

	ibcm_clpinfo.BaseVersion = IBCM_MAD_BASE_VERSION;
	ibcm_clpinfo.ClassVersion = IBCM_MAD_CLASS_VERSION;

	/* For now, CM supports same capabilities at all ports */
	ibcm_clpinfo.CapabilityMask =
	    h2b16(IBCM_CPINFO_CAP_RC | IBCM_CPINFO_CAP_SIDR);

	/* Bits 0-7 are all 0 for Communication Mgmt Class */

	/* For now, CM has the same respvalue at all ports */
	/* Only the low 5 bits carry the IB-units response time value */
	ibcm_clpinfo.RespTimeValue_plus =
	    h2b32(ibt_usec2ib(ibcm_local_processing_time) & 0x1f);

	/* For now, redirect fields are set to 0 */
	/* Trap fields are not applicable to CM, hence set to 0 */

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_clpinfo));
	IBTF_DPRINTF_L5(cmlog, "ibcm_init_classportinfo: done");
}
543 
/*
 * ibcm_init():
 * 	- call ibt_attach()
 * 	- create AVL trees
 *	- Attach HCA handlers that are already present before
 *	CM got loaded.
 *
 * Arguments:	NONE
 *
 * Return values:
 *	IBCM_SUCCESS - success
 *	IBCM_FAILURE - a sub-init failed; everything initialized up to
 *	that point has been unwound in reverse order.
 */
static ibcm_status_t
ibcm_init(void)
{
	ibt_status_t	status;
	kthread_t	*t;

	IBTF_DPRINTF_L3(cmlog, "ibcm_init:");

	ibcm_init_classportinfo();

	if (ibcm_init_ids() != IBCM_SUCCESS) {
		IBTF_DPRINTF_L1(cmlog, "ibcm_init: "
		    "fatal error: vmem_create() failed");
		return (IBCM_FAILURE);
	}
	ibcm_init_locks();

	if (ibcm_ar_init() != IBCM_SUCCESS) {
		IBTF_DPRINTF_L1(cmlog, "ibcm_init: "
		    "fatal error: ibcm_ar_init() failed");
		/* unwind the two steps above */
		ibcm_fini_ids();
		ibcm_fini_locks();
		return (IBCM_FAILURE);
	}
	ibcm_rc_flow_control_init();

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_taskq))
	ibcm_taskq = system_taskq;
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_taskq))

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_flags))
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_thread_did))

	/* Start the timeout list processing thread */
	ibcm_timeout_list_flags = 0;
	t = thread_create(NULL, 0, ibcm_process_tlist, 0, 0, &p0, TS_RUN,
	    ibcm_timeout_thread_pri);
	/* t_did is kept so ibcm_stop_timeout_thread() can thread_join() */
	ibcm_timeout_thread_did = t->t_did;

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_flags))
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_thread_did))

	/*
	 * NOTE : if ibt_attach is done after ibcm_init_hcas, then some
	 * HCA DR events may be lost. CM could call re-init hca list
	 * again, but it is more complicated. Some HCA's DR's lost may
	 * be HCA detach, which makes hca list re-syncing and locking more
	 * complex
	 */
	status = ibt_attach(&ibcm_ibt_modinfo, NULL, NULL, &ibcm_ibt_handle);
	if (status != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_init(): ibt_attach failed %d",
		    status);
		/* unwind every earlier init, newest first */
		(void) ibcm_ar_fini();
		ibcm_stop_timeout_thread();
		ibcm_fini_ids();
		ibcm_fini_locks();
		ibcm_rc_flow_control_fini();
		return (IBCM_FAILURE);
	}

	/* Block all HCA attach/detach asyncs */
	mutex_enter(&ibcm_global_hca_lock);

	ibcm_init_hcas();
	/* IDLE unblocks HCA DR processing (see ibcm_finit_state comment) */
	ibcm_finit_state = IBCM_FINIT_IDLE;

	ibcm_path_cache_init();

	/* Unblock any waiting HCA DR asyncs in CM */
	mutex_exit(&ibcm_global_hca_lock);

	IBTF_DPRINTF_L4(cmlog, "ibcm_init: done");
	return (IBCM_SUCCESS);
}
631 
632 /* Allocates and initializes the "per hca" global data in CM */
633 static void
634 ibcm_init_hcas()
635 {
636 	uint_t	num_hcas = 0;
637 	ib_guid_t *guid_array;
638 	int i;
639 
640 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_hcas:");
641 
642 	/* Get the number of HCAs */
643 	num_hcas = ibt_get_hca_list(&guid_array);
644 	IBTF_DPRINTF_L4(cmlog, "ibcm_init_hcas: ibt_get_hca_list() "
645 	    "returned %d hcas", num_hcas);
646 
647 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
648 
649 	for (i = 0; i < num_hcas; i++)
650 		ibcm_hca_attach(guid_array[i]);
651 
652 	if (num_hcas)
653 		ibt_free_hca_list(guid_array, num_hcas);
654 
655 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_hcas: done");
656 }
657 
658 
/*
 * ibcm_fini():
 * 	- Deregister w/ ibt
 * 	- Cleanup IBCM HCA listp
 * 	- Destroy mutexes
 *
 * Arguments:	NONE
 *
 * Return values:
 *	IBCM_SUCCESS - success
 *	IBCM_FAILURE - services, SIDR ops or connections are still
 *	active; CM state is restored so it keeps running.
 */
static ibcm_status_t
ibcm_fini(void)
{
	ibt_status_t	status;

	IBTF_DPRINTF_L3(cmlog, "ibcm_fini:");

	/*
	 * CM assumes that the all general clients got rid of all the
	 * established connections and service registrations, completed all
	 * pending SIDR operations before a call to ibcm_fini()
	 */

	if (ibcm_ar_fini() != IBCM_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: ibcm_ar_fini failed");
		return (IBCM_FAILURE);
	}

	/* cleanup the svcinfo list */
	mutex_enter(&ibcm_svc_info_lock);
	if (avl_first(&ibcm_svc_avl_tree) != NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: "
		    "ibcm_svc_avl_tree is not empty");
		mutex_exit(&ibcm_svc_info_lock);
		return (IBCM_FAILURE);
	}
	mutex_exit(&ibcm_svc_info_lock);

	/* disables any new hca attach/detaches */
	mutex_enter(&ibcm_global_hca_lock);

	ibcm_finit_state = IBCM_FINIT_BUSY;

	if (ibcm_fini_hcas() != IBCM_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: "
		    "some hca's still have client resources");

		/* First, re-initialize the hcas */
		ibcm_init_hcas();
		/* and then enable the HCA asyncs */
		ibcm_finit_state = IBCM_FINIT_IDLE;
		mutex_exit(&ibcm_global_hca_lock);
		/* re-establish what ibcm_ar_fini() above tore down */
		if (ibcm_ar_init() != IBCM_SUCCESS) {
			IBTF_DPRINTF_L1(cmlog, "ibcm_fini:ibcm_ar_init failed");
		}
		return (IBCM_FAILURE);
	}

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_hdr))
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_ud_timeout_list_hdr))

	/* all state structures must be gone once the HCAs are detached */
	ASSERT(ibcm_timeout_list_hdr == NULL);
	ASSERT(ibcm_ud_timeout_list_hdr == NULL);

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_hdr))
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_ud_timeout_list_hdr))

	/* Release any pending asyncs on ibcm_global_hca_lock */
	ibcm_finit_state = IBCM_FINIT_SUCCESS;
	mutex_exit(&ibcm_global_hca_lock);

	ibcm_stop_timeout_thread();

	/*
	 * Detach from IBTL. Waits until all pending asyncs are complete.
	 * Above cv_broadcast wakes up any waiting hca attach/detach asyncs
	 */
	status = ibt_detach(ibcm_ibt_handle);

	/* if detach fails, CM didn't free up some resources, so assert */
	if (status != IBT_SUCCESS)
	    IBTF_DPRINTF_L1(cmlog, "ibcm_fini: ibt_detach failed %d", status);

	ibcm_rc_flow_control_fini();

	ibcm_path_cache_fini();

	ibcm_fini_ids();
	ibcm_fini_locks();
	IBTF_DPRINTF_L3(cmlog, "ibcm_fini: done");
	return (IBCM_SUCCESS);
}
752 
753 /* This routine exit's the ibcm timeout thread  */
754 static void
755 ibcm_stop_timeout_thread()
756 {
757 	mutex_enter(&ibcm_timeout_list_lock);
758 
759 	/* Stop the timeout list processing thread */
760 	ibcm_timeout_list_flags =
761 	    ibcm_timeout_list_flags | IBCM_TIMEOUT_THREAD_EXIT;
762 
763 	/* Wake up, if the timeout thread is on a cv_wait */
764 	cv_signal(&ibcm_timeout_list_cv);
765 
766 	mutex_exit(&ibcm_timeout_list_lock);
767 	thread_join(ibcm_timeout_thread_did);
768 
769 	IBTF_DPRINTF_L5(cmlog, "ibcm_stop_timeout_thread: done");
770 }
771 
772 
773 /* Attempts to release all the hca's associated with CM */
774 static ibcm_status_t
775 ibcm_fini_hcas()
776 {
777 	ibcm_hca_info_t *hcap, *next;
778 
779 	IBTF_DPRINTF_L4(cmlog, "ibcm_fini_hcas:");
780 
781 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
782 
783 	hcap = ibcm_hca_listp;
784 	while (hcap != NULL) {
785 		next = hcap->hca_next;
786 		if (ibcm_hca_detach(hcap) != IBCM_SUCCESS) {
787 			ibcm_hca_listp = hcap;
788 			return (IBCM_FAILURE);
789 		}
790 		hcap = next;
791 	}
792 
793 	IBTF_DPRINTF_L4(cmlog, "ibcm_fini_hcas: SUCCEEDED");
794 	return (IBCM_SUCCESS);
795 }
796 
797 
798 /*
799  * ibcm_hca_attach():
800  *	Called as an asynchronous event to notify CM of an attach of HCA.
801  *	Here ibcm_hca_info_t is initialized and all fields are
802  *	filled in along with SA Access handles and IBMA handles.
803  *	Also called from ibcm_init to initialize ibcm_hca_info_t's for each
804  *	hca's
805  *
806  * Arguments: (WILL CHANGE BASED ON ASYNC EVENT CODE)
807  *	hca_guid	- HCA's guid
808  *
809  * Return values: NONE
810  */
811 static void
812 ibcm_hca_attach(ib_guid_t hcaguid)
813 {
814 	int			i;
815 	ibt_status_t		status;
816 	uint_t			nports = 0;
817 	ibcm_hca_info_t		*hcap;
818 	ibt_hca_attr_t		hca_attrs;
819 
820 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_attach: guid = 0x%llX", hcaguid);
821 
822 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
823 
824 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*hcap))
825 
826 	status = ibt_query_hca_byguid(hcaguid, &hca_attrs);
827 	if (status != IBT_SUCCESS) {
828 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
829 		    "ibt_query_hca_byguid failed = %d", status);
830 		return;
831 	}
832 	nports = hca_attrs.hca_nports;
833 
834 	IBTF_DPRINTF_L4(cmlog, "ibcm_hca_attach: num ports = %x", nports);
835 
836 	if ((hcap = ibcm_add_hca_entry(hcaguid, nports)) == NULL)
837 		return;
838 
839 	hcap->hca_guid = hcaguid;	/* Set GUID */
840 	hcap->hca_num_ports = nports;	/* Set number of ports */
841 
842 	if (ibcm_init_hca_ids(hcap) != IBCM_SUCCESS) {
843 		ibcm_delete_hca_entry(hcap);
844 		return;
845 	}
846 
847 	/* Store the static hca attribute data */
848 	hcap->hca_caps = hca_attrs.hca_flags;
849 	hcap->hca_ack_delay = hca_attrs.hca_local_ack_delay;
850 	hcap->hca_max_rdma_in_qp = hca_attrs.hca_max_rdma_in_qp;
851 	hcap->hca_max_rdma_out_qp = hca_attrs.hca_max_rdma_out_qp;
852 
853 	/* loop thru nports and initialize IBMF handles */
854 	for (i = 0; i < hcap->hca_num_ports; i++) {
855 		status = ibt_get_port_state_byguid(hcaguid, i + 1, NULL, NULL);
856 		if (status != IBT_SUCCESS) {
857 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
858 			    "port_num %d state DOWN", i + 1);
859 		}
860 
861 		hcap->hca_port_info[i].port_hcap = hcap;
862 		hcap->hca_port_info[i].port_num = i+1;
863 
864 		if (ibcm_hca_init_port(hcap, i) != IBT_SUCCESS)
865 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
866 			    "ibcm_hca_init_port failed %d port_num %d",
867 			    status, i+1);
868 	}
869 
870 	/* create the "active" CM AVL tree */
871 	avl_create(&hcap->hca_active_tree, ibcm_active_node_compare,
872 	    sizeof (ibcm_state_data_t),
873 	    offsetof(struct ibcm_state_data_s, avl_active_link));
874 
875 	/* create the "passive" CM AVL tree */
876 	avl_create(&hcap->hca_passive_tree, ibcm_passive_node_compare,
877 	    sizeof (ibcm_state_data_t),
878 	    offsetof(struct ibcm_state_data_s, avl_passive_link));
879 
880 	/* create the "passive comid" CM AVL tree */
881 	avl_create(&hcap->hca_passive_comid_tree,
882 	    ibcm_passive_comid_node_compare,
883 	    sizeof (ibcm_state_data_t),
884 	    offsetof(struct ibcm_state_data_s, avl_passive_comid_link));
885 
886 	/*
887 	 * Mark the state of the HCA to "attach" only at the end
888 	 * Now CM starts accepting incoming MADs and client API calls
889 	 */
890 	hcap->hca_state = IBCM_HCA_ACTIVE;
891 
892 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*hcap))
893 
894 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_attach: ATTACH Done");
895 }
896 
897 /*
898  * ibcm_hca_detach():
899  *	Called as an asynchronous event to notify CM of a detach of HCA.
900  *	Here ibcm_hca_info_t is freed up and all fields that
901  *	were initialized earlier are cleaned up
902  *
903  * Arguments: (WILL CHANGE BASED ON ASYNC EVENT CODE)
904  *	hca_guid    - HCA's guid
905  *
906  * Return values:
907  *	IBCM_SUCCESS	- able to detach HCA
908  *	IBCM_FAILURE	- failed to detach HCA
909  */
910 static ibcm_status_t
911 ibcm_hca_detach(ibcm_hca_info_t *hcap)
912 {
913 	int		port_index, i;
914 	ibcm_status_t	status = IBCM_SUCCESS;
915 	clock_t		absolute_time;
916 
917 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach: hcap = 0x%p guid = 0x%llX",
918 	    hcap, hcap->hca_guid);
919 
920 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
921 
922 	/*
923 	 * Declare hca is going away to all CM clients. Wait until the
924 	 * access count becomes zero.
925 	 */
926 	hcap->hca_state = IBCM_HCA_NOT_ACTIVE;
927 
928 	/* wait on response CV to 500mS */
929 	absolute_time = ddi_get_lbolt() +
930 	    drv_usectohz(ibcm_wait_for_acc_cnt_timeout);
931 
932 	while (hcap->hca_acc_cnt > 0)
933 		if (cv_timedwait(&ibcm_global_hca_cv, &ibcm_global_hca_lock,
934 		    absolute_time) == -1)
935 			break;
936 
937 	if (hcap->hca_acc_cnt != 0) {
938 		/* We got a timeout */
939 #ifdef DEBUG
940 		if (ibcm_test_mode > 0)
941 			IBTF_DPRINTF_L1(cmlog, "ibcm_hca_detach: Unexpected "
942 			    "abort due to timeout on acc_cnt %u",
943 			    hcap->hca_acc_cnt);
944 		else
945 #endif
946 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: Aborting due"
947 			    " to timeout on acc_cnt %u", hcap->hca_acc_cnt);
948 		hcap->hca_state = IBCM_HCA_ACTIVE;
949 		return (IBCM_FAILURE);
950 	}
951 
952 	/*
953 	 * First make sure, there are no active users of ibma handles,
954 	 * and then de-register handles.
955 	 */
956 
957 	/* make sure that there are no "Service"s registered w/ this HCA. */
958 	if (hcap->hca_svc_cnt != 0) {
959 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
960 		    "Active services still there %d", hcap->hca_svc_cnt);
961 		hcap->hca_state = IBCM_HCA_ACTIVE;
962 		return (IBCM_FAILURE);
963 	}
964 
965 	if (ibcm_check_sidr_clean(hcap) != IBCM_SUCCESS) {
966 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach:"
967 		    "There are active SIDR operations");
968 		hcap->hca_state = IBCM_HCA_ACTIVE;
969 		return (IBCM_FAILURE);
970 	}
971 
972 	if (ibcm_check_avl_clean(hcap) != IBCM_SUCCESS) {
973 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
974 		    "There are active RC connections");
975 		hcap->hca_state = IBCM_HCA_ACTIVE;
976 		return (IBCM_FAILURE);
977 	}
978 
979 	/*
980 	 * Now, wait until all rc and sidr stateps go away
981 	 * All these stateps must be short lived ones, waiting to be cleaned
982 	 * up after some timeout value, based on the current state.
983 	 */
984 	IBTF_DPRINTF_L5(cmlog, "ibcm_hca_detach:hca_guid = 0x%llX res_cnt = %d",
985 	    hcap->hca_guid, hcap->hca_res_cnt);
986 
987 	/* wait on response CV to 500mS */
988 	absolute_time = ddi_get_lbolt() +
989 	    drv_usectohz(ibcm_wait_for_res_cnt_timeout);
990 
991 	while (hcap->hca_res_cnt > 0)
992 		if (cv_timedwait(&ibcm_global_hca_cv, &ibcm_global_hca_lock,
993 		    absolute_time) == -1)
994 			break;
995 
996 	if (hcap->hca_res_cnt != 0) {
997 		/* We got a timeout waiting for hca_res_cnt to become 0 */
998 #ifdef DEBUG
999 		if (ibcm_test_mode > 0)
1000 			IBTF_DPRINTF_L1(cmlog, "ibcm_hca_detach: Unexpected "
1001 			    "abort due to timeout on res_cnt %d",
1002 			    hcap->hca_res_cnt);
1003 		else
1004 #endif
1005 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: Aborting due"
1006 			    " to timeout on res_cnt %d", hcap->hca_res_cnt);
1007 		hcap->hca_state = IBCM_HCA_ACTIVE;
1008 		return (IBCM_FAILURE);
1009 	}
1010 
1011 	/* Re-assert the while loop step above */
1012 	ASSERT(hcap->hca_sidr_list == NULL);
1013 	avl_destroy(&hcap->hca_active_tree);
1014 	avl_destroy(&hcap->hca_passive_tree);
1015 	avl_destroy(&hcap->hca_passive_comid_tree);
1016 
1017 	/*
1018 	 * Unregister all ports from IBMA
1019 	 * If there is a failure, re-initialize any free'd ibma handles. This
1020 	 * is required to receive the incoming mads
1021 	 */
1022 	status = IBCM_SUCCESS;
1023 	for (port_index = 0; port_index < hcap->hca_num_ports; port_index++) {
1024 		if ((status = ibcm_hca_fini_port(hcap, port_index)) !=
1025 		    IBCM_SUCCESS) {
1026 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
1027 			    "Failed to free IBMA Handle for port_num %d",
1028 			    port_index + 1);
1029 			break;
1030 		}
1031 	}
1032 
1033 	/* If detach fails, re-initialize ibma handles for incoming mads */
1034 	if (status != IBCM_SUCCESS)  {
1035 		for (i = 0; i < port_index; i++) {
1036 			if (ibcm_hca_init_port(hcap, i) != IBT_SUCCESS)
1037 				IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
1038 				    "Failed to re-allocate IBMA Handles for"
1039 				    " port_num %d", port_index + 1);
1040 		}
1041 		hcap->hca_state = IBCM_HCA_ACTIVE;
1042 		return (IBCM_FAILURE);
1043 	}
1044 
1045 	ibcm_fini_hca_ids(hcap);
1046 	ibcm_delete_hca_entry(hcap);
1047 
1048 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach: DETACH succeeded");
1049 	return (IBCM_SUCCESS);
1050 }
1051 
1052 /* Checks, if there are any active sidr state entries in the specified hca */
1053 static ibcm_status_t
1054 ibcm_check_sidr_clean(ibcm_hca_info_t *hcap)
1055 {
1056 	ibcm_ud_state_data_t	*usp;
1057 	uint32_t		transient_cnt = 0;
1058 
1059 	IBTF_DPRINTF_L5(cmlog, "ibcm_check_sidr_clean:");
1060 
1061 	rw_enter(&hcap->hca_sidr_list_lock, RW_WRITER);
1062 	usp = hcap->hca_sidr_list;	/* Point to the list */
1063 	while (usp != NULL) {
1064 		mutex_enter(&usp->ud_state_mutex);
1065 		if ((usp->ud_state != IBCM_STATE_SIDR_REP_SENT) &&
1066 		    (usp->ud_state != IBCM_STATE_TIMED_OUT) &&
1067 		    (usp->ud_state != IBCM_STATE_DELETE)) {
1068 
1069 			IBTF_DPRINTF_L3(cmlog, "ibcm_check_sidr_clean:"
1070 			    "usp = %p not in transient state = %d", usp,
1071 			    usp->ud_state);
1072 
1073 			mutex_exit(&usp->ud_state_mutex);
1074 			rw_exit(&hcap->hca_sidr_list_lock);
1075 			return (IBCM_FAILURE);
1076 		} else {
1077 			mutex_exit(&usp->ud_state_mutex);
1078 			++transient_cnt;
1079 		}
1080 
1081 		usp = usp->ud_nextp;
1082 	}
1083 	rw_exit(&hcap->hca_sidr_list_lock);
1084 
1085 	IBTF_DPRINTF_L4(cmlog, "ibcm_check_sidr_clean: transient_cnt %d",
1086 	    transient_cnt);
1087 
1088 	return (IBCM_SUCCESS);
1089 }
1090 
1091 /* Checks, if there are any active rc state entries, in the specified hca */
1092 static ibcm_status_t
1093 ibcm_check_avl_clean(ibcm_hca_info_t *hcap)
1094 
1095 {
1096 	ibcm_state_data_t	*sp;
1097 	avl_tree_t		*avl_tree;
1098 	uint32_t		transient_cnt = 0;
1099 
1100 	IBTF_DPRINTF_L5(cmlog, "ibcm_check_avl_clean:");
1101 	/*
1102 	 * Both the trees ie., active and passive must reference to all
1103 	 * statep's, so let's use one
1104 	 */
1105 	avl_tree = &hcap->hca_active_tree;
1106 
1107 	rw_enter(&hcap->hca_state_rwlock, RW_WRITER);
1108 
1109 	for (sp = avl_first(avl_tree); sp != NULL;
1110 	    sp = avl_walk(avl_tree, sp, AVL_AFTER)) {
1111 		mutex_enter(&sp->state_mutex);
1112 		if ((sp->state != IBCM_STATE_TIMEWAIT) &&
1113 		    (sp->state != IBCM_STATE_REJ_SENT) &&
1114 		    (sp->state != IBCM_STATE_DELETE)) {
1115 			IBTF_DPRINTF_L3(cmlog, "ibcm_check_avl_clean: "
1116 			    "sp = %p not in transient state = %d", sp,
1117 			    sp->state);
1118 			mutex_exit(&sp->state_mutex);
1119 			rw_exit(&hcap->hca_state_rwlock);
1120 			return (IBCM_FAILURE);
1121 		} else {
1122 			mutex_exit(&sp->state_mutex);
1123 			++transient_cnt;
1124 		}
1125 	}
1126 
1127 	rw_exit(&hcap->hca_state_rwlock);
1128 
1129 	IBTF_DPRINTF_L4(cmlog, "ibcm_check_avl_clean: transient_cnt %d",
1130 	    transient_cnt);
1131 
1132 	return (IBCM_SUCCESS);
1133 }
1134 
1135 /* Adds a new entry into CM's global hca list, if hca_guid is not there yet */
1136 static ibcm_hca_info_t *
1137 ibcm_add_hca_entry(ib_guid_t hcaguid, uint_t nports)
1138 {
1139 	ibcm_hca_info_t	*hcap;
1140 
1141 	IBTF_DPRINTF_L5(cmlog, "ibcm_add_hca_entry: guid = 0x%llX",
1142 	    hcaguid);
1143 
1144 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
1145 
1146 	/*
1147 	 * Check if this hca_guid already in the list
1148 	 * If yes, then ignore this and return NULL
1149 	 */
1150 
1151 	hcap = ibcm_hca_listp;
1152 
1153 	/* search for this HCA */
1154 	while (hcap != NULL) {
1155 		if (hcap->hca_guid == hcaguid) {
1156 			/* already exists */
1157 			IBTF_DPRINTF_L2(cmlog, "ibcm_add_hca_entry: "
1158 			    "hcap %p guid 0x%llX, entry already exists !!",
1159 			    hcap, hcap->hca_guid);
1160 			return (NULL);
1161 		}
1162 		hcap = hcap->hca_next;
1163 	}
1164 
1165 	/* Allocate storage for the new HCA entry found */
1166 	hcap = kmem_zalloc(sizeof (ibcm_hca_info_t) +
1167 	    (nports - 1) * sizeof (ibcm_port_info_t), KM_SLEEP);
1168 
1169 	/* initialize RW lock */
1170 	rw_init(&hcap->hca_state_rwlock, NULL, RW_DRIVER, NULL);
1171 	/* initialize SIDR list lock */
1172 	rw_init(&hcap->hca_sidr_list_lock, NULL, RW_DRIVER, NULL);
1173 	/* Insert "hcap" into the global HCA list maintained by CM */
1174 	hcap->hca_next = ibcm_hca_listp;
1175 	ibcm_hca_listp = hcap;
1176 
1177 	IBTF_DPRINTF_L5(cmlog, "ibcm_add_hca_entry: done hcap = 0x%p", hcap);
1178 
1179 	return (hcap);
1180 
1181 }
1182 
1183 /* deletes the given ibcm_hca_info_t from CM's global hca list */
1184 void
1185 ibcm_delete_hca_entry(ibcm_hca_info_t *hcap)
1186 {
1187 	ibcm_hca_info_t	*headp, *prevp = NULL;
1188 
1189 	/* ibcm_hca_global_lock is held */
1190 	IBTF_DPRINTF_L5(cmlog, "ibcm_delete_hca_entry: guid = 0x%llX "
1191 	    "hcap = 0x%p", hcap->hca_guid, hcap);
1192 
1193 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
1194 
1195 	headp = ibcm_hca_listp;
1196 	while (headp != NULL) {
1197 		if (headp == hcap) {
1198 			IBTF_DPRINTF_L3(cmlog, "ibcm_delete_hca_entry: "
1199 			    "deleting hcap %p hcaguid %llX", hcap,
1200 			    hcap->hca_guid);
1201 			if (prevp) {
1202 				prevp->hca_next = headp->hca_next;
1203 			} else {
1204 				prevp = headp->hca_next;
1205 				ibcm_hca_listp = prevp;
1206 			}
1207 			rw_destroy(&hcap->hca_state_rwlock);
1208 			rw_destroy(&hcap->hca_sidr_list_lock);
1209 			kmem_free(hcap, sizeof (ibcm_hca_info_t) +
1210 			    (hcap->hca_num_ports - 1) *
1211 			    sizeof (ibcm_port_info_t));
1212 			return;
1213 		}
1214 
1215 		prevp = headp;
1216 		headp = headp->hca_next;
1217 	}
1218 }
1219 
1220 /*
1221  * ibcm_find_hca_entry:
1222  *	Given a HCA's GUID find out ibcm_hca_info_t entry for that HCA
1223  *	This entry can be then used to access AVL tree/SIDR list etc.
1224  *	If entry exists and in HCA ATTACH state, then hca's ref cnt is
1225  *	incremented and entry returned. Else NULL returned.
1226  *
1227  *	All functions that use ibcm_find_hca_entry and get a non-NULL
1228  *	return values must call ibcm_dec_hca_acc_cnt to decrement the
1229  *	respective hca ref cnt. There shouldn't be any usage of
1230  *	ibcm_hca_info_t * returned from ibcm_find_hca_entry,
1231  *	after decrementing the hca_acc_cnt
1232  *
1233  * INPUTS:
1234  *	hca_guid	- HCA's guid
1235  *
1236  * RETURN VALUE:
1237  *	hcap		- if a match is found, else NULL
1238  */
1239 ibcm_hca_info_t *
1240 ibcm_find_hca_entry(ib_guid_t hca_guid)
1241 {
1242 	ibcm_hca_info_t *hcap;
1243 
1244 	IBTF_DPRINTF_L5(cmlog, "ibcm_find_hca_entry: guid = 0x%llX", hca_guid);
1245 
1246 	mutex_enter(&ibcm_global_hca_lock);
1247 
1248 	hcap = ibcm_hca_listp;
1249 	/* search for this HCA */
1250 	while (hcap != NULL) {
1251 		if (hcap->hca_guid == hca_guid)
1252 			break;
1253 		hcap = hcap->hca_next;
1254 	}
1255 
1256 	/* if no hcap for the hca_guid, return NULL */
1257 	if (hcap == NULL) {
1258 		mutex_exit(&ibcm_global_hca_lock);
1259 		return (NULL);
1260 	}
1261 
1262 	/* return hcap, only if it valid to use */
1263 	if (hcap->hca_state == IBCM_HCA_ACTIVE) {
1264 		++(hcap->hca_acc_cnt);
1265 
1266 		IBTF_DPRINTF_L5(cmlog, "ibcm_find_hca_entry: "
1267 		    "found hcap = 0x%p hca_acc_cnt %u", hcap,
1268 		    hcap->hca_acc_cnt);
1269 
1270 		mutex_exit(&ibcm_global_hca_lock);
1271 		return (hcap);
1272 	} else {
1273 		mutex_exit(&ibcm_global_hca_lock);
1274 
1275 		IBTF_DPRINTF_L2(cmlog, "ibcm_find_hca_entry: "
1276 		    "found hcap = 0x%p not in active state", hcap);
1277 		return (NULL);
1278 	}
1279 }
1280 
1281 /*
1282  * Searches for ibcm_hca_info_t entry based on hca_guid, but doesn't increment
1283  * the hca's reference count. This function is used, where the calling context
1284  * is attempting to delete hcap itself and hence acc_cnt cannot be incremented
1285  * OR assumes that valid hcap must be available in ibcm's global hca list.
1286  */
1287 ibcm_hca_info_t *
1288 ibcm_find_hcap_entry(ib_guid_t hca_guid)
1289 {
1290 	ibcm_hca_info_t *hcap;
1291 
1292 	IBTF_DPRINTF_L5(cmlog, "ibcm_find_hcap_entry: guid = 0x%llX", hca_guid);
1293 
1294 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
1295 
1296 	hcap = ibcm_hca_listp;
1297 	/* search for this HCA */
1298 	while (hcap != NULL) {
1299 		if (hcap->hca_guid == hca_guid)
1300 			break;
1301 		hcap = hcap->hca_next;
1302 	}
1303 
1304 	if (hcap == NULL)
1305 		IBTF_DPRINTF_L2(cmlog, "ibcm_find_hcap_entry: No hcap found for"
1306 		    " hca_guid 0x%llX", hca_guid);
1307 	else
1308 		IBTF_DPRINTF_L5(cmlog, "ibcm_find_hcap_entry: hcap found for"
1309 		    " hca_guid 0x%llX", hca_guid);
1310 
1311 	return (hcap);
1312 }
1313 
1314 /* increment the hca's temporary reference count */
1315 ibcm_status_t
1316 ibcm_inc_hca_acc_cnt(ibcm_hca_info_t *hcap)
1317 {
1318 	mutex_enter(&ibcm_global_hca_lock);
1319 	if (hcap->hca_state == IBCM_HCA_ACTIVE) {
1320 		++(hcap->hca_acc_cnt);
1321 		IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_acc_cnt: "
1322 		    "hcap = 0x%p  acc_cnt = %d ", hcap, hcap->hca_acc_cnt);
1323 		mutex_exit(&ibcm_global_hca_lock);
1324 		return (IBCM_SUCCESS);
1325 	} else {
1326 		IBTF_DPRINTF_L2(cmlog, "ibcm_inc_hca_acc_cnt: "
1327 		    "hcap INACTIVE 0x%p  acc_cnt = %d ", hcap,
1328 		    hcap->hca_acc_cnt);
1329 		mutex_exit(&ibcm_global_hca_lock);
1330 		return (IBCM_FAILURE);
1331 	}
1332 }
1333 
/* decrement the hca's ref count, and wake up any waiting threads */
void
ibcm_dec_hca_acc_cnt(ibcm_hca_info_t *hcap)
{
	mutex_enter(&ibcm_global_hca_lock);
	ASSERT(hcap->hca_acc_cnt > 0);
	--(hcap->hca_acc_cnt);
	IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_acc_cnt: hcap = 0x%p "
	    "acc_cnt = %d", hcap, hcap->hca_acc_cnt);
	/*
	 * A detach in progress marks the HCA NOT_ACTIVE and then waits on
	 * ibcm_global_hca_cv for the access count to drain (see
	 * ibcm_hca_detach); wake it up when the last reference goes away.
	 */
	if ((hcap->hca_state == IBCM_HCA_NOT_ACTIVE) &&
	    (hcap->hca_acc_cnt == 0)) {
		IBTF_DPRINTF_L3(cmlog, "ibcm_dec_hca_acc_cnt: "
		    "cv_broadcast for hcap = 0x%p", hcap);
		cv_broadcast(&ibcm_global_hca_cv);
	}
	mutex_exit(&ibcm_global_hca_lock);
}
1351 
/* increment the hca's resource count */
void
ibcm_inc_hca_res_cnt(ibcm_hca_info_t *hcap)

{
	mutex_enter(&ibcm_global_hca_lock);
	/* hca_res_cnt counts RC/SIDR stateps tied to this HCA (see detach) */
	++(hcap->hca_res_cnt);
	IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_res_cnt: hcap = 0x%p "
	    "ref_cnt = %d", hcap, hcap->hca_res_cnt);
	mutex_exit(&ibcm_global_hca_lock);
}
1363 
/* decrement the hca's resource count, and wake up any waiting threads */
void
ibcm_dec_hca_res_cnt(ibcm_hca_info_t *hcap)
{
	mutex_enter(&ibcm_global_hca_lock);
	ASSERT(hcap->hca_res_cnt > 0);
	--(hcap->hca_res_cnt);
	IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_res_cnt: hcap = 0x%p "
	    "ref_cnt = %d", hcap, hcap->hca_res_cnt);
	/*
	 * HCA detach waits on ibcm_global_hca_cv for the resource count
	 * to drop to zero; signal it on the last release.
	 */
	if ((hcap->hca_state == IBCM_HCA_NOT_ACTIVE) &&
	    (hcap->hca_res_cnt == 0)) {
		IBTF_DPRINTF_L3(cmlog, "ibcm_dec_hca_res_cnt: "
		    "cv_broadcast for hcap = 0x%p", hcap);
		cv_broadcast(&ibcm_global_hca_cv);
	}
	mutex_exit(&ibcm_global_hca_lock);
}
1381 
/* increment the hca's service count */
void
ibcm_inc_hca_svc_cnt(ibcm_hca_info_t *hcap)

{
	mutex_enter(&ibcm_global_hca_lock);
	/* a non-zero hca_svc_cnt makes HCA detach fail (see ibcm_hca_detach) */
	++(hcap->hca_svc_cnt);
	IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_svc_cnt: hcap = 0x%p "
	    "svc_cnt = %d", hcap, hcap->hca_svc_cnt);
	mutex_exit(&ibcm_global_hca_lock);
}
1393 
/* decrement the hca's service count */
void
ibcm_dec_hca_svc_cnt(ibcm_hca_info_t *hcap)
{
	mutex_enter(&ibcm_global_hca_lock);
	ASSERT(hcap->hca_svc_cnt > 0);
	/* no cv_broadcast here: detach fails outright while services exist */
	--(hcap->hca_svc_cnt);
	IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_svc_cnt: hcap = 0x%p "
	    "svc_cnt = %d", hcap, hcap->hca_svc_cnt);
	mutex_exit(&ibcm_global_hca_lock);
}
1405 
1406 /*
1407  * The following code manages three classes of requests that CM makes to
1408  * the fabric.  Those three classes are SA_ACCESS, REQ/REP/RTU, and DREQ/DREP.
1409  * The main issue is that the fabric can become very busy, and the CM
1410  * protocols rely on responses being made based on a predefined timeout
1411  * value.  By managing how many simultaneous sessions are allowed, there
1412  * is observed extremely high reliability of CM protocol succeeding when
1413  * it should.
1414  *
1415  * SA_ACCESS and DREQ/DREP are managed at the thread level, whereby the
1416  * thread blocks until there are less than some number of threads doing
1417  * similar requests.
1418  *
1419  * REQ/REP/RTU requests beyond a given limit are added to a list,
1420  * allowing the thread to return immediately to its caller in the
1421  * case where the "mode" is IBT_NONBLOCKING.  This is the mode used
1422  * by uDAPL and seems to be an important feature/behavior.
1423  */
1424 
1425 static int
1426 ibcm_ok_to_start(struct ibcm_open_s *openp)
1427 {
1428 	return (openp->sends < openp->sends_hiwat &&
1429 	    openp->in_progress < openp->in_progress_max);
1430 }
1431 
1432 void
1433 ibcm_open_done(ibcm_state_data_t *statep)
1434 {
1435 	int run;
1436 
1437 	ASSERT(MUTEX_HELD(&statep->state_mutex));
1438 	if (statep->open_flow == 1) {
1439 		statep->open_flow = 0;
1440 		mutex_enter(&ibcm_open.mutex);
1441 		ibcm_open.in_progress--;
1442 		run = ibcm_ok_to_start(&ibcm_open);
1443 		mutex_exit(&ibcm_open.mutex);
1444 		if (run)
1445 			ibcm_run_tlist_thread();
1446 	}
1447 }
1448 
/*
 * dtrace/debug aid: report opens that were held back by flow control
 * for more than 1ms between enqueue (post_time) and the actual send.
 */
void
ibcm_open_wait(hrtime_t delta)
{
	if (delta > 1000000)
		IBTF_DPRINTF_L2(cmlog, "ibcm_open_wait: flow more %lld", delta);
}
1456 
/*
 * Post the stored REQ MAD for this statep, then drop one statep
 * reference.
 */
void
ibcm_open_start(ibcm_state_data_t *statep)
{
	ibcm_insert_trace(statep, IBCM_TRACE_OUTGOING_REQ);

	mutex_enter(&statep->state_mutex);
	/* record how long flow control held this REQ back */
	ibcm_open_wait(gethrtime() - statep->post_time);
	mutex_exit(&statep->state_mutex);

	ibcm_post_rc_mad(statep, statep->stored_msg, ibcm_post_req_complete,
	    statep);

	/* release the hold on statep (presumably taken by the enqueuer) */
	mutex_enter(&statep->state_mutex);
	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}
1473 
1474 void
1475 ibcm_open_enqueue(ibcm_state_data_t *statep)
1476 {
1477 	int run;
1478 
1479 	mutex_enter(&statep->state_mutex);
1480 	statep->post_time = gethrtime();
1481 	mutex_exit(&statep->state_mutex);
1482 	mutex_enter(&ibcm_open.mutex);
1483 	if (ibcm_open.queued == 0 && ibcm_ok_to_start(&ibcm_open)) {
1484 		ibcm_open.in_progress++;
1485 		mutex_exit(&ibcm_open.mutex);
1486 		ibcm_open_start(statep);
1487 	} else {
1488 		ibcm_open.queued++;
1489 		statep->open_link = NULL;
1490 		ibcm_open.tail->open_link = statep;
1491 		ibcm_open.tail = statep;
1492 		run = ibcm_ok_to_start(&ibcm_open);
1493 		mutex_exit(&ibcm_open.mutex);
1494 		if (run)
1495 			ibcm_run_tlist_thread();
1496 	}
1497 }
1498 
1499 ibcm_state_data_t *
1500 ibcm_open_dequeue(void)
1501 {
1502 	ibcm_state_data_t *statep;
1503 
1504 	ASSERT(MUTEX_HELD(&ibcm_open.mutex));
1505 	ibcm_open.queued--;
1506 	ibcm_open.in_progress++;
1507 	statep = ibcm_open.head.open_link;
1508 	ibcm_open.head.open_link = statep->open_link;
1509 	statep->open_link = NULL;
1510 	if (ibcm_open.tail == statep)
1511 		ibcm_open.tail = &ibcm_open.head;
1512 	return (statep);
1513 }
1514 
1515 void
1516 ibcm_check_for_opens(void)
1517 {
1518 	ibcm_state_data_t 	*statep;
1519 
1520 	mutex_enter(&ibcm_open.mutex);
1521 
1522 	while (ibcm_open.queued > 0) {
1523 		if (ibcm_ok_to_start(&ibcm_open)) {
1524 			statep = ibcm_open_dequeue();
1525 			mutex_exit(&ibcm_open.mutex);
1526 
1527 			ibcm_open_start(statep);
1528 
1529 			mutex_enter(&ibcm_open.mutex);
1530 		} else {
1531 			break;
1532 		}
1533 	}
1534 	mutex_exit(&ibcm_open.mutex);
1535 }
1536 
1537 
1538 static void
1539 ibcm_flow_init(ibcm_flow_t *flow, uint_t simul_max)
1540 {
1541 	flow->list			= NULL;
1542 	flow->simul			= 0;
1543 	flow->waiters_per_chunk		= 4;
1544 	flow->simul_max			= simul_max;
1545 	flow->lowat			= simul_max - flow->waiters_per_chunk;
1546 	flow->lowat_default		= flow->lowat;
1547 	/* stats */
1548 	flow->total			= 0;
1549 }
1550 
/* One-time setup of CM's flow control state: lock, throttles, counters. */
static void
ibcm_rc_flow_control_init(void)
{
	mutex_init(&ibcm_open.mutex, NULL, MUTEX_DEFAULT, NULL);
	mutex_enter(&ibcm_open.mutex);
	/* per-class throttles: DREQ/DREP (close), LAP/APR, and SA access */
	ibcm_flow_init(&ibcm_close_flow, ibcm_close_simul_max);
	ibcm_flow_init(&ibcm_lapr_flow, ibcm_lapr_simul_max);
	ibcm_flow_init(&ibcm_saa_flow, ibcm_saa_simul_max);

	/* REQ/REP/RTU open throttle: nothing queued or in flight yet */
	ibcm_open.queued 		= 0;
	ibcm_open.exit_deferred 	= 0;
	ibcm_open.in_progress 		= 0;
	ibcm_open.in_progress_max 	= 16;
	/* MAD send accounting; sends_max only records the high watermark */
	ibcm_open.sends 		= 0;
	ibcm_open.sends_max 		= 0;
	ibcm_open.sends_lowat 		= 8;
	ibcm_open.sends_hiwat 		= 16;
	/* empty pending-open list: tail points at the dummy head element */
	ibcm_open.tail 			= &ibcm_open.head;
	ibcm_open.head.open_link 	= NULL;
	mutex_exit(&ibcm_open.mutex);
}
1572 
/* Teardown counterpart of ibcm_rc_flow_control_init. */
static void
ibcm_rc_flow_control_fini(void)
{
	mutex_destroy(&ibcm_open.mutex);
}
1578 
/*
 * Find (or allocate) a flow1 waiter chunk with room for another waiter.
 * Called and returns with ibcm_open.mutex held, but may drop and
 * re-acquire it around the KM_SLEEP allocation — hence the second scan
 * of the list afterwards.
 */
static ibcm_flow1_t *
ibcm_flow_find(ibcm_flow_t *flow)
{
	ibcm_flow1_t *flow1;
	ibcm_flow1_t *f;

	f = flow->list;
	if (f) {	/* most likely code path */
		while (f->link != NULL)
			f = f->link;
		if (f->waiters < flow->waiters_per_chunk)
			return (f);
	}

	/* There was no flow1 list element ready for another waiter */
	mutex_exit(&ibcm_open.mutex);
	flow1 = kmem_alloc(sizeof (*flow1), KM_SLEEP);
	mutex_enter(&ibcm_open.mutex);

	/* the list may have changed while the lock was dropped; re-check */
	f = flow->list;
	if (f) {
		while (f->link != NULL)
			f = f->link;
		if (f->waiters < flow->waiters_per_chunk) {
			/* another thread made room; discard our allocation */
			kmem_free(flow1, sizeof (*flow1));
			return (f);
		}
		f->link = flow1;
	} else {
		flow->list = flow1;
	}
	cv_init(&flow1->cv, NULL, CV_DRIVER, NULL);
	flow1->waiters = 0;
	flow1->link = NULL;
	return (flow1);
}
1615 
/*
 * Enter a flow-controlled path: either claim one of the simul_max slots
 * immediately, or block on a flow1 chunk's CV until ibcm_flow_exit
 * transfers the chunk's slots (it bumps simul by the waiter count before
 * broadcasting).  The last waiter to leave a chunk destroys and frees it.
 */
static void
ibcm_flow_enter(ibcm_flow_t *flow)
{
	mutex_enter(&ibcm_open.mutex);
	if (flow->list == NULL && flow->simul < flow->simul_max) {
		flow->simul++;
		flow->total++;
		mutex_exit(&ibcm_open.mutex);
	} else {
		ibcm_flow1_t *flow1;

		flow1 = ibcm_flow_find(flow);
		flow1->waiters++;
		cv_wait(&flow1->cv, &ibcm_open.mutex);
		if (--flow1->waiters == 0) {
			/* last one out cleans up the chunk */
			cv_destroy(&flow1->cv);
			mutex_exit(&ibcm_open.mutex);
			kmem_free(flow1, sizeof (*flow1));
		} else
			mutex_exit(&ibcm_open.mutex);
	}
}
1638 
/*
 * Leave a flow-controlled path.  Once occupancy drops below the low
 * watermark, release an entire waiter chunk at once — its slots are
 * accounted into simul/total here before the broadcast — and nudge
 * lowat back toward its default if it had been lowered.
 */
static void
ibcm_flow_exit(ibcm_flow_t *flow)
{
	mutex_enter(&ibcm_open.mutex);
	if (--flow->simul < flow->lowat) {
		if (flow->lowat < flow->lowat_default)
			flow->lowat++;
		if (flow->list) {
			ibcm_flow1_t *flow1;

			flow1 = flow->list;
			flow->list = flow1->link;	/* unlink */
			flow1->link = NULL;		/* be clean */
			flow->total += flow1->waiters;
			flow->simul += flow1->waiters;
			cv_broadcast(&flow1->cv);
		}
	}
	mutex_exit(&ibcm_open.mutex);
}
1659 
/* Count one more outstanding MAD send; log new high watermarks. */
void
ibcm_flow_inc(void)
{
	mutex_enter(&ibcm_open.mutex);
	if (++ibcm_open.sends > ibcm_open.sends_max) {
		ibcm_open.sends_max = ibcm_open.sends;
		IBTF_DPRINTF_L2(cmlog, "ibcm_flow_inc: sends max = %d",
		    ibcm_open.sends_max);
	}
	mutex_exit(&ibcm_open.mutex);
}
1671 
1672 static void
1673 ibcm_check_send_cmpltn_time(hrtime_t delta, char *event_msg)
1674 {
1675 	if (delta > 4000000LL) {
1676 		IBTF_DPRINTF_L2(cmlog, "ibcm_check_send_cmpltn_time: "
1677 		    "%s: %lldns", event_msg, delta);
1678 	}
1679 }
1680 
1681 void
1682 ibcm_flow_dec(hrtime_t time, char *mad_type)
1683 {
1684 	int flow_exit = 0;
1685 	int run = 0;
1686 
1687 	if (ibcm_dtrace)
1688 		ibcm_check_send_cmpltn_time(gethrtime() - time, mad_type);
1689 	mutex_enter(&ibcm_open.mutex);
1690 	ibcm_open.sends--;
1691 	if (ibcm_open.sends < ibcm_open.sends_lowat) {
1692 		run = ibcm_ok_to_start(&ibcm_open);
1693 		if (ibcm_open.exit_deferred) {
1694 			ibcm_open.exit_deferred--;
1695 			flow_exit = 1;
1696 		}
1697 	}
1698 	mutex_exit(&ibcm_open.mutex);
1699 	if (flow_exit)
1700 		ibcm_flow_exit(&ibcm_close_flow);
1701 	if (run)
1702 		ibcm_run_tlist_thread();
1703 }
1704 
/* Throttle entry to the DREQ/DREP (close) path; see ibcm_flow_enter. */
void
ibcm_close_enter(void)
{
	ibcm_flow_enter(&ibcm_close_flow);
}
1710 
1711 void
1712 ibcm_close_exit(void)
1713 {
1714 	int flow_exit;
1715 
1716 	mutex_enter(&ibcm_open.mutex);
1717 	if (ibcm_open.sends < ibcm_open.sends_lowat ||
1718 	    ibcm_open.exit_deferred >= 4)
1719 		flow_exit = 1;
1720 	else {
1721 		flow_exit = 0;
1722 		ibcm_open.exit_deferred++;
1723 	}
1724 	mutex_exit(&ibcm_open.mutex);
1725 	if (flow_exit)
1726 		ibcm_flow_exit(&ibcm_close_flow);
1727 }
1728 
1729 /*
1730  * This function needs to be called twice to finish our flow
1731  * control accounting when closing down a connection.  One
1732  * call has send_done set to 1, while the other has it set to 0.
1733  * Because of retries, this could get called more than once
1734  * with either 0 or 1, but additional calls have no effect.
1735  */
1736 void
1737 ibcm_close_done(ibcm_state_data_t *statep, int send_done)
1738 {
1739 	int flow_exit;
1740 
1741 	ASSERT(MUTEX_HELD(&statep->state_mutex));
1742 	if (statep->close_flow == 1) {
1743 		if (send_done)
1744 			statep->close_flow = 3;
1745 		else
1746 			statep->close_flow = 2;
1747 	} else if ((send_done && statep->close_flow == 2) ||
1748 	    (!send_done && statep->close_flow == 3)) {
1749 		statep->close_flow = 0;
1750 		mutex_enter(&ibcm_open.mutex);
1751 		if (ibcm_open.sends < ibcm_open.sends_lowat ||
1752 		    ibcm_open.exit_deferred >= 4)
1753 			flow_exit = 1;
1754 		else {
1755 			flow_exit = 0;
1756 			ibcm_open.exit_deferred++;
1757 		}
1758 		mutex_exit(&ibcm_open.mutex);
1759 		if (flow_exit)
1760 			ibcm_flow_exit(&ibcm_close_flow);
1761 	}
1762 }
1763 
/* Throttle entry to the LAP/APR path; see ibcm_flow_enter. */
void
ibcm_lapr_enter(void)
{
	ibcm_flow_enter(&ibcm_lapr_flow);
}
1769 
/* Release the LAP/APR throttle taken by ibcm_lapr_enter. */
void
ibcm_lapr_exit(void)
{
	ibcm_flow_exit(&ibcm_lapr_flow);
}
1775 
1776 void
1777 ibcm_sa_access_enter()
1778 {
1779 	ibcm_flow_enter(&ibcm_saa_flow);
1780 }
1781 
1782 void
1783 ibcm_sa_access_exit()
1784 {
1785 	ibcm_flow_exit(&ibcm_saa_flow);
1786 }
1787 
/*
 * IBMF SAA subnet event callback: translate the SAA event into an IBT
 * subnet event code and forward it to ibtl's SM notice handler, unless
 * the HCA is being torn down.  Serialized end to end by
 * ibcm_sm_notice_serialize_lock.
 */
static void
ibcm_sm_notice_handler(ibmf_saa_handle_t saa_handle,
    ibmf_saa_subnet_event_t saa_event_code,
    ibmf_saa_event_details_t *saa_event_details,
    void *callback_arg)
{
	ibcm_port_info_t	*portp = (ibcm_port_info_t *)callback_arg;
	ibt_subnet_event_code_t code;
	ibt_subnet_event_t	event;
	uint8_t			event_status;

	IBTF_DPRINTF_L3(cmlog, "ibcm_sm_notice_handler: saa_hdl %p, code = %d",
	    saa_handle, saa_event_code);

	mutex_enter(&ibcm_sm_notice_serialize_lock);

	switch (saa_event_code) {
	case IBMF_SAA_EVENT_MCG_CREATED:
		code = IBT_SM_EVENT_MCG_CREATED;
		break;
	case IBMF_SAA_EVENT_MCG_DELETED:
		code = IBT_SM_EVENT_MCG_DELETED;
		break;
	case IBMF_SAA_EVENT_GID_AVAILABLE:
		code = IBT_SM_EVENT_GID_AVAIL;
		/* GIDs came or went: cached path records may be stale */
		ibcm_path_cache_purge();
		break;
	case IBMF_SAA_EVENT_GID_UNAVAILABLE:
		code = IBT_SM_EVENT_GID_UNAVAIL;
		ibcm_path_cache_purge();
		break;
	case IBMF_SAA_EVENT_SUBSCRIBER_STATUS_CHG:
		/* only forward an actual change in SM producer status */
		event_status =
		    saa_event_details->ie_producer_event_status_mask &
		    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM;
		if (event_status == (portp->port_event_status &
		    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM)) {
			mutex_exit(&ibcm_sm_notice_serialize_lock);
			return;	/* no change */
		}
		portp->port_event_status = event_status;
		if (event_status == IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM)
			code = IBT_SM_EVENT_AVAILABLE;
		else
			code = IBT_SM_EVENT_UNAVAILABLE;
		break;
	default:
		/* other SAA event codes are of no interest to CM clients */
		mutex_exit(&ibcm_sm_notice_serialize_lock);
		return;
	}

	mutex_enter(&ibcm_global_hca_lock);

	/* don't send the event if we're tearing down */
	if (!IBCM_ACCESS_HCA_OK(portp->port_hcap)) {
		mutex_exit(&ibcm_global_hca_lock);
		mutex_exit(&ibcm_sm_notice_serialize_lock);
		return;
	}

	/* hold the HCA so a detach can't complete during the upcall */
	++(portp->port_hcap->hca_acc_cnt);
	mutex_exit(&ibcm_global_hca_lock);

	event.sm_notice_gid = saa_event_details->ie_gid;
	ibtl_cm_sm_notice_handler(portp->port_sgid0, code, &event);

	mutex_exit(&ibcm_sm_notice_serialize_lock);

	ibcm_dec_hca_acc_cnt(portp->port_hcap);
}
1858 
/*
 * ibt_register_subnet_notices:
 *	Record the client's SM notice handler with ibtl.  When a handler
 *	is being installed, also report — via
 *	ibtl_cm_sm_notice_init_failure — every port that currently lacks
 *	an SM producer event subscription, so the client learns which
 *	ports have no working SA event delivery.
 */
void
ibt_register_subnet_notices(ibt_clnt_hdl_t ibt_hdl,
    ibt_sm_notice_handler_t sm_notice_handler, void *private)
{
	ibcm_port_info_t	*portp;
	ibcm_hca_info_t		*hcap;
	uint8_t			port;
	int			num_failed_sgids;
	ibtl_cm_sm_init_fail_t	*ifail;
	ib_gid_t		*sgidp;

	IBTF_DPRINTF_L3(cmlog, "ibt_register_subnet_notices: ibt_hdl = %p",
	    ibt_hdl);

	mutex_enter(&ibcm_sm_notice_serialize_lock);

	ibtl_cm_set_sm_notice_handler(ibt_hdl, sm_notice_handler, private);
	if (sm_notice_handler == NULL) {
		/* handler removed; nothing more to report */
		mutex_exit(&ibcm_sm_notice_serialize_lock);
		return;
	}

	/* for each port, if service is not available, make a call */
	mutex_enter(&ibcm_global_hca_lock);
	/* first pass: count the ports lacking SM producer events */
	num_failed_sgids = 0;
	hcap = ibcm_hca_listp;
	while (hcap != NULL) {
		portp = hcap->hca_port_info;
		for (port = 0; port < hcap->hca_num_ports; port++) {
			if (!(portp->port_event_status &
			    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM))
				num_failed_sgids++;
			portp++;
		}
		hcap = hcap->hca_next;
	}
	if (num_failed_sgids != 0) {
		/*
		 * smf_sgid is presumably a length-1 trailing array —
		 * hence the (num_failed_sgids - 1) sizing idiom.
		 */
		ifail = kmem_alloc(sizeof (*ifail) +
		    (num_failed_sgids - 1) * sizeof (ib_gid_t), KM_SLEEP);
		ifail->smf_num_sgids = num_failed_sgids;
		ifail->smf_ibt_hdl = ibt_hdl;
		sgidp = &ifail->smf_sgid[0];
		/* second pass: collect the SGIDs of those ports */
		hcap = ibcm_hca_listp;
		while (hcap != NULL) {
			portp = hcap->hca_port_info;
			for (port = 0; port < hcap->hca_num_ports; port++) {
				if (!(portp->port_event_status &
				    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM))
					*sgidp++ = portp->port_sgid0;
				portp++;
			}
			hcap = hcap->hca_next;
		}
	}
	mutex_exit(&ibcm_global_hca_lock);

	if (num_failed_sgids != 0) {
		ibtl_cm_sm_notice_init_failure(ifail);
		kmem_free(ifail, sizeof (*ifail) +
		    (num_failed_sgids - 1) * sizeof (ib_gid_t));
	}
	mutex_exit(&ibcm_sm_notice_serialize_lock);
}
1922 
/*
 * Open an IBMF SAA session (with subnet event subscription) for a port.
 * The following is run from a taskq because we've seen the stack
 * overflow.  Always clears port_saa_open_in_progress and broadcasts
 * ibcm_sa_open_cv on completion, success or not.
 */
static void
ibcm_init_saa(void *arg)
{
	ibcm_port_info_t		*portp = (ibcm_port_info_t *)arg;
	int				status;
	ib_guid_t			port_guid;
	ibmf_saa_subnet_event_args_t	event_args;

	port_guid = portp->port_sgid0.gid_guid;

	IBTF_DPRINTF_L3(cmlog, "ibcm_init_saa: port guid %llX", port_guid);

	/* subscribe to subnet events, delivered to ibcm_sm_notice_handler */
	event_args.is_event_callback_arg = portp;
	event_args.is_event_callback = ibcm_sm_notice_handler;

	if ((status = ibmf_sa_session_open(port_guid, 0, &event_args,
	    IBMF_VERSION, 0, &portp->port_ibmf_saa_hdl)) != IBMF_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa: "
		    "ibmf_sa_session_open failed for port guid %llX "
		    "status = %d", port_guid, status);
	} else {
		IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa: "
		    "registered sa_hdl 0x%p for port guid %llX",
		    portp->port_ibmf_saa_hdl, port_guid);
	}

	/* the open attempt is over; release anyone waiting on the result */
	mutex_enter(&ibcm_sa_open_lock);
	portp->port_saa_open_in_progress = 0;
	cv_broadcast(&ibcm_sa_open_cv);
	mutex_exit(&ibcm_sa_open_lock);
}
1955 
/*
 * Kick off — asynchronously, via taskq — the opening of an IBMF SAA
 * session for the given port, unless one already exists or an open is
 * already in progress.  Fire-and-forget; use ibcm_get_saa_handle to
 * wait for and fetch the result.
 */
void
ibcm_init_saa_handle(ibcm_hca_info_t *hcap, uint8_t port)
{
	ibmf_saa_handle_t	saa_handle;
	uint8_t			port_index = port - 1;
	ibcm_port_info_t	*portp = &hcap->hca_port_info[port_index];
	ibt_status_t		ibt_status;

	/* guard against a bad port number (port is 1-based) */
	if (port_index >= hcap->hca_num_ports)
		return;

	mutex_enter(&ibcm_sa_open_lock);
	if (portp->port_saa_open_in_progress) {
		/* another thread is already opening this port's session */
		mutex_exit(&ibcm_sa_open_lock);
		return;
	}

	saa_handle = portp->port_ibmf_saa_hdl;
	if (saa_handle != NULL) {
		/* session already open; nothing to do */
		mutex_exit(&ibcm_sa_open_lock);
		return;
	}

	portp->port_saa_open_in_progress = 1;
	mutex_exit(&ibcm_sa_open_lock);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(portp->port_event_status))

	/* The assumption is that we're getting event notifications */
	portp->port_event_status = IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM;

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(portp->port_event_status))

	/* refresh port_sgid0, which ibcm_init_saa reads for the port guid */
	ibt_status = ibt_get_port_state_byguid(portp->port_hcap->hca_guid,
	    portp->port_num, &portp->port_sgid0, NULL);
	if (ibt_status != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa_handle: "
		    "ibt_get_port_state_byguid failed for guid %llX "
		    "with status %d", portp->port_hcap->hca_guid, ibt_status);
		/* abandon the open; wake anyone waiting on it */
		mutex_enter(&ibcm_sa_open_lock);
		portp->port_saa_open_in_progress = 0;
		cv_broadcast(&ibcm_sa_open_cv);
		mutex_exit(&ibcm_sa_open_lock);
		return;
	}
	/* if the port is UP, try sa_session_open */
	(void) taskq_dispatch(ibcm_taskq, ibcm_init_saa, portp, TQ_SLEEP);
}
2004 
2005 
2006 ibmf_saa_handle_t
2007 ibcm_get_saa_handle(ibcm_hca_info_t *hcap, uint8_t port)
2008 {
2009 	ibmf_saa_handle_t	saa_handle;
2010 	uint8_t			port_index = port - 1;
2011 	ibcm_port_info_t	*portp = &hcap->hca_port_info[port_index];
2012 	ibt_status_t		ibt_status;
2013 
2014 	if (port_index >= hcap->hca_num_ports)
2015 		return (NULL);
2016 
2017 	mutex_enter(&ibcm_sa_open_lock);
2018 	while (portp->port_saa_open_in_progress) {
2019 		cv_wait(&ibcm_sa_open_cv, &ibcm_sa_open_lock);
2020 	}
2021 
2022 	saa_handle = portp->port_ibmf_saa_hdl;
2023 	if (saa_handle != NULL) {
2024 		mutex_exit(&ibcm_sa_open_lock);
2025 		return (saa_handle);
2026 	}
2027 
2028 	portp->port_saa_open_in_progress = 1;
2029 	mutex_exit(&ibcm_sa_open_lock);
2030 
2031 	ibt_status = ibt_get_port_state_byguid(portp->port_hcap->hca_guid,
2032 	    portp->port_num, &portp->port_sgid0, NULL);
2033 	if (ibt_status != IBT_SUCCESS) {
2034 		IBTF_DPRINTF_L2(cmlog, "ibcm_get_saa_handle: "
2035 		    "ibt_get_port_state_byguid failed for guid %llX "
2036 		    "with status %d", portp->port_hcap->hca_guid, ibt_status);
2037 		mutex_enter(&ibcm_sa_open_lock);
2038 		portp->port_saa_open_in_progress = 0;
2039 		cv_broadcast(&ibcm_sa_open_cv);
2040 		mutex_exit(&ibcm_sa_open_lock);
2041 		return (NULL);
2042 	}
2043 	/* if the port is UP, try sa_session_open */
2044 	(void) taskq_dispatch(ibcm_taskq, ibcm_init_saa, portp, TQ_SLEEP);
2045 
2046 	mutex_enter(&ibcm_sa_open_lock);
2047 	while (portp->port_saa_open_in_progress) {
2048 		cv_wait(&ibcm_sa_open_cv, &ibcm_sa_open_lock);
2049 	}
2050 	saa_handle = portp->port_ibmf_saa_hdl;
2051 	mutex_exit(&ibcm_sa_open_lock);
2052 	return (saa_handle);
2053 }
2054 
2055 
2056 /*
2057  * ibcm_hca_init_port():
2058  * 	- Register port with IBMA
2059  *
2060  * Arguments:
2061  *	hcap		- HCA's guid
2062  *	port_index	- port number minus 1
2063  *
2064  * Return values:
2065  *	IBCM_SUCCESS - success
2066  */
ibt_status_t
ibcm_hca_init_port(ibcm_hca_info_t *hcap, uint8_t port_index)
{
	int			status;
	ibmf_register_info_t	*ibmf_reg;

	IBTF_DPRINTF_L4(cmlog, "ibcm_hca_init_port: hcap = 0x%p port_num %d",
	    hcap, port_index + 1);

	/* Caller must hold the global HCA lock across the registration */
	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(hcap->hca_port_info))

	/* Nothing to do if this port already has an IBMF handle */
	if (hcap->hca_port_info[port_index].port_ibmf_hdl == NULL) {
		/* Register with IBMF */
		ibmf_reg = &hcap->hca_port_info[port_index].port_ibmf_reg;
		ibmf_reg->ir_ci_guid = hcap->hca_guid;
		ibmf_reg->ir_port_num = port_index + 1;
		ibmf_reg->ir_client_class = COMM_MGT_MANAGER_AGENT;

		/*
		 * register with management framework
		 */
		status = ibmf_register(ibmf_reg, IBMF_VERSION,
		    IBMF_REG_FLAG_NO_OFFLOAD, NULL, NULL,
		    &(hcap->hca_port_info[port_index].port_ibmf_hdl),
		    &(hcap->hca_port_info[port_index].port_ibmf_caps));

		if (status != IBMF_SUCCESS) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_init_port: "
			    "ibmf_register failed for port_num %x, "
			    "status = %x", port_index + 1, status);
			/* Map the IBMF error to an IBT status for the caller */
			return (ibcm_ibmf_analyze_error(status));
		}

		hcap->hca_port_info[port_index].port_qp1.qp_cm =
		    IBMF_QP_HANDLE_DEFAULT;
		/* Back-pointer so QP1 callbacks can locate their port info */
		hcap->hca_port_info[port_index].port_qp1.qp_port =
		    &(hcap->hca_port_info[port_index]);

		/*
		 * Register the read callback with IBMF.
		 * Since we just did an ibmf_register, handle is
		 * valid and ibcm_recv_cb() is valid so we can
		 * safely assert for success of ibmf_setup_recv_cb()
		 *
		 * Depending on the "state" of the HCA,
		 * CM may drop incoming packets
		 */
		status = ibmf_setup_async_cb(
		    hcap->hca_port_info[port_index].port_ibmf_hdl,
		    IBMF_QP_HANDLE_DEFAULT, ibcm_recv_cb,
		    &(hcap->hca_port_info[port_index].port_qp1), 0);
		ASSERT(status == IBMF_SUCCESS);

		IBTF_DPRINTF_L5(cmlog, "ibcm_hca_init_port: "
		    "IBMF hdl[%x] = 0x%p", port_index,
		    hcap->hca_port_info[port_index].port_ibmf_hdl);

		/* Attempt to get the saa_handle for this port */
		ibcm_init_saa_handle(hcap, port_index + 1);
	}

	return (IBT_SUCCESS);
}
2132 
2133 /*
2134  * useful, to re attempt to initialize port ibma handles from elsewhere in
2135  * cm code
2136  */
2137 ibt_status_t
2138 ibcm_hca_reinit_port(ibcm_hca_info_t *hcap, uint8_t port_index)
2139 {
2140 	ibt_status_t	status;
2141 
2142 	IBTF_DPRINTF_L5(cmlog, "ibcm_hca_reinit_port: hcap 0x%p port_num %d",
2143 	    hcap, port_index + 1);
2144 
2145 	mutex_enter(&ibcm_global_hca_lock);
2146 	status = ibcm_hca_init_port(hcap, port_index);
2147 	mutex_exit(&ibcm_global_hca_lock);
2148 	return (status);
2149 }
2150 
2151 
2152 /*
2153  * ibcm_hca_fini_port():
2154  * 	- Deregister port with IBMA
2155  *
2156  * Arguments:
2157  *	hcap		- HCA's guid
2158  *	port_index	- port number minus 1
2159  *
2160  * Return values:
2161  *	IBCM_SUCCESS - success
2162  */
static ibcm_status_t
ibcm_hca_fini_port(ibcm_hca_info_t *hcap, uint8_t port_index)
{
	int			ibmf_status;
	ibcm_status_t		ibcm_status;

	IBTF_DPRINTF_L4(cmlog, "ibcm_hca_fini_port: hcap = 0x%p port_num %d ",
	    hcap, port_index + 1);

	/* Caller must hold the global HCA lock across the teardown */
	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));

	/* Close the SA session first, if one was opened for this port */
	if (hcap->hca_port_info[port_index].port_ibmf_saa_hdl != NULL) {
		IBTF_DPRINTF_L5(cmlog, "ibcm_hca_fini_port: "
		    "ibmf_sa_session_close IBMF SAA hdl %p",
		    hcap->hca_port_info[port_index].port_ibmf_saa_hdl);

		ibmf_status = ibmf_sa_session_close(
		    &hcap->hca_port_info[port_index].port_ibmf_saa_hdl, 0);
		if (ibmf_status != IBMF_SUCCESS) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
			    "ibmf_sa_session_close of port %d returned %x",
			    port_index + 1, ibmf_status);
			/* Abort the teardown; caller may retry later */
			return (IBCM_FAILURE);
		}
	}

	if (hcap->hca_port_info[port_index].port_ibmf_hdl != NULL) {
		IBTF_DPRINTF_L5(cmlog, "ibcm_hca_fini_port: "
		    "ibmf_unregister IBMF Hdl %p",
		    hcap->hca_port_info[port_index].port_ibmf_hdl);

		/* clean-up all the ibmf qp's allocated on this port */
		ibcm_status = ibcm_free_allqps(hcap, port_index + 1);

		if (ibcm_status != IBCM_SUCCESS) {

			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
			    "ibcm_free_allqps failed for port_num %d",
			    port_index + 1);
			return (IBCM_FAILURE);
		}

		/* Tear down the receive callback */
		ibmf_status = ibmf_tear_down_async_cb(
		    hcap->hca_port_info[port_index].port_ibmf_hdl,
		    IBMF_QP_HANDLE_DEFAULT, 0);

		if (ibmf_status != IBMF_SUCCESS) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
			    "ibmf_tear_down_async_cb failed %d port_num %d",
			    ibmf_status, port_index + 1);
			return (IBCM_FAILURE);
		}

		/* Now, unregister with IBMF */
		ibmf_status = ibmf_unregister(
		    &hcap->hca_port_info[port_index].port_ibmf_hdl, 0);
		IBTF_DPRINTF_L4(cmlog, "ibcm_hca_fini_port: "
		    "ibmf_unregister of port_num %x returned %x",
		    port_index + 1, ibmf_status);

		/* Only clear our cached handle once IBMF lets go of it */
		if (ibmf_status == IBMF_SUCCESS)
			hcap->hca_port_info[port_index].port_ibmf_hdl =
								NULL;
		else {
			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
			    "ibmf_unregister failed %d port_num %d",
			    ibmf_status, port_index + 1);
			return (IBCM_FAILURE);
		}
	}
	return (IBCM_SUCCESS);
}
2236 
2237 /*
2238  * ibcm_comm_est_handler():
2239  *	Check if the given channel is in ESTABLISHED state or not
2240  *
2241  * Arguments:
2242  *	eventp	- A pointer to an ibt_async_event_t struct
2243  *
2244  * Return values: NONE
2245  */
static void
ibcm_comm_est_handler(ibt_async_event_t *eventp)
{
	ibcm_state_data_t	*statep;

	IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler:");

	/* The event must identify the channel (QP/EEC) it refers to */
	if (eventp->ev_chan_hdl == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: "
		    "both QP and EEC handles are NULL");
		return;
	}

	/* get the "statep" from qp/eec handles */
	IBCM_GET_CHAN_PRIVATE(eventp->ev_chan_hdl, statep);
	if (statep == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: statep is NULL");
		return;
	}

	mutex_enter(&statep->state_mutex);

	IBCM_RELEASE_CHAN_PRIVATE(eventp->ev_chan_hdl);

	IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler: statep = %p", statep);

	/* Hold a reference so statep survives while we work on it */
	IBCM_REF_CNT_INCR(statep);

	if ((statep->state == IBCM_STATE_REP_SENT) ||
	    (statep->state == IBCM_STATE_MRA_REP_RCVD)) {
		timeout_id_t	timer_val = statep->timerid;

		statep->state = IBCM_STATE_TRANSIENT_ESTABLISHED;

		/*
		 * Cancel any pending timeout; untimeout() must be called
		 * with the state mutex dropped.
		 */
		if (timer_val) {
			statep->timerid = 0;
			mutex_exit(&statep->state_mutex);
			(void) untimeout(timer_val);
		} else
			mutex_exit(&statep->state_mutex);

		/* CM doesn't have RTU message here */
		ibcm_cep_state_rtu(statep, NULL);

	} else {
		if (statep->state == IBCM_STATE_ESTABLISHED ||
		    statep->state == IBCM_STATE_TRANSIENT_ESTABLISHED) {
			IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler: "
			    "Channel already in ESTABLISHED state");
		} else {
			/* An unexpected behavior from remote */
			IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: "
			    "Unexpected in state = %d", statep->state);
		}
		mutex_exit(&statep->state_mutex);

		ibcm_insert_trace(statep, IBCM_TRACE_INCOMING_COMEST);
	}

	/* Drop the reference taken above */
	mutex_enter(&statep->state_mutex);
	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}
2310 
2311 
2312 /*
2313  * ibcm_async_handler():
2314  *	CM's Async Handler
2315  *	(Handles ATTACH, DETACH, COM_EST events)
2316  *
2317  * Arguments:
2318  *	eventp	- A pointer to an ibt_async_event_t struct
2319  *
2320  * Return values: None
2321  *
2322  * NOTE : CM assumes that all HCA DR events are delivered sequentially
2323  * i.e., until ibcm_async_handler  completes for a given HCA DR, framework
2324  * shall not invoke ibcm_async_handler with another DR event for the same
2325  * HCA
2326  */
/* ARGSUSED */
void
ibcm_async_handler(void *clnt_hdl, ibt_hca_hdl_t hca_hdl,
    ibt_async_code_t code, ibt_async_event_t *eventp)
{
	ibcm_hca_info_t		*hcap;
	ibcm_port_up_t		*pup;

	IBTF_DPRINTF_L3(cmlog, "ibcm_async_handler: "
	    "clnt_hdl = %p, code = 0x%x, eventp = 0x%p",
	    clnt_hdl, code, eventp);

	mutex_enter(&ibcm_global_hca_lock);

	/* If fini is going to complete successfully, then return */
	if (ibcm_finit_state != IBCM_FINIT_IDLE) {

		/*
		 * This finit state implies one of the following:
		 * Init either didn't start or didn't complete OR
		 * Fini is about to return SUCCESS and release the global lock.
		 * In all these cases, it is safe to ignore the async.
		 */

		IBTF_DPRINTF_L2(cmlog, "ibcm_async_handler: ignoring event %x, "
		    "as either init didn't complete or fini about to succeed",
		    code);
		mutex_exit(&ibcm_global_hca_lock);
		return;
	}

	switch (code) {
	case IBT_EVENT_PORT_UP:
		/*
		 * Drop the global lock early: the work is handed off to a
		 * taskq and needs no further access to the HCA list.
		 */
		mutex_exit(&ibcm_global_hca_lock);
		pup = kmem_alloc(sizeof (ibcm_port_up_t), KM_SLEEP);
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pup))
		pup->pup_hca_guid = eventp->ev_hca_guid;
		pup->pup_port = eventp->ev_port;
		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*pup))
		/* The taskq handler frees pup when done */
		(void) taskq_dispatch(ibcm_taskq,
		    ibcm_service_record_rewrite_task, pup, TQ_SLEEP);
		ibcm_path_cache_purge();
		return;

	case IBT_HCA_ATTACH_EVENT:

		/* eventp->ev_hcaguid is the HCA GUID of interest */
		ibcm_hca_attach(eventp->ev_hca_guid);
		break;

	case IBT_HCA_DETACH_EVENT:

		/* eventp->ev_hca_guid is the HCA GUID of interest */
		if ((hcap = ibcm_find_hcap_entry(eventp->ev_hca_guid)) ==
		    NULL) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_async_handler:"
			    " hca %llX doesn't exist", eventp->ev_hca_guid);
			break;
		}

		(void) ibcm_hca_detach(hcap);
		break;

	case IBT_EVENT_COM_EST_QP:
		/* eventp->ev_qp_hdl is the ibt_qp_hdl_t of interest */
		/* FALLTHROUGH */
	case IBT_EVENT_COM_EST_EEC:
		/* eventp->ev_eec_hdl is the ibt_eec_hdl_t of interest */
		ibcm_comm_est_handler(eventp);
		break;
	default:
		break;
	}

	/* Unblock, any blocked fini/init operations */
	mutex_exit(&ibcm_global_hca_lock);
}
2403