xref: /illumos-gate/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_impl.c (revision 5a602e4081a5577eb282a52aaa46f488c3722eee)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * ibcm_impl.c
30  *
31  * contains internal functions of IB CM module.
32  *
33  * TBD:
34  * 1. HCA CATASTROPHIC/RECOVERED not handled yet
35  */
36 
37 #include <sys/ib/mgt/ibcm/ibcm_impl.h>
38 #include <sys/disp.h>
39 
40 
41 /* function prototypes */
42 static ibcm_status_t	ibcm_init(void);
43 static ibcm_status_t	ibcm_fini(void);
44 
45 /* Routines to initialize and destroy CM global locks and CVs */
46 static void		ibcm_init_locks(void);
47 static void		ibcm_fini_locks(void);
48 
49 /* Routines that initialize/teardown CM's global hca structures */
50 static void		ibcm_init_hcas();
51 static ibcm_status_t	ibcm_fini_hcas();
52 
53 static void		ibcm_init_classportinfo();
54 static void		ibcm_stop_timeout_thread();
55 
56 /* Routines that handle HCA attach/detach asyncs */
57 static void		ibcm_hca_attach(ib_guid_t);
58 static ibcm_status_t	ibcm_hca_detach(ibcm_hca_info_t *);
59 
60 /* Routines that initialize the HCA's port related fields */
61 static ibt_status_t	ibcm_hca_init_port(ibcm_hca_info_t *hcap,
62 			    uint8_t port_index);
63 static ibcm_status_t	ibcm_hca_fini_port(ibcm_hca_info_t *hcap,
64 			    uint8_t port_index);
65 
66 static void ibcm_rc_flow_control_init(void);
67 static void ibcm_rc_flow_control_fini(void);
68 
69 /*
70  * Routines that check if the hca's avl trees and sidr lists are free of any
71  * active client resources, i.e., RC or UD state structures in certain states
72  */
73 static ibcm_status_t	ibcm_check_avl_clean(ibcm_hca_info_t *hcap);
74 static ibcm_status_t	ibcm_check_sidr_clean(ibcm_hca_info_t *hcap);
75 
76 /* Add a new hca structure to CM's global hca list */
77 static ibcm_hca_info_t	*ibcm_add_hca_entry(ib_guid_t hcaguid, uint_t nports);
78 
79 static void		ibcm_comm_est_handler(ibt_async_event_t *);
80 void			ibcm_async_handler(void *, ibt_hca_hdl_t,
81 			    ibt_async_code_t, ibt_async_event_t *);
82 
83 /* Global variables */
84 char			cmlog[] = "ibcm";	/* for debug log messages */
85 ibt_clnt_hdl_t		ibcm_ibt_handle;	/* IBT handle */
86 kmutex_t		ibcm_svc_info_lock;	/* list lock */
87 kcondvar_t		ibcm_svc_info_cv;	/* cv for deregister */
88 kmutex_t		ibcm_recv_mutex;
89 avl_tree_t		ibcm_svc_avl_tree;
90 taskq_t			*ibcm_taskq = NULL;
91 int			taskq_dispatch_fail_cnt;
92 
93 kmutex_t		ibcm_trace_mutex;	/* Trace mutex */
94 kmutex_t		ibcm_trace_print_mutex;	/* Trace print mutex */
95 int			ibcm_conn_max_trcnt = IBCM_MAX_CONN_TRCNT;
96 
97 int			ibcm_enable_trace = 2;	/* Trace level 2 by default */
98 int			ibcm_dtrace = 0; /* conditionally enable more dtrace */
99 
100 _NOTE(MUTEX_PROTECTS_DATA(ibcm_svc_info_lock, ibcm_svc_info_s::{svc_bind_list
101     svc_ref_cnt svc_to_delete}))
102 
103 _NOTE(MUTEX_PROTECTS_DATA(ibcm_svc_info_lock, ibcm_svc_bind_s::{sbind_link}))
104 
105 _NOTE(MUTEX_PROTECTS_DATA(ibcm_trace_mutex, ibcm_conn_trace_s))
106 
107 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_conn_trace_s))
108 
109 _NOTE(MUTEX_PROTECTS_DATA(ibcm_trace_print_mutex, ibcm_debug_buf))
110 
111 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_debug_buf))
112 
113 /*
114  * Initial state is INIT. All hca dr's return success immediately in this
115  * state, without adding or deleting any hca's to CM.
116  */
117 ibcm_finit_state_t	ibcm_finit_state = IBCM_FINIT_INIT;
118 
119 /* mutex and cv to manage hca's reference and resource count(s) */
120 kmutex_t		ibcm_global_hca_lock;
121 kcondvar_t		ibcm_global_hca_cv;
122 
123 /* mutex and cv for sa session open */
124 kmutex_t		ibcm_sa_open_lock;
125 kcondvar_t		ibcm_sa_open_cv;
126 int			ibcm_sa_timeout_delay = 1;		/* in ticks */
127 _NOTE(MUTEX_PROTECTS_DATA(ibcm_sa_open_lock,
128     ibcm_port_info_s::{port_ibmf_saa_hdl port_saa_open_in_progress}))
129 
130 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_port_info_s::{port_ibmf_saa_hdl}))
131 
132 /* serialize sm notice callbacks */
133 kmutex_t		ibcm_sm_notice_serialize_lock;
134 
135 _NOTE(LOCK_ORDER(ibcm_sm_notice_serialize_lock ibcm_global_hca_lock))
136 
137 _NOTE(MUTEX_PROTECTS_DATA(ibcm_global_hca_lock, ibcm_hca_info_s::{hca_state
138     hca_svc_cnt hca_acc_cnt hca_res_cnt hca_next}))
139 
140 _NOTE(MUTEX_PROTECTS_DATA(ibcm_global_hca_lock,
141     ibcm_port_info_s::{port_ibmf_hdl}))
142 
143 _NOTE(MUTEX_PROTECTS_DATA(ibcm_sm_notice_serialize_lock,
144     ibcm_port_info_s::{port_event_status}))
145 
146 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_hca_info_s::{hca_state}))
147 _NOTE(DATA_READABLE_WITHOUT_LOCK(
148     ibcm_hca_info_s::{hca_port_info.port_ibmf_hdl}))
149 
150 /* mutex for CM's qp list management */
151 kmutex_t		ibcm_qp_list_lock;
152 
153 _NOTE(MUTEX_PROTECTS_DATA(ibcm_qp_list_lock, ibcm_port_info_s::{port_qplist}))
154 _NOTE(MUTEX_PROTECTS_DATA(ibcm_qp_list_lock, ibcm_qp_list_s))
156 
157 kcondvar_t		ibcm_timeout_list_cv;
158 kcondvar_t		ibcm_timeout_thread_done_cv;
159 kt_did_t		ibcm_timeout_thread_did;
160 ibcm_state_data_t	*ibcm_timeout_list_hdr, *ibcm_timeout_list_tail;
161 ibcm_ud_state_data_t	*ibcm_ud_timeout_list_hdr, *ibcm_ud_timeout_list_tail;
162 kmutex_t		ibcm_timeout_list_lock;
163 uint8_t			ibcm_timeout_list_flags = 0;
164 pri_t			ibcm_timeout_thread_pri = MINCLSYSPRI;
165 
166 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock,
167     ibcm_state_data_s::timeout_next))
168 
169 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock,
170     ibcm_ud_state_data_s::ud_timeout_next))
171 
172 /*
173  * Flow control logic for open_rc_channel uses the following.
174  */
175 
176 struct ibcm_open_s {
177 	kmutex_t		mutex;
178 	kcondvar_t		cv;
179 	uint8_t			task_running;
180 	uint_t			queued;
181 	uint_t			exit_deferred;
182 	uint_t			in_progress;
183 	uint_t			in_progress_max;
184 	uint_t			sends;
185 	uint_t			sends_max;
186 	uint_t			sends_lowat;
187 	uint_t			sends_hiwat;
188 	ibcm_state_data_t	*tail;
189 	ibcm_state_data_t	head;
190 } ibcm_open;
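
/*
 * A minimal sketch of the queue invariants used by ibcm_open_enqueue(),
 * ibcm_open_dequeue() and ibcm_open_done() below: "head" is a sentinel,
 * "tail" points back at "head" when the queue is empty, a non-NULL
 * open_link marks a statep as queued, and the last queued statep's
 * open_link points back at &ibcm_open.head.  Enqueueing under
 * ibcm_open.mutex is thus:
 *
 *	statep->open_link = &ibcm_open.head;
 *	ibcm_open.tail->open_link = statep;
 *	ibcm_open.tail = statep;
 */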
191 
192 static void ibcm_open_task(void *);
193 
194 /*
195  * Flow control logic for SA access and close_rc_channel calls follows.
196  */
197 
198 int ibcm_close_simul_max	= 12;
199 int ibcm_lapr_simul_max		= 12;
200 int ibcm_saa_simul_max		= 8;
201 
202 typedef struct ibcm_flow1_s {
203 	struct ibcm_flow1_s	*link;
204 	kcondvar_t		cv;
205 	uint8_t			waiters;	/* 1 to IBCM_FLOW_SIMUL_MAX */
206 } ibcm_flow1_t;
207 
208 typedef struct ibcm_flow_s {
209 	ibcm_flow1_t		*list;
210 	uint_t			simul;	/* #requests currently outstanding */
211 	uint_t			simul_max;
212 	uint_t			waiters_per_chunk;
213 	uint_t			lowat;
214 	uint_t			lowat_default;
215 	/* statistics */
216 	uint_t			total;
217 } ibcm_flow_t;
218 
219 ibcm_flow_t ibcm_saa_flow;
220 ibcm_flow_t ibcm_close_flow;
221 ibcm_flow_t ibcm_lapr_flow;
222 
223 /* NONBLOCKING close requests are queued */
224 struct ibcm_close_s {
225 	kmutex_t		mutex;
226 	ibcm_state_data_t	*tail;
227 	ibcm_state_data_t	head;
228 } ibcm_close;
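
/*
 * Sketch of how the queue above is used: ibcm_close_enqueue() appends a
 * statep (under ibcm_close.mutex) and calls ibcm_run_tlist_thread();
 * the queue is then drained by ibcm_check_for_async_close() below,
 * which pulls each queued statep off and hands it to ibcm_close_start().
 */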
229 
230 static ibt_clnt_modinfo_t ibcm_ibt_modinfo = {	/* Client's modinfop */
231 	IBTI_V2,
232 	IBT_CM,
233 	ibcm_async_handler,
234 	NULL,
235 	"IBCM"
236 };
237 
238 /* IBCM's list of HCAs registered with it */
239 static ibcm_hca_info_t	*ibcm_hca_listp = NULL;	/* CM's HCA list */
240 
241 /* Array of CM state call table functions */
242 ibcm_state_handler_t	ibcm_sm_funcs_tbl[] = {
243 	ibcm_process_req_msg,
244 	ibcm_process_mra_msg,
245 	ibcm_process_rej_msg,
246 	ibcm_process_rep_msg,
247 	ibcm_process_rtu_msg,
248 	ibcm_process_dreq_msg,
249 	ibcm_process_drep_msg,
250 	ibcm_process_sidr_req_msg,
251 	ibcm_process_sidr_rep_msg,
252 	ibcm_process_lap_msg,
253 	ibcm_process_apr_msg
254 };
255 
256 /* the following globals are CM tunables */
257 ibt_rnr_nak_time_t	ibcm_default_rnr_nak_time = IBT_RNR_NAK_655ms;
258 
259 uint32_t	ibcm_max_retries = IBCM_MAX_RETRIES;
260 clock_t		ibcm_local_processing_time = IBCM_LOCAL_RESPONSE_TIME;
261 clock_t		ibcm_remote_response_time = IBCM_REMOTE_RESPONSE_TIME;
262 ib_time_t	ibcm_max_sidr_rep_proctime = IBCM_MAX_SIDR_PROCESS_TIME;
263 ib_time_t	ibcm_max_sidr_pktlife_time = IBCM_MAX_SIDR_PKT_LIFE_TIME;
264 
265 ib_time_t	ibcm_max_sidr_rep_store_time = 18;
266 uint32_t	ibcm_wait_for_acc_cnt_timeout = 500000;	/* 500 ms */
267 uint32_t	ibcm_wait_for_res_cnt_timeout = 500000;	/* 500 ms */
268 
269 ib_time_t	ibcm_max_ib_pkt_lt = IBCM_MAX_IB_PKT_LT;
270 ib_time_t	ibcm_max_ib_mad_pkt_lt = IBCM_MAX_IB_MAD_PKT_LT;
271 
272 /*
273  * This delay accounts for the time involved in various activities as follows:
274  *
275  * IBMF delays for posting the MADs in non-blocking mode
276  * IBMF delays for receiving the MADs and delivering them to CM
277  * CM delays in processing the MADs before invoking client handlers
278  * Any other delays associated with the HCA driver in processing the MADs and
279  * 	other subsystems that CM may invoke (e.g., SA, HCA driver)
280  */
281 uint32_t	ibcm_sw_delay	= 1000;	/* 1000us / 1ms */
282 uint32_t	ibcm_max_sa_retries = IBCM_MAX_SA_RETRIES + 1;
283 
284 /*	approx boot time */
285 uint32_t	ibcm_adj_btime = 4;	/* 4 seconds */
286 
287 /*
288  * The information in ibcm_clpinfo is kept in wire format and is set up at
289  * init time, and used read-only after that
290  */
291 ibcm_classportinfo_msg_t	ibcm_clpinfo;
292 
293 char	*event_str[] = {
294 	"NEVER SEE THIS             ",
295 	"SESSION_ID                 ",
296 	"CHAN_HDL                   ",
297 	"LOCAL_COMID/HCA/PORT       ",
298 	"LOCAL_QPN                  ",
299 	"REMOTE_COMID/HCA           ",
300 	"REMOTE_QPN                 ",
301 	"BASE_TIME                  ",
302 	"INCOMING_REQ               ",
303 	"INCOMING_REP               ",
304 	"INCOMING_RTU               ",
305 	"INCOMING_COMEST            ",
306 	"INCOMING_MRA               ",
307 	"INCOMING_REJ               ",
308 	"INCOMING_LAP               ",
309 	"INCOMING_APR               ",
310 	"INCOMING_DREQ              ",
311 	"INCOMING_DREP              ",
312 	"OUTGOING_REQ               ",
313 	"OUTGOING_REP               ",
314 	"OUTGOING_RTU               ",
315 	"OUTGOING_LAP               ",
316 	"OUTGOING_APR               ",
317 	"OUTGOING_MRA               ",
318 	"OUTGOING_REJ               ",
319 	"OUTGOING_DREQ              ",
320 	"OUTGOING_DREP              ",
321 	"REQ_POST_COMPLETE          ",
322 	"REP_POST_COMPLETE          ",
323 	"RTU_POST_COMPLETE          ",
324 	"MRA_POST_COMPLETE          ",
325 	"REJ_POST_COMPLETE          ",
326 	"LAP_POST_COMPLETE          ",
327 	"APR_POST_COMPLETE          ",
328 	"DREQ_POST_COMPLETE         ",
329 	"DREP_POST_COMPLETE         ",
330 	"TIMEOUT_REP                ",
331 	"CALLED_REQ_RCVD_EVENT      ",
332 	"RET_REQ_RCVD_EVENT         ",
333 	"CALLED_REP_RCVD_EVENT      ",
334 	"RET_REP_RCVD_EVENT         ",
335 	"CALLED_CONN_EST_EVENT      ",
336 	"RET_CONN_EST_EVENT         ",
337 	"CALLED_CONN_FAIL_EVENT     ",
338 	"RET_CONN_FAIL_EVENT        ",
339 	"CALLED_CONN_CLOSE_EVENT    ",
340 	"RET_CONN_CLOSE_EVENT       ",
341 	"INIT_INIT                  ",
342 	"INIT_INIT_FAIL             ",
343 	"INIT_RTR                   ",
344 	"INIT_RTR_FAIL              ",
345 	"RTR_RTS                    ",
346 	"RTR_RTS_FAIL               ",
347 	"RTS_RTS                    ",
348 	"RTS_RTS_FAIL               ",
349 	"TO_ERROR                   ",
350 	"ERROR_FAIL                 ",
351 	"SET_ALT                    ",
352 	"SET_ALT_FAIL               ",
353 	"STALE_DETECT               ",
354 	"OUTGOING_REQ_RETRY         ",
355 	"OUTGOING_REP_RETRY         ",
356 	"OUTGOING_LAP_RETRY         ",
357 	"OUTGOING_MRA_RETRY         ",
358 	"OUTGOING_DREQ_RETRY        ",
359 	"NEVER SEE THIS             "
360 };
361 
362 char	ibcm_debug_buf[IBCM_DEBUG_BUF_SIZE];
363 
364 _NOTE(SCHEME_PROTECTS_DATA("used in a localized function consistently",
365     ibcm_debug_buf))
366 _NOTE(READ_ONLY_DATA(ibcm_taskq))
367 
368 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_timeout_list_flags))
369 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_timeout_list_hdr))
370 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_ud_timeout_list_hdr))
371 
372 #ifdef DEBUG
373 int		ibcm_test_mode = 0;	/* set to 1, if running tests */
374 #endif
375 
376 
377 /* Module Driver Info */
378 static struct modlmisc ibcm_modlmisc = {
379 	&mod_miscops,
380 	"IB Communication Manager %I%"
381 };
382 
383 /* Module Linkage */
384 static struct modlinkage ibcm_modlinkage = {
385 	MODREV_1,
386 	&ibcm_modlmisc,
387 	NULL
388 };
389 
390 
391 int
392 _init(void)
393 {
394 	int		rval;
395 	ibcm_status_t	status;
396 
397 	status = ibcm_init();
398 	if (status != IBCM_SUCCESS) {
399 		IBTF_DPRINTF_L2(cmlog, "_init: ibcm failed %d", status);
400 		return (EINVAL);
401 	}
402 
403 	rval = mod_install(&ibcm_modlinkage);
404 	if (rval != 0) {
405 		IBTF_DPRINTF_L2(cmlog, "_init: ibcm mod_install failed %d",
406 		    rval);
407 		(void) ibcm_fini();
408 	}
409 
410 	IBTF_DPRINTF_L5(cmlog, "_init: ibcm successful");
411 	return (rval);
412 
413 }
414 
415 
416 int
417 _info(struct modinfo *modinfop)
418 {
419 	return (mod_info(&ibcm_modlinkage, modinfop));
420 }
421 
422 
423 int
424 _fini(void)
425 {
426 	int status;
427 
428 	if (ibcm_fini() != IBCM_SUCCESS)
429 		return (EBUSY);
430 
431 	if ((status = mod_remove(&ibcm_modlinkage)) != 0) {
432 		IBTF_DPRINTF_L2(cmlog, "_fini: ibcm mod_remove failed %d",
433 		    status);
434 		return (status);
435 	}
436 
437 	IBTF_DPRINTF_L5(cmlog, "_fini: ibcm successful");
438 
439 	return (status);
440 }
441 
442 /* Initializes all global mutexes and CVs in the cm module */
443 static void
444 ibcm_init_locks()
445 {
446 
447 	/* Verify CM MAD sizes */
448 #ifdef DEBUG
449 
450 	if (ibcm_test_mode > 1) {
451 
452 		IBTF_DPRINTF_L1(cmlog, "REQ MAD SIZE %d",
453 		    sizeof (ibcm_req_msg_t));
454 		IBTF_DPRINTF_L1(cmlog, "REP MAD SIZE %d",
455 		    sizeof (ibcm_rep_msg_t));
456 		IBTF_DPRINTF_L1(cmlog, "RTU MAD SIZE %d",
457 		    sizeof (ibcm_rtu_msg_t));
458 		IBTF_DPRINTF_L1(cmlog, "MRA MAD SIZE %d",
459 		    sizeof (ibcm_mra_msg_t));
460 		IBTF_DPRINTF_L1(cmlog, "REJ MAD SIZE %d",
461 		    sizeof (ibcm_rej_msg_t));
462 		IBTF_DPRINTF_L1(cmlog, "LAP MAD SIZE %d",
463 		    sizeof (ibcm_lap_msg_t));
464 		IBTF_DPRINTF_L1(cmlog, "APR MAD SIZE %d",
465 		    sizeof (ibcm_apr_msg_t));
466 		IBTF_DPRINTF_L1(cmlog, "DREQ MAD SIZE %d",
467 		    sizeof (ibcm_dreq_msg_t));
468 		IBTF_DPRINTF_L1(cmlog, "DREP MAD SIZE %d",
469 		    sizeof (ibcm_drep_msg_t));
470 		IBTF_DPRINTF_L1(cmlog, "SIDR REQ MAD SIZE %d",
471 		    sizeof (ibcm_sidr_req_msg_t));
472 		IBTF_DPRINTF_L1(cmlog, "SIDR REP MAD SIZE %d",
473 		    sizeof (ibcm_sidr_rep_msg_t));
474 	}
475 
476 #endif
477 
478 	/* Create all global locks within cm module */
479 	mutex_init(&ibcm_svc_info_lock, NULL, MUTEX_DEFAULT, NULL);
480 	mutex_init(&ibcm_timeout_list_lock, NULL, MUTEX_DEFAULT, NULL);
481 	mutex_init(&ibcm_global_hca_lock, NULL, MUTEX_DEFAULT, NULL);
482 	mutex_init(&ibcm_sa_open_lock, NULL, MUTEX_DEFAULT, NULL);
483 	mutex_init(&ibcm_recv_mutex, NULL, MUTEX_DEFAULT, NULL);
484 	mutex_init(&ibcm_sm_notice_serialize_lock, NULL, MUTEX_DEFAULT, NULL);
485 	mutex_init(&ibcm_qp_list_lock, NULL, MUTEX_DEFAULT, NULL);
486 	mutex_init(&ibcm_trace_mutex, NULL, MUTEX_DEFAULT, NULL);
487 	mutex_init(&ibcm_trace_print_mutex, NULL, MUTEX_DEFAULT, NULL);
488 	cv_init(&ibcm_svc_info_cv, NULL, CV_DRIVER, NULL);
489 	cv_init(&ibcm_timeout_list_cv, NULL, CV_DRIVER, NULL);
490 	cv_init(&ibcm_timeout_thread_done_cv, NULL, CV_DRIVER, NULL);
491 	cv_init(&ibcm_global_hca_cv, NULL, CV_DRIVER, NULL);
492 	cv_init(&ibcm_sa_open_cv, NULL, CV_DRIVER, NULL);
493 	avl_create(&ibcm_svc_avl_tree, ibcm_svc_compare,
494 	    sizeof (ibcm_svc_info_t),
495 	    offsetof(struct ibcm_svc_info_s, svc_link));
496 
497 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_locks: done");
498 }
499 
500 /* Destroys all global mutexes and CVs in the cm module */
501 static void
502 ibcm_fini_locks()
503 {
504 	/* Destroy all global locks within cm module */
505 	mutex_destroy(&ibcm_svc_info_lock);
506 	mutex_destroy(&ibcm_timeout_list_lock);
507 	mutex_destroy(&ibcm_global_hca_lock);
508 	mutex_destroy(&ibcm_sa_open_lock);
509 	mutex_destroy(&ibcm_recv_mutex);
510 	mutex_destroy(&ibcm_sm_notice_serialize_lock);
511 	mutex_destroy(&ibcm_qp_list_lock);
512 	mutex_destroy(&ibcm_trace_mutex);
513 	mutex_destroy(&ibcm_trace_print_mutex);
514 	cv_destroy(&ibcm_svc_info_cv);
515 	cv_destroy(&ibcm_timeout_list_cv);
516 	cv_destroy(&ibcm_timeout_thread_done_cv);
517 	cv_destroy(&ibcm_global_hca_cv);
518 	cv_destroy(&ibcm_sa_open_cv);
519 	avl_destroy(&ibcm_svc_avl_tree);
520 
521 	IBTF_DPRINTF_L5(cmlog, "ibcm_fini_locks: done");
522 }
523 
524 
525 /* Initialize CM's classport info */
526 static void
527 ibcm_init_classportinfo()
528 {
529 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_clpinfo));
530 
531 	ibcm_clpinfo.BaseVersion = IBCM_MAD_BASE_VERSION;
532 	ibcm_clpinfo.ClassVersion = IBCM_MAD_CLASS_VERSION;
533 
534 	/* For now, CM supports same capabilities at all ports */
535 	ibcm_clpinfo.CapabilityMask =
536 	    h2b16(IBCM_CPINFO_CAP_RC | IBCM_CPINFO_CAP_SIDR);
537 
538 	/* Bits 0-7 are all 0 for Communication Mgmt Class */
539 
540 	/* For now, CM has the same respvalue at all ports */
541 	ibcm_clpinfo.RespTimeValue_plus =
542 	    h2b32(ibt_usec2ib(ibcm_local_processing_time) & 0x1f);
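	/*
	 * Only the low 5 bits are stored above.  Assuming the usual IBTA
	 * time encoding (a value of v represents roughly 4.096us * 2^v),
	 * ibt_usec2ib() converts ibcm_local_processing_time from
	 * microseconds into that encoding before the mask is applied.
	 */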
543 
544 	/* For now, redirect fields are set to 0 */
545 	/* Trap fields are not applicable to CM, hence set to 0 */
546 
547 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_clpinfo));
548 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_classportinfo: done");
549 }
550 
551 /*
552  * ibcm_init():
553  * 	- call ibt_attach()
554  * 	- create AVL trees
555  *	- Attach to HCAs that were already present before
556  *	CM got loaded.
557  *
558  * Arguments:	NONE
559  *
560  * Return values:
561  *	IBCM_SUCCESS - success
562  */
563 static ibcm_status_t
564 ibcm_init(void)
565 {
566 	ibt_status_t	status;
567 	kthread_t	*t;
568 
569 	IBTF_DPRINTF_L3(cmlog, "ibcm_init:");
570 
571 	ibcm_init_classportinfo();
572 
573 	if (ibcm_init_ids() != IBCM_SUCCESS) {
574 		IBTF_DPRINTF_L1(cmlog, "ibcm_init: "
575 		    "fatal error: vmem_create() failed");
576 		return (IBCM_FAILURE);
577 	}
578 	ibcm_init_locks();
579 
580 	if (ibcm_ar_init() != IBCM_SUCCESS) {
581 		IBTF_DPRINTF_L1(cmlog, "ibcm_init: "
582 		    "fatal error: ibcm_ar_init() failed");
583 		ibcm_fini_ids();
584 		ibcm_fini_locks();
585 		return (IBCM_FAILURE);
586 	}
587 	ibcm_rc_flow_control_init();
588 
589 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_taskq))
590 	ibcm_taskq = system_taskq;
591 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_taskq))
592 
593 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_flags))
594 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_thread_did))
595 
596 	/* Start the timeout list processing thread */
597 	ibcm_timeout_list_flags = 0;
598 	t = thread_create(NULL, 0, ibcm_process_tlist, 0, 0, &p0, TS_RUN,
599 	    ibcm_timeout_thread_pri);
600 	ibcm_timeout_thread_did = t->t_did;
601 
602 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_flags))
603 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_thread_did))
604 
605 	/*
606 	 * NOTE: if ibt_attach is done after ibcm_init_hcas, then some
607 	 * HCA DR events may be lost. CM could re-initialize the hca list
608 	 * again, but that is more complicated: a lost DR event may be an
609 	 * HCA detach, which makes hca list re-syncing and locking more
610 	 * complex
611 	 */
612 	status = ibt_attach(&ibcm_ibt_modinfo, NULL, NULL, &ibcm_ibt_handle);
613 	if (status != IBT_SUCCESS) {
614 		IBTF_DPRINTF_L2(cmlog, "ibcm_init(): ibt_attach failed %d",
615 		    status);
616 		(void) ibcm_ar_fini();
617 		ibcm_stop_timeout_thread();
618 		ibcm_fini_ids();
619 		ibcm_fini_locks();
620 		ibcm_rc_flow_control_fini();
621 		return (IBCM_FAILURE);
622 	}
623 
624 	/* Block all HCA attach/detach asyncs */
625 	mutex_enter(&ibcm_global_hca_lock);
626 
627 	ibcm_init_hcas();
628 	ibcm_finit_state = IBCM_FINIT_IDLE;
629 
630 	ibcm_path_cache_init();
631 
632 	/* Unblock any waiting HCA DR asyncs in CM */
633 	mutex_exit(&ibcm_global_hca_lock);
634 
635 	IBTF_DPRINTF_L4(cmlog, "ibcm_init: done");
636 	return (IBCM_SUCCESS);
637 }
638 
639 /* Allocates and initializes the "per hca" global data in CM */
640 static void
641 ibcm_init_hcas()
642 {
643 	uint_t	num_hcas = 0;
644 	ib_guid_t *guid_array;
645 	int i;
646 
647 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_hcas:");
648 
649 	/* Get the number of HCAs */
650 	num_hcas = ibt_get_hca_list(&guid_array);
651 	IBTF_DPRINTF_L4(cmlog, "ibcm_init_hcas: ibt_get_hca_list() "
652 	    "returned %d hcas", num_hcas);
653 
654 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
655 
656 	for (i = 0; i < num_hcas; i++)
657 		ibcm_hca_attach(guid_array[i]);
658 
659 	if (num_hcas)
660 		ibt_free_hca_list(guid_array, num_hcas);
661 
662 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_hcas: done");
663 }
664 
665 
666 /*
667  * ibcm_fini():
668  * 	- Deregister w/ ibt
669  * 	- Cleanup IBCM HCA listp
670  * 	- Destroy mutexes
671  *
672  * Arguments:	NONE
673  *
674  * Return values:
675  *	IBCM_SUCCESS - success
676  */
677 static ibcm_status_t
678 ibcm_fini(void)
679 {
680 	ibt_status_t	status;
681 
682 	IBTF_DPRINTF_L3(cmlog, "ibcm_fini:");
683 
684 	/*
685 	 * CM assumes that all general clients have torn down all their
686 	 * established connections and service registrations, and completed
687 	 * all pending SIDR operations, before a call to ibcm_fini()
688 	 */
689 
690 	if (ibcm_ar_fini() != IBCM_SUCCESS) {
691 		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: ibcm_ar_fini failed");
692 		return (IBCM_FAILURE);
693 	}
694 
695 	/* cleanup the svcinfo list */
696 	mutex_enter(&ibcm_svc_info_lock);
697 	if (avl_first(&ibcm_svc_avl_tree) != NULL) {
698 		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: "
699 		    "ibcm_svc_avl_tree is not empty");
700 		mutex_exit(&ibcm_svc_info_lock);
701 		return (IBCM_FAILURE);
702 	}
703 	mutex_exit(&ibcm_svc_info_lock);
704 
705 	/* disables any new hca attach/detaches */
706 	mutex_enter(&ibcm_global_hca_lock);
707 
708 	ibcm_finit_state = IBCM_FINIT_BUSY;
709 
710 	if (ibcm_fini_hcas() != IBCM_SUCCESS) {
711 		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: "
712 		    "some hca's still have client resources");
713 
714 		/* First, re-initialize the hcas */
715 		ibcm_init_hcas();
716 		/* and then enable the HCA asyncs */
717 		ibcm_finit_state = IBCM_FINIT_IDLE;
718 		mutex_exit(&ibcm_global_hca_lock);
719 		if (ibcm_ar_init() != IBCM_SUCCESS) {
720 			IBTF_DPRINTF_L1(cmlog, "ibcm_fini:ibcm_ar_init failed");
721 		}
722 		return (IBCM_FAILURE);
723 	}
724 
725 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_hdr))
726 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_ud_timeout_list_hdr))
727 
728 	ASSERT(ibcm_timeout_list_hdr == NULL);
729 	ASSERT(ibcm_ud_timeout_list_hdr == NULL);
730 
731 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_hdr))
732 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_ud_timeout_list_hdr))
733 
734 	/* Release any pending asyncs on ibcm_global_hca_lock */
735 	ibcm_finit_state = IBCM_FINIT_SUCCESS;
736 	mutex_exit(&ibcm_global_hca_lock);
737 
738 	ibcm_stop_timeout_thread();
739 
740 	/*
741 	 * Detach from IBTL. Waits until all pending asyncs are complete.
742 	 * Above cv_broadcast wakes up any waiting hca attach/detach asyncs
743 	 */
744 	status = ibt_detach(ibcm_ibt_handle);
745 
746 	/* if detach fails, CM didn't free up some resources; log an error */
747 	if (status != IBT_SUCCESS)
748 	    IBTF_DPRINTF_L1(cmlog, "ibcm_fini: ibt_detach failed %d", status);
749 
750 	ibcm_rc_flow_control_fini();
751 
752 	ibcm_path_cache_fini();
753 
754 	ibcm_fini_ids();
755 	ibcm_fini_locks();
756 	IBTF_DPRINTF_L3(cmlog, "ibcm_fini: done");
757 	return (IBCM_SUCCESS);
758 }
759 
760 /* This routine exits the ibcm timeout thread */
761 static void
762 ibcm_stop_timeout_thread()
763 {
764 	mutex_enter(&ibcm_timeout_list_lock);
765 
766 	/* Stop the timeout list processing thread */
767 	ibcm_timeout_list_flags =
768 	    ibcm_timeout_list_flags | IBCM_TIMEOUT_THREAD_EXIT;
769 
770 	/* Wake up, if the timeout thread is on a cv_wait */
771 	cv_signal(&ibcm_timeout_list_cv);
772 
773 	mutex_exit(&ibcm_timeout_list_lock);
774 	thread_join(ibcm_timeout_thread_did);
775 
776 	IBTF_DPRINTF_L5(cmlog, "ibcm_stop_timeout_thread: done");
777 }
778 
779 
780 /* Attempts to release all the hca's associated with CM */
781 static ibcm_status_t
782 ibcm_fini_hcas()
783 {
784 	ibcm_hca_info_t *hcap, *next;
785 
786 	IBTF_DPRINTF_L4(cmlog, "ibcm_fini_hcas:");
787 
788 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
789 
790 	hcap = ibcm_hca_listp;
791 	while (hcap != NULL) {
792 		next = hcap->hca_next;
793 		if (ibcm_hca_detach(hcap) != IBCM_SUCCESS) {
794 			ibcm_hca_listp = hcap;
795 			return (IBCM_FAILURE);
796 		}
797 		hcap = next;
798 	}
799 
800 	IBTF_DPRINTF_L4(cmlog, "ibcm_fini_hcas: SUCCEEDED");
801 	return (IBCM_SUCCESS);
802 }
803 
804 
805 /*
806  * ibcm_hca_attach():
807  *	Called as an asynchronous event to notify CM of an attach of HCA.
808  *	Here ibcm_hca_info_t is initialized and all fields are
809  *	filled in along with SA Access handles and IBMA handles.
810  *	Also called from ibcm_init to initialize ibcm_hca_info_t's for each
811  *	hca's
812  *
813  * Arguments: (WILL CHANGE BASED ON ASYNC EVENT CODE)
814  *	hca_guid	- HCA's guid
815  *
816  * Return values: NONE
817  */
818 static void
819 ibcm_hca_attach(ib_guid_t hcaguid)
820 {
821 	int			i;
822 	ibt_status_t		status;
823 	uint_t			nports = 0;
824 	ibcm_hca_info_t		*hcap;
825 	ibt_hca_attr_t		hca_attrs;
826 
827 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_attach: guid = 0x%llX", hcaguid);
828 
829 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
830 
831 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*hcap))
832 
833 	status = ibt_query_hca_byguid(hcaguid, &hca_attrs);
834 	if (status != IBT_SUCCESS) {
835 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
836 		    "ibt_query_hca_byguid failed = %d", status);
837 		return;
838 	}
839 	nports = hca_attrs.hca_nports;
840 
841 	IBTF_DPRINTF_L4(cmlog, "ibcm_hca_attach: num ports = %x", nports);
842 
843 	if ((hcap = ibcm_add_hca_entry(hcaguid, nports)) == NULL)
844 		return;
845 
846 	hcap->hca_guid = hcaguid;	/* Set GUID */
847 	hcap->hca_num_ports = nports;	/* Set number of ports */
848 
849 	if (ibcm_init_hca_ids(hcap) != IBCM_SUCCESS) {
850 		ibcm_delete_hca_entry(hcap);
851 		return;
852 	}
853 
854 	/* Store the static hca attribute data */
855 	hcap->hca_caps = hca_attrs.hca_flags;
856 	hcap->hca_vendor_id = hca_attrs.hca_vendor_id;
857 	hcap->hca_device_id = hca_attrs.hca_device_id;
858 	hcap->hca_ack_delay = hca_attrs.hca_local_ack_delay;
859 	hcap->hca_max_rdma_in_qp = hca_attrs.hca_max_rdma_in_qp;
860 	hcap->hca_max_rdma_out_qp = hca_attrs.hca_max_rdma_out_qp;
861 
862 	/* loop thru nports and initialize IBMF handles */
863 	for (i = 0; i < hcap->hca_num_ports; i++) {
864 		status = ibt_get_port_state_byguid(hcaguid, i + 1, NULL, NULL);
865 		if (status != IBT_SUCCESS) {
866 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
867 			    "port_num %d state DOWN", i + 1);
868 		}
869 
870 		hcap->hca_port_info[i].port_hcap = hcap;
871 		hcap->hca_port_info[i].port_num = i+1;
872 
873 		if ((status = ibcm_hca_init_port(hcap, i)) != IBT_SUCCESS)
874 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
875 			    "ibcm_hca_init_port failed %d port_num %d",
876 			    status, i+1);
877 	}
878 
879 	/* create the "active" CM AVL tree */
880 	avl_create(&hcap->hca_active_tree, ibcm_active_node_compare,
881 	    sizeof (ibcm_state_data_t),
882 	    offsetof(struct ibcm_state_data_s, avl_active_link));
883 
884 	/* create the "passive" CM AVL tree */
885 	avl_create(&hcap->hca_passive_tree, ibcm_passive_node_compare,
886 	    sizeof (ibcm_state_data_t),
887 	    offsetof(struct ibcm_state_data_s, avl_passive_link));
888 
889 	/* create the "passive comid" CM AVL tree */
890 	avl_create(&hcap->hca_passive_comid_tree,
891 	    ibcm_passive_comid_node_compare,
892 	    sizeof (ibcm_state_data_t),
893 	    offsetof(struct ibcm_state_data_s, avl_passive_comid_link));
894 
895 	/*
896 	 * Mark the state of the HCA to "attach" only at the end
897 	 * Now CM starts accepting incoming MADs and client API calls
898 	 */
899 	hcap->hca_state = IBCM_HCA_ACTIVE;
900 
901 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*hcap))
902 
903 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_attach: ATTACH Done");
904 }
905 
906 /*
907  * ibcm_hca_detach():
908  *	Called as an asynchronous event to notify CM of a detach of HCA.
909  *	Here ibcm_hca_info_t is freed up and all fields that
910  *	were initialized earlier are cleaned up
911  *
912  * Arguments: (WILL CHANGE BASED ON ASYNC EVENT CODE)
913  *	hca_guid    - HCA's guid
914  *
915  * Return values:
916  *	IBCM_SUCCESS	- able to detach HCA
917  *	IBCM_FAILURE	- failed to detach HCA
918  */
919 static ibcm_status_t
920 ibcm_hca_detach(ibcm_hca_info_t *hcap)
921 {
922 	int		port_index, i;
923 	ibcm_status_t	status = IBCM_SUCCESS;
924 	clock_t		absolute_time;
925 
926 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach: hcap = 0x%p guid = 0x%llX",
927 	    hcap, hcap->hca_guid);
928 
929 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
930 
931 	/*
932 	 * Declare to all CM clients that the hca is going away, and wait
933 	 * until the access count becomes zero.
934 	 */
935 	hcap->hca_state = IBCM_HCA_NOT_ACTIVE;
936 
937 	/* wait on the response CV for up to 500 ms */
938 	absolute_time = ddi_get_lbolt() +
939 	    drv_usectohz(ibcm_wait_for_acc_cnt_timeout);
940 
941 	while (hcap->hca_acc_cnt > 0)
942 		if (cv_timedwait(&ibcm_global_hca_cv, &ibcm_global_hca_lock,
943 		    absolute_time) == -1)
944 			break;
945 
946 	if (hcap->hca_acc_cnt != 0) {
947 		/* We got a timeout */
948 #ifdef DEBUG
949 		if (ibcm_test_mode > 0)
950 			IBTF_DPRINTF_L1(cmlog, "ibcm_hca_detach: Unexpected "
951 			    "abort due to timeout on acc_cnt %u",
952 			    hcap->hca_acc_cnt);
953 		else
954 #endif
955 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: Aborting due"
956 			    " to timeout on acc_cnt %u", hcap->hca_acc_cnt);
957 		hcap->hca_state = IBCM_HCA_ACTIVE;
958 		return (IBCM_FAILURE);
959 	}
960 
961 	/*
962 	 * First make sure there are no active users of ibma handles,
963 	 * and then de-register handles.
964 	 */
965 
966 	/* make sure that there are no "Service"s registered w/ this HCA. */
967 	if (hcap->hca_svc_cnt != 0) {
968 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
969 		    "Active services still there %d", hcap->hca_svc_cnt);
970 		hcap->hca_state = IBCM_HCA_ACTIVE;
971 		return (IBCM_FAILURE);
972 	}
973 
974 	if (ibcm_check_sidr_clean(hcap) != IBCM_SUCCESS) {
975 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
976 		    "There are active SIDR operations");
977 		hcap->hca_state = IBCM_HCA_ACTIVE;
978 		return (IBCM_FAILURE);
979 	}
980 
981 	if (ibcm_check_avl_clean(hcap) != IBCM_SUCCESS) {
982 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
983 		    "There are active RC connections");
984 		hcap->hca_state = IBCM_HCA_ACTIVE;
985 		return (IBCM_FAILURE);
986 	}
987 
988 	/*
989 	 * Now, wait until all rc and sidr stateps go away
990 	 * All these stateps must be short lived ones, waiting to be cleaned
991 	 * up after some timeout value, based on the current state.
992 	 */
993 	IBTF_DPRINTF_L5(cmlog, "ibcm_hca_detach:hca_guid = 0x%llX res_cnt = %d",
994 	    hcap->hca_guid, hcap->hca_res_cnt);
995 
996 	/* wait on the response CV for up to 500 ms */
997 	absolute_time = ddi_get_lbolt() +
998 	    drv_usectohz(ibcm_wait_for_res_cnt_timeout);
999 
1000 	while (hcap->hca_res_cnt > 0)
1001 		if (cv_timedwait(&ibcm_global_hca_cv, &ibcm_global_hca_lock,
1002 		    absolute_time) == -1)
1003 			break;
1004 
1005 	if (hcap->hca_res_cnt != 0) {
1006 		/* We got a timeout waiting for hca_res_cnt to become 0 */
1007 #ifdef DEBUG
1008 		if (ibcm_test_mode > 0)
1009 			IBTF_DPRINTF_L1(cmlog, "ibcm_hca_detach: Unexpected "
1010 			    "abort due to timeout on res_cnt %d",
1011 			    hcap->hca_res_cnt);
1012 		else
1013 #endif
1014 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: Aborting due"
1015 			    " to timeout on res_cnt %d", hcap->hca_res_cnt);
1016 		hcap->hca_state = IBCM_HCA_ACTIVE;
1017 		return (IBCM_FAILURE);
1018 	}
1019 
1020 	/* All resources are gone now; the SIDR list must be empty */
1021 	ASSERT(hcap->hca_sidr_list == NULL);
1022 	avl_destroy(&hcap->hca_active_tree);
1023 	avl_destroy(&hcap->hca_passive_tree);
1024 	avl_destroy(&hcap->hca_passive_comid_tree);
1025 
1026 	/*
1027 	 * Unregister all ports from IBMA
1028 	 * If there is a failure, re-initialize any free'd ibma handles. This
1029 	 * is required to receive the incoming mads
1030 	 */
1031 	status = IBCM_SUCCESS;
1032 	for (port_index = 0; port_index < hcap->hca_num_ports; port_index++) {
1033 		if ((status = ibcm_hca_fini_port(hcap, port_index)) !=
1034 		    IBCM_SUCCESS) {
1035 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
1036 			    "Failed to free IBMA Handle for port_num %d",
1037 			    port_index + 1);
1038 			break;
1039 		}
1040 	}
1041 
1042 	/* If detach fails, re-initialize ibma handles for incoming mads */
1043 	if (status != IBCM_SUCCESS)  {
1044 		for (i = 0; i < port_index; i++) {
1045 			if (ibcm_hca_init_port(hcap, i) != IBT_SUCCESS)
1046 				IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
1047 				    "Failed to re-allocate IBMA Handles for"
1048 				    " port_num %d", port_index + 1);
1049 		}
1050 		hcap->hca_state = IBCM_HCA_ACTIVE;
1051 		return (IBCM_FAILURE);
1052 	}
1053 
1054 	ibcm_fini_hca_ids(hcap);
1055 	ibcm_delete_hca_entry(hcap);
1056 
1057 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach: DETACH succeeded");
1058 	return (IBCM_SUCCESS);
1059 }
1060 
1061 /* Checks if there are any active sidr state entries in the specified hca */
1062 static ibcm_status_t
1063 ibcm_check_sidr_clean(ibcm_hca_info_t *hcap)
1064 {
1065 	ibcm_ud_state_data_t	*usp;
1066 	uint32_t		transient_cnt = 0;
1067 
1068 	IBTF_DPRINTF_L5(cmlog, "ibcm_check_sidr_clean:");
1069 
1070 	rw_enter(&hcap->hca_sidr_list_lock, RW_WRITER);
1071 	usp = hcap->hca_sidr_list;	/* Point to the list */
1072 	while (usp != NULL) {
1073 		mutex_enter(&usp->ud_state_mutex);
1074 		if ((usp->ud_state != IBCM_STATE_SIDR_REP_SENT) &&
1075 		    (usp->ud_state != IBCM_STATE_TIMED_OUT) &&
1076 		    (usp->ud_state != IBCM_STATE_DELETE)) {
1077 
1078 			IBTF_DPRINTF_L3(cmlog, "ibcm_check_sidr_clean:"
1079 			    "usp = %p not in transient state = %d", usp,
1080 			    usp->ud_state);
1081 
1082 			mutex_exit(&usp->ud_state_mutex);
1083 			rw_exit(&hcap->hca_sidr_list_lock);
1084 			return (IBCM_FAILURE);
1085 		} else {
1086 			mutex_exit(&usp->ud_state_mutex);
1087 			++transient_cnt;
1088 		}
1089 
1090 		usp = usp->ud_nextp;
1091 	}
1092 	rw_exit(&hcap->hca_sidr_list_lock);
1093 
1094 	IBTF_DPRINTF_L4(cmlog, "ibcm_check_sidr_clean: transient_cnt %d",
1095 	    transient_cnt);
1096 
1097 	return (IBCM_SUCCESS);
1098 }
1099 
1100 /* Checks if there are any active rc state entries in the specified hca */
1101 static ibcm_status_t
1102 ibcm_check_avl_clean(ibcm_hca_info_t *hcap)
1103 
1104 {
1105 	ibcm_state_data_t	*sp;
1106 	avl_tree_t		*avl_tree;
1107 	uint32_t		transient_cnt = 0;
1108 
1109 	IBTF_DPRINTF_L5(cmlog, "ibcm_check_avl_clean:");
1110 	/*
1111 	 * Both the trees, i.e., active and passive, must reference all
1112 	 * statep's, so let's use one
1113 	 */
1114 	avl_tree = &hcap->hca_active_tree;
1115 
1116 	rw_enter(&hcap->hca_state_rwlock, RW_WRITER);
1117 
1118 	for (sp = avl_first(avl_tree); sp != NULL;
1119 	    sp = avl_walk(avl_tree, sp, AVL_AFTER)) {
1120 		mutex_enter(&sp->state_mutex);
1121 		if ((sp->state != IBCM_STATE_TIMEWAIT) &&
1122 		    (sp->state != IBCM_STATE_REJ_SENT) &&
1123 		    (sp->state != IBCM_STATE_DELETE)) {
1124 			IBTF_DPRINTF_L3(cmlog, "ibcm_check_avl_clean: "
1125 			    "sp = %p not in transient state = %d", sp,
1126 			    sp->state);
1127 			mutex_exit(&sp->state_mutex);
1128 			rw_exit(&hcap->hca_state_rwlock);
1129 			return (IBCM_FAILURE);
1130 		} else {
1131 			mutex_exit(&sp->state_mutex);
1132 			++transient_cnt;
1133 		}
1134 	}
1135 
1136 	rw_exit(&hcap->hca_state_rwlock);
1137 
1138 	IBTF_DPRINTF_L4(cmlog, "ibcm_check_avl_clean: transient_cnt %d",
1139 	    transient_cnt);
1140 
1141 	return (IBCM_SUCCESS);
1142 }
1143 
1144 /* Adds a new entry into CM's global hca list, if hca_guid is not there yet */
1145 static ibcm_hca_info_t *
1146 ibcm_add_hca_entry(ib_guid_t hcaguid, uint_t nports)
1147 {
1148 	ibcm_hca_info_t	*hcap;
1149 
1150 	IBTF_DPRINTF_L5(cmlog, "ibcm_add_hca_entry: guid = 0x%llX",
1151 	    hcaguid);
1152 
1153 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
1154 
1155 	/*
1156 	 * Check if this hca_guid already in the list
1157 	 * If yes, then ignore this and return NULL
1158 	 */
1159 
1160 	hcap = ibcm_hca_listp;
1161 
1162 	/* search for this HCA */
1163 	while (hcap != NULL) {
1164 		if (hcap->hca_guid == hcaguid) {
1165 			/* already exists */
1166 			IBTF_DPRINTF_L2(cmlog, "ibcm_add_hca_entry: "
1167 			    "hcap %p guid 0x%llX, entry already exists !!",
1168 			    hcap, hcap->hca_guid);
1169 			return (NULL);
1170 		}
1171 		hcap = hcap->hca_next;
1172 	}
1173 
1174 	/* Allocate storage for the new HCA entry */
1175 	hcap = kmem_zalloc(sizeof (ibcm_hca_info_t) +
1176 	    (nports - 1) * sizeof (ibcm_port_info_t), KM_SLEEP);
1177 
1178 	/* initialize RW lock */
1179 	rw_init(&hcap->hca_state_rwlock, NULL, RW_DRIVER, NULL);
1180 	/* initialize SIDR list lock */
1181 	rw_init(&hcap->hca_sidr_list_lock, NULL, RW_DRIVER, NULL);
1182 	/* Insert "hcap" into the global HCA list maintained by CM */
1183 	hcap->hca_next = ibcm_hca_listp;
1184 	ibcm_hca_listp = hcap;
1185 
1186 	IBTF_DPRINTF_L5(cmlog, "ibcm_add_hca_entry: done hcap = 0x%p", hcap);
1187 
1188 	return (hcap);
1189 
1190 }
1191 
1192 /* deletes the given ibcm_hca_info_t from CM's global hca list */
1193 void
1194 ibcm_delete_hca_entry(ibcm_hca_info_t *hcap)
1195 {
1196 	ibcm_hca_info_t	*headp, *prevp = NULL;
1197 
1198 	/* ibcm_global_hca_lock is held */
1199 	IBTF_DPRINTF_L5(cmlog, "ibcm_delete_hca_entry: guid = 0x%llX "
1200 	    "hcap = 0x%p", hcap->hca_guid, hcap);
1201 
1202 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
1203 
1204 	headp = ibcm_hca_listp;
1205 	while (headp != NULL) {
1206 		if (headp == hcap) {
1207 			IBTF_DPRINTF_L3(cmlog, "ibcm_delete_hca_entry: "
1208 			    "deleting hcap %p hcaguid %llX", hcap,
1209 			    hcap->hca_guid);
1210 			if (prevp) {
1211 				prevp->hca_next = headp->hca_next;
1212 			} else {
1213 				prevp = headp->hca_next;
1214 				ibcm_hca_listp = prevp;
1215 			}
1216 			rw_destroy(&hcap->hca_state_rwlock);
1217 			rw_destroy(&hcap->hca_sidr_list_lock);
1218 			kmem_free(hcap, sizeof (ibcm_hca_info_t) +
1219 			    (hcap->hca_num_ports - 1) *
1220 			    sizeof (ibcm_port_info_t));
1221 			return;
1222 		}
1223 
1224 		prevp = headp;
1225 		headp = headp->hca_next;
1226 	}
1227 }
1228 
1229 /*
1230  * ibcm_find_hca_entry:
1231  *	Given a HCA's GUID find out ibcm_hca_info_t entry for that HCA
1232  *	This entry can be then used to access AVL tree/SIDR list etc.
1233  *	If entry exists and in HCA ATTACH state, then hca's ref cnt is
1234  *	incremented and entry returned. Else NULL returned.
1235  *
1236  *	All functions that use ibcm_find_hca_entry and get a non-NULL
1237  *	return values must call ibcm_dec_hca_acc_cnt to decrement the
1238  *	respective hca ref cnt. There shouldn't be any usage of
1239  *	ibcm_hca_info_t * returned from ibcm_find_hca_entry,
1240  *	after decrementing the hca_acc_cnt
1241  *
1242  * INPUTS:
1243  *	hca_guid	- HCA's guid
1244  *
1245  * RETURN VALUE:
1246  *	hcap		- if a match is found, else NULL
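 *
 * EXAMPLE (an illustrative sketch, not an actual call site):
 *
 *	hcap = ibcm_find_hca_entry(hca_guid);
 *	if (hcap == NULL)
 *		return;			(or otherwise fail the request)
 *	(use hcap's AVL trees, SIDR list, etc.)
 *	ibcm_dec_hca_acc_cnt(hcap);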
1247  */
1248 ibcm_hca_info_t *
1249 ibcm_find_hca_entry(ib_guid_t hca_guid)
1250 {
1251 	ibcm_hca_info_t *hcap;
1252 
1253 	IBTF_DPRINTF_L5(cmlog, "ibcm_find_hca_entry: guid = 0x%llX", hca_guid);
1254 
1255 	mutex_enter(&ibcm_global_hca_lock);
1256 
1257 	hcap = ibcm_hca_listp;
1258 	/* search for this HCA */
1259 	while (hcap != NULL) {
1260 		if (hcap->hca_guid == hca_guid)
1261 			break;
1262 		hcap = hcap->hca_next;
1263 	}
1264 
1265 	/* if no hcap for the hca_guid, return NULL */
1266 	if (hcap == NULL) {
1267 		mutex_exit(&ibcm_global_hca_lock);
1268 		return (NULL);
1269 	}
1270 
1271 	/* return hcap, only if it is valid to use */
1272 	if (hcap->hca_state == IBCM_HCA_ACTIVE) {
1273 		++(hcap->hca_acc_cnt);
1274 
1275 		IBTF_DPRINTF_L5(cmlog, "ibcm_find_hca_entry: "
1276 		    "found hcap = 0x%p hca_acc_cnt %u", hcap,
1277 		    hcap->hca_acc_cnt);
1278 
1279 		mutex_exit(&ibcm_global_hca_lock);
1280 		return (hcap);
1281 	} else {
1282 		mutex_exit(&ibcm_global_hca_lock);
1283 
1284 		IBTF_DPRINTF_L2(cmlog, "ibcm_find_hca_entry: "
1285 		    "found hcap = 0x%p not in active state", hcap);
1286 		return (NULL);
1287 	}
1288 }
1289 
1290 /*
1291  * Searches for ibcm_hca_info_t entry based on hca_guid, but doesn't increment
1292  * the hca's reference count. This function is used, where the calling context
1293  * is attempting to delete hcap itself and hence acc_cnt cannot be incremented
1294  * OR assumes that valid hcap must be available in ibcm's global hca list.
1295  */
1296 ibcm_hca_info_t *
1297 ibcm_find_hcap_entry(ib_guid_t hca_guid)
1298 {
1299 	ibcm_hca_info_t *hcap;
1300 
1301 	IBTF_DPRINTF_L5(cmlog, "ibcm_find_hcap_entry: guid = 0x%llX", hca_guid);
1302 
1303 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
1304 
1305 	hcap = ibcm_hca_listp;
1306 	/* search for this HCA */
1307 	while (hcap != NULL) {
1308 		if (hcap->hca_guid == hca_guid)
1309 			break;
1310 		hcap = hcap->hca_next;
1311 	}
1312 
1313 	if (hcap == NULL)
1314 		IBTF_DPRINTF_L2(cmlog, "ibcm_find_hcap_entry: No hcap found for"
1315 		    " hca_guid 0x%llX", hca_guid);
1316 	else
1317 		IBTF_DPRINTF_L5(cmlog, "ibcm_find_hcap_entry: hcap found for"
1318 		    " hca_guid 0x%llX", hca_guid);
1319 
1320 	return (hcap);
1321 }
1322 
1323 /* increment the hca's temporary reference count */
1324 ibcm_status_t
1325 ibcm_inc_hca_acc_cnt(ibcm_hca_info_t *hcap)
1326 {
1327 	mutex_enter(&ibcm_global_hca_lock);
1328 	if (hcap->hca_state == IBCM_HCA_ACTIVE) {
1329 		++(hcap->hca_acc_cnt);
1330 		IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_acc_cnt: "
1331 		    "hcap = 0x%p  acc_cnt = %d ", hcap, hcap->hca_acc_cnt);
1332 		mutex_exit(&ibcm_global_hca_lock);
1333 		return (IBCM_SUCCESS);
1334 	} else {
1335 		IBTF_DPRINTF_L2(cmlog, "ibcm_inc_hca_acc_cnt: "
1336 		    "hcap INACTIVE 0x%p  acc_cnt = %d ", hcap,
1337 		    hcap->hca_acc_cnt);
1338 		mutex_exit(&ibcm_global_hca_lock);
1339 		return (IBCM_FAILURE);
1340 	}
1341 }
1342 
1343 /* decrement the hca's ref count, and wake up any waiting threads */
1344 void
1345 ibcm_dec_hca_acc_cnt(ibcm_hca_info_t *hcap)
1346 {
1347 	mutex_enter(&ibcm_global_hca_lock);
1348 	ASSERT(hcap->hca_acc_cnt > 0);
1349 	--(hcap->hca_acc_cnt);
1350 	IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_acc_cnt: hcap = 0x%p "
1351 	    "acc_cnt = %d", hcap, hcap->hca_acc_cnt);
1352 	if ((hcap->hca_state == IBCM_HCA_NOT_ACTIVE) &&
1353 	    (hcap->hca_acc_cnt == 0)) {
1354 		IBTF_DPRINTF_L3(cmlog, "ibcm_dec_hca_acc_cnt: "
1355 		    "cv_broadcast for hcap = 0x%p", hcap);
1356 		cv_broadcast(&ibcm_global_hca_cv);
1357 	}
1358 	mutex_exit(&ibcm_global_hca_lock);
1359 }
1360 
1361 /* increment the hca's resource count */
1362 void
1363 ibcm_inc_hca_res_cnt(ibcm_hca_info_t *hcap)
1364 
1365 {
1366 	mutex_enter(&ibcm_global_hca_lock);
1367 	++(hcap->hca_res_cnt);
1368 	IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_res_cnt: hcap = 0x%p "
1369 	    "ref_cnt = %d", hcap, hcap->hca_res_cnt);
1370 	mutex_exit(&ibcm_global_hca_lock);
1371 }
1372 
1373 /* decrement the hca's resource count, and wake up any waiting threads */
1374 void
1375 ibcm_dec_hca_res_cnt(ibcm_hca_info_t *hcap)
1376 {
1377 	mutex_enter(&ibcm_global_hca_lock);
1378 	ASSERT(hcap->hca_res_cnt > 0);
1379 	--(hcap->hca_res_cnt);
1380 	IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_res_cnt: hcap = 0x%p "
1381 	    "ref_cnt = %d", hcap, hcap->hca_res_cnt);
1382 	if ((hcap->hca_state == IBCM_HCA_NOT_ACTIVE) &&
1383 	    (hcap->hca_res_cnt == 0)) {
1384 		IBTF_DPRINTF_L3(cmlog, "ibcm_dec_hca_res_cnt: "
1385 		    "cv_broadcast for hcap = 0x%p", hcap);
1386 		cv_broadcast(&ibcm_global_hca_cv);
1387 	}
1388 	mutex_exit(&ibcm_global_hca_lock);
1389 }
1390 
1391 /* increment the hca's service count */
1392 void
1393 ibcm_inc_hca_svc_cnt(ibcm_hca_info_t *hcap)
1394 
1395 {
1396 	mutex_enter(&ibcm_global_hca_lock);
1397 	++(hcap->hca_svc_cnt);
1398 	IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_svc_cnt: hcap = 0x%p "
1399 	    "svc_cnt = %d", hcap, hcap->hca_svc_cnt);
1400 	mutex_exit(&ibcm_global_hca_lock);
1401 }
1402 
1403 /* decrement the hca's service count */
1404 void
1405 ibcm_dec_hca_svc_cnt(ibcm_hca_info_t *hcap)
1406 {
1407 	mutex_enter(&ibcm_global_hca_lock);
1408 	ASSERT(hcap->hca_svc_cnt > 0);
1409 	--(hcap->hca_svc_cnt);
1410 	IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_svc_cnt: hcap = 0x%p "
1411 	    "svc_cnt = %d", hcap, hcap->hca_svc_cnt);
1412 	mutex_exit(&ibcm_global_hca_lock);
1413 }
1414 
1415 /*
1416  * The following code manages three classes of requests that CM makes to
1417  * the fabric.  Those three classes are SA_ACCESS, REQ/REP/RTU, and DREQ/DREP.
1418  * The main issue is that the fabric can become very busy, and the CM
1419  * protocols rely on responses being made based on a predefined timeout
1420  * value.  By managing how many simultaneous sessions are allowed, the
1421  * CM protocol is observed to succeed with extremely high reliability
1422  * when it should.
1423  *
1424  * SA_ACCESS and DREQ/DREP are managed at the thread level, whereby the
1425  * thread blocks until there are fewer than some number of threads doing
1426  * similar requests.
1427  *
1428  * REQ/REP/RTU requests beyond a given limit are added to a list,
1429  * allowing the thread to return immediately to its caller in the
1430  * case where the "mode" is IBT_NONBLOCKING.  This is the mode used
1431  * by uDAPL and seems to be an important feature/behavior.
1432  */
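
/*
 * An illustrative sketch (not an actual call site) of the thread-level
 * flow control: a caller doing an SA query brackets the operation with
 * the wrappers defined below, e.g.
 *
 *	ibcm_sa_access_enter();
 *	(issue the SA query)
 *	ibcm_sa_access_exit();
 *
 * ibcm_flow_enter() blocks such a caller once "simul" reaches
 * "simul_max" for that class (or other callers are already waiting),
 * and ibcm_flow_exit() wakes a chunk of waiters when the count drains
 * below the low-water mark.
 */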
1433 
1434 static int
1435 ibcm_ok_to_start(struct ibcm_open_s *openp)
1436 {
1437 	return (openp->sends < openp->sends_hiwat &&
1438 	    openp->in_progress < openp->in_progress_max);
1439 }
1440 
1441 void
1442 ibcm_open_done(ibcm_state_data_t *statep)
1443 {
1444 	int run;
1445 	ibcm_state_data_t **linkp, *tmp;
1446 
1447 	ASSERT(MUTEX_HELD(&statep->state_mutex));
1448 	if (statep->open_flow == 1) {
1449 		statep->open_flow = 0;
1450 		mutex_enter(&ibcm_open.mutex);
1451 		if (statep->open_link == NULL) {
1452 			ibcm_open.in_progress--;
1453 			run = ibcm_ok_to_start(&ibcm_open);
1454 		} else {
1455 			ibcm_open.queued--;
1456 			linkp = &ibcm_open.head.open_link;
1457 			while (*linkp != statep)
1458 				linkp = &((*linkp)->open_link);
1459 			*linkp = statep->open_link;
1460 			statep->open_link = NULL;
1461 			/*
1462 			 * If we remove what tail pointed to, we need
1463 			 * to reassign tail (it is never NULL).
1464 			 * tail points to head for the empty list.
1465 			 */
1466 			if (ibcm_open.tail == statep) {
1467 				tmp = &ibcm_open.head;
1468 				while (tmp->open_link != &ibcm_open.head)
1469 					tmp = tmp->open_link;
1470 				ibcm_open.tail = tmp;
1471 			}
1472 			run = 0;
1473 		}
1474 		mutex_exit(&ibcm_open.mutex);
1475 		if (run)
1476 			ibcm_run_tlist_thread();
1477 	}
1478 }
1479 
1480 /* dtrace */
1481 void
1482 ibcm_open_wait(hrtime_t delta)
1483 {
1484 	if (delta > 1000000)
1485 		IBTF_DPRINTF_L2(cmlog, "ibcm_open_wait: flow more %lld", delta);
1486 }
1487 
1488 void
1489 ibcm_open_start(ibcm_state_data_t *statep)
1490 {
1491 	ibcm_insert_trace(statep, IBCM_TRACE_OUTGOING_REQ);
1492 
1493 	mutex_enter(&statep->state_mutex);
1494 	ibcm_open_wait(gethrtime() - statep->post_time);
1495 	mutex_exit(&statep->state_mutex);
1496 
1497 	ibcm_post_rc_mad(statep, statep->stored_msg, ibcm_post_req_complete,
1498 	    statep);
1499 
1500 	mutex_enter(&statep->state_mutex);
1501 	IBCM_REF_CNT_DECR(statep);
1502 	mutex_exit(&statep->state_mutex);
1503 }
1504 
1505 void
1506 ibcm_open_enqueue(ibcm_state_data_t *statep)
1507 {
1508 	int run;
1509 
1510 	mutex_enter(&statep->state_mutex);
1511 	statep->post_time = gethrtime();
1512 	mutex_exit(&statep->state_mutex);
1513 	mutex_enter(&ibcm_open.mutex);
1514 	if (ibcm_open.queued == 0 && ibcm_ok_to_start(&ibcm_open)) {
1515 		ibcm_open.in_progress++;
1516 		mutex_exit(&ibcm_open.mutex);
1517 		ibcm_open_start(statep);
1518 	} else {
1519 		ibcm_open.queued++;
1520 		statep->open_link = &ibcm_open.head;
1521 		ibcm_open.tail->open_link = statep;
1522 		ibcm_open.tail = statep;
1523 		run = ibcm_ok_to_start(&ibcm_open);
1524 		mutex_exit(&ibcm_open.mutex);
1525 		if (run)
1526 			ibcm_run_tlist_thread();
1527 	}
1528 }
1529 
1530 ibcm_state_data_t *
1531 ibcm_open_dequeue(void)
1532 {
1533 	ibcm_state_data_t *statep;
1534 
1535 	ASSERT(MUTEX_HELD(&ibcm_open.mutex));
1536 	ibcm_open.queued--;
1537 	ibcm_open.in_progress++;
1538 	statep = ibcm_open.head.open_link;
1539 	ibcm_open.head.open_link = statep->open_link;
1540 	statep->open_link = NULL;
1541 	/*
1542 	 * If we remove what tail pointed to, we need
1543 	 * to reassign tail (it is never NULL).
1544 	 * tail points to head for the empty list.
1545 	 */
1546 	if (ibcm_open.tail == statep)
1547 		ibcm_open.tail = &ibcm_open.head;
1548 	return (statep);
1549 }
1550 
1551 void
1552 ibcm_check_for_opens(void)
1553 {
1554 	ibcm_state_data_t 	*statep;
1555 
1556 	mutex_enter(&ibcm_open.mutex);
1557 
1558 	while (ibcm_open.queued > 0) {
1559 		if (ibcm_ok_to_start(&ibcm_open)) {
1560 			statep = ibcm_open_dequeue();
1561 			mutex_exit(&ibcm_open.mutex);
1562 
1563 			ibcm_open_start(statep);
1564 
1565 			mutex_enter(&ibcm_open.mutex);
1566 		} else {
1567 			break;
1568 		}
1569 	}
1570 	mutex_exit(&ibcm_open.mutex);
1571 }
1572 
1573 
1574 static void
1575 ibcm_flow_init(ibcm_flow_t *flow, uint_t simul_max)
1576 {
1577 	flow->list			= NULL;
1578 	flow->simul			= 0;
1579 	flow->waiters_per_chunk		= 4;
1580 	flow->simul_max			= simul_max;
1581 	flow->lowat			= simul_max - flow->waiters_per_chunk;
1582 	flow->lowat_default		= flow->lowat;
1583 	/* stats */
1584 	flow->total			= 0;
1585 }
1586 
1587 static void
1588 ibcm_rc_flow_control_init(void)
1589 {
1590 	mutex_init(&ibcm_open.mutex, NULL, MUTEX_DEFAULT, NULL);
1591 	mutex_enter(&ibcm_open.mutex);
1592 	ibcm_flow_init(&ibcm_close_flow, ibcm_close_simul_max);
1593 	ibcm_flow_init(&ibcm_lapr_flow, ibcm_lapr_simul_max);
1594 	ibcm_flow_init(&ibcm_saa_flow, ibcm_saa_simul_max);
1595 
1596 	ibcm_open.queued 		= 0;
1597 	ibcm_open.exit_deferred 	= 0;
1598 	ibcm_open.in_progress 		= 0;
1599 	ibcm_open.in_progress_max 	= 16;
1600 	ibcm_open.sends 		= 0;
1601 	ibcm_open.sends_max 		= 0;
1602 	ibcm_open.sends_lowat 		= 8;
1603 	ibcm_open.sends_hiwat 		= 16;
1604 	ibcm_open.tail 			= &ibcm_open.head;
1605 	ibcm_open.head.open_link 	= NULL;
1606 	mutex_exit(&ibcm_open.mutex);
1607 
1608 	mutex_init(&ibcm_close.mutex, NULL, MUTEX_DEFAULT, NULL);
1609 	mutex_enter(&ibcm_close.mutex);
1610 	ibcm_close.tail			= &ibcm_close.head;
1611 	ibcm_close.head.close_link 	= NULL;
1612 	mutex_exit(&ibcm_close.mutex);
1613 }
1614 
1615 static void
1616 ibcm_rc_flow_control_fini(void)
1617 {
1618 	mutex_destroy(&ibcm_open.mutex);
1619 	mutex_destroy(&ibcm_close.mutex);
1620 }
1621 
1622 static ibcm_flow1_t *
1623 ibcm_flow_find(ibcm_flow_t *flow)
1624 {
1625 	ibcm_flow1_t *flow1;
1626 	ibcm_flow1_t *f;
1627 
1628 	f = flow->list;
1629 	if (f) {	/* most likely code path */
1630 		while (f->link != NULL)
1631 			f = f->link;
1632 		if (f->waiters < flow->waiters_per_chunk)
1633 			return (f);
1634 	}
1635 
1636 	/* There was no flow1 list element ready for another waiter */
1637 	mutex_exit(&ibcm_open.mutex);
1638 	flow1 = kmem_alloc(sizeof (*flow1), KM_SLEEP);
1639 	mutex_enter(&ibcm_open.mutex);
1640 
1641 	f = flow->list;
1642 	if (f) {
1643 		while (f->link != NULL)
1644 			f = f->link;
1645 		if (f->waiters < flow->waiters_per_chunk) {
1646 			kmem_free(flow1, sizeof (*flow1));
1647 			return (f);
1648 		}
1649 		f->link = flow1;
1650 	} else {
1651 		flow->list = flow1;
1652 	}
1653 	cv_init(&flow1->cv, NULL, CV_DRIVER, NULL);
1654 	flow1->waiters = 0;
1655 	flow1->link = NULL;
1656 	return (flow1);
1657 }
1658 
1659 static void
1660 ibcm_flow_enter(ibcm_flow_t *flow)
1661 {
1662 	mutex_enter(&ibcm_open.mutex);
1663 	if (flow->list == NULL && flow->simul < flow->simul_max) {
1664 		flow->simul++;
1665 		flow->total++;
1666 		mutex_exit(&ibcm_open.mutex);
1667 	} else {
1668 		ibcm_flow1_t *flow1;
1669 
1670 		flow1 = ibcm_flow_find(flow);
1671 		flow1->waiters++;
1672 		cv_wait(&flow1->cv, &ibcm_open.mutex);
1673 		if (--flow1->waiters == 0) {
1674 			cv_destroy(&flow1->cv);
1675 			mutex_exit(&ibcm_open.mutex);
1676 			kmem_free(flow1, sizeof (*flow1));
1677 		} else
1678 			mutex_exit(&ibcm_open.mutex);
1679 	}
1680 }
1681 
1682 static void
1683 ibcm_flow_exit(ibcm_flow_t *flow)
1684 {
1685 	mutex_enter(&ibcm_open.mutex);
1686 	if (--flow->simul < flow->lowat) {
1687 		if (flow->lowat < flow->lowat_default)
1688 			flow->lowat++;
1689 		if (flow->list) {
1690 			ibcm_flow1_t *flow1;
1691 
1692 			flow1 = flow->list;
1693 			flow->list = flow1->link;	/* unlink */
1694 			flow1->link = NULL;		/* be clean */
1695 			flow->total += flow1->waiters;
1696 			flow->simul += flow1->waiters;
1697 			cv_broadcast(&flow1->cv);
1698 		}
1699 	}
1700 	mutex_exit(&ibcm_open.mutex);
1701 }
1702 
1703 void
1704 ibcm_flow_inc(void)
1705 {
1706 	mutex_enter(&ibcm_open.mutex);
1707 	if (++ibcm_open.sends > ibcm_open.sends_max) {
1708 		ibcm_open.sends_max = ibcm_open.sends;
1709 		IBTF_DPRINTF_L2(cmlog, "ibcm_flow_inc: sends max = %d",
1710 		    ibcm_open.sends_max);
1711 	}
1712 	mutex_exit(&ibcm_open.mutex);
1713 }
1714 
1715 static void
1716 ibcm_check_send_cmpltn_time(hrtime_t delta, char *event_msg)
1717 {
1718 	if (delta > 4000000LL) {
1719 		IBTF_DPRINTF_L2(cmlog, "ibcm_check_send_cmpltn_time: "
1720 		    "%s: %lldns", event_msg, delta);
1721 	}
1722 }
1723 
1724 void
1725 ibcm_flow_dec(hrtime_t time, char *mad_type)
1726 {
1727 	int flow_exit = 0;
1728 	int run = 0;
1729 
1730 	if (ibcm_dtrace)
1731 		ibcm_check_send_cmpltn_time(gethrtime() - time, mad_type);
1732 	mutex_enter(&ibcm_open.mutex);
1733 	ibcm_open.sends--;
1734 	if (ibcm_open.sends < ibcm_open.sends_lowat) {
1735 		run = ibcm_ok_to_start(&ibcm_open);
1736 		if (ibcm_open.exit_deferred) {
1737 			ibcm_open.exit_deferred--;
1738 			flow_exit = 1;
1739 		}
1740 	}
1741 	mutex_exit(&ibcm_open.mutex);
1742 	if (flow_exit)
1743 		ibcm_flow_exit(&ibcm_close_flow);
1744 	if (run)
1745 		ibcm_run_tlist_thread();
1746 }
1747 
1748 void
1749 ibcm_close_enqueue(ibcm_state_data_t *statep)
1750 {
1751 	mutex_enter(&ibcm_close.mutex);
1752 	statep->close_link = NULL;
1753 	ibcm_close.tail->close_link = statep;
1754 	ibcm_close.tail = statep;
1755 	mutex_exit(&ibcm_close.mutex);
1756 	ibcm_run_tlist_thread();
1757 }
1758 
1759 void
1760 ibcm_check_for_async_close()
1761 {
1762 	ibcm_state_data_t 	*statep;
1763 
1764 	mutex_enter(&ibcm_close.mutex);
1765 
1766 	while (ibcm_close.head.close_link) {
1767 		statep = ibcm_close.head.close_link;
1768 		ibcm_close.head.close_link = statep->close_link;
1769 		statep->close_link = NULL;
1770 		if (ibcm_close.tail == statep)
1771 			ibcm_close.tail = &ibcm_close.head;
1772 		mutex_exit(&ibcm_close.mutex);
1773 		ibcm_close_start(statep);
1774 		mutex_enter(&ibcm_close.mutex);
1775 	}
1776 	mutex_exit(&ibcm_close.mutex);
1777 }
1778 
1779 void
1780 ibcm_close_enter(void)
1781 {
1782 	ibcm_flow_enter(&ibcm_close_flow);
1783 }
1784 
1785 void
1786 ibcm_close_exit(void)
1787 {
1788 	int flow_exit;
1789 
1790 	mutex_enter(&ibcm_open.mutex);
1791 	if (ibcm_open.sends < ibcm_open.sends_lowat ||
1792 	    ibcm_open.exit_deferred >= 4)
1793 		flow_exit = 1;
1794 	else {
1795 		flow_exit = 0;
1796 		ibcm_open.exit_deferred++;
1797 	}
1798 	mutex_exit(&ibcm_open.mutex);
1799 	if (flow_exit)
1800 		ibcm_flow_exit(&ibcm_close_flow);
1801 }
1802 
1803 /*
1804  * This function needs to be called twice to finish our flow
1805  * control accounting when closing down a connection.  One
1806  * call has send_done set to 1, while the other has it set to 0.
1807  * Because of retries, this could get called more than once
1808  * with either 0 or 1, but additional calls have no effect.
1809  */
1810 void
1811 ibcm_close_done(ibcm_state_data_t *statep, int send_done)
1812 {
1813 	int flow_exit;
1814 
1815 	ASSERT(MUTEX_HELD(&statep->state_mutex));
1816 	if (statep->close_flow == 1) {
1817 		if (send_done)
1818 			statep->close_flow = 3;
1819 		else
1820 			statep->close_flow = 2;
1821 	} else if ((send_done && statep->close_flow == 2) ||
1822 	    (!send_done && statep->close_flow == 3)) {
1823 		statep->close_flow = 0;
1824 		mutex_enter(&ibcm_open.mutex);
1825 		if (ibcm_open.sends < ibcm_open.sends_lowat ||
1826 		    ibcm_open.exit_deferred >= 4)
1827 			flow_exit = 1;
1828 		else {
1829 			flow_exit = 0;
1830 			ibcm_open.exit_deferred++;
1831 		}
1832 		mutex_exit(&ibcm_open.mutex);
1833 		if (flow_exit)
1834 			ibcm_flow_exit(&ibcm_close_flow);
1835 	}
1836 }
1837 
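/*
 * Illustrative sketch (not part of the original driver) of the two-call
 * pattern described above for ibcm_close_done().  The function names
 * ibcm_example_drep_send_done() and ibcm_example_close_proto_done() are
 * hypothetical stand-ins for the MAD send-completion path and the close
 * protocol path; whichever of the two runs second releases the
 * close-flow slot.
 */
#if 0
static void
ibcm_example_drep_send_done(ibcm_state_data_t *statep)
{
	mutex_enter(&statep->state_mutex);
	ibcm_close_done(statep, 1);	/* the DREQ/DREP send has completed */
	mutex_exit(&statep->state_mutex);
}

static void
ibcm_example_close_proto_done(ibcm_state_data_t *statep)
{
	mutex_enter(&statep->state_mutex);
	ibcm_close_done(statep, 0);	/* close protocol processing is done */
	mutex_exit(&statep->state_mutex);
}
#endif
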
1838 void
1839 ibcm_lapr_enter(void)
1840 {
1841 	ibcm_flow_enter(&ibcm_lapr_flow);
1842 }
1843 
1844 void
1845 ibcm_lapr_exit(void)
1846 {
1847 	ibcm_flow_exit(&ibcm_lapr_flow);
1848 }
1849 
1850 void
1851 ibcm_sa_access_enter()
1852 {
1853 	ibcm_flow_enter(&ibcm_saa_flow);
1854 }
1855 
1856 void
1857 ibcm_sa_access_exit()
1858 {
1859 	ibcm_flow_exit(&ibcm_saa_flow);
1860 }
1861 
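/*
 * ibcm_sm_notice_handler() is the subnet event callback registered with
 * ibmf_saa for each port.  It maps IBMF SAA event codes to IBT subnet
 * event codes, purges the path cache on GID availability changes, and
 * forwards the event to IBTL clients while holding an HCA access count
 * so the HCA cannot be detached underneath the callback.
 */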
1862 static void
1863 ibcm_sm_notice_handler(ibmf_saa_handle_t saa_handle,
1864     ibmf_saa_subnet_event_t saa_event_code,
1865     ibmf_saa_event_details_t *saa_event_details,
1866     void *callback_arg)
1867 {
1868 	ibcm_port_info_t	*portp = (ibcm_port_info_t *)callback_arg;
1869 	ibt_subnet_event_code_t code;
1870 	ibt_subnet_event_t	event;
1871 	uint8_t			event_status;
1872 
1873 	IBTF_DPRINTF_L3(cmlog, "ibcm_sm_notice_handler: saa_hdl %p, code = %d",
1874 	    saa_handle, saa_event_code);
1875 
1876 	mutex_enter(&ibcm_sm_notice_serialize_lock);
1877 
1878 	switch (saa_event_code) {
1879 	case IBMF_SAA_EVENT_MCG_CREATED:
1880 		code = IBT_SM_EVENT_MCG_CREATED;
1881 		break;
1882 	case IBMF_SAA_EVENT_MCG_DELETED:
1883 		code = IBT_SM_EVENT_MCG_DELETED;
1884 		break;
1885 	case IBMF_SAA_EVENT_GID_AVAILABLE:
1886 		code = IBT_SM_EVENT_GID_AVAIL;
1887 		ibcm_path_cache_purge();
1888 		break;
1889 	case IBMF_SAA_EVENT_GID_UNAVAILABLE:
1890 		code = IBT_SM_EVENT_GID_UNAVAIL;
1891 		ibcm_path_cache_purge();
1892 		break;
1893 	case IBMF_SAA_EVENT_SUBSCRIBER_STATUS_CHG:
1894 		event_status =
1895 		    saa_event_details->ie_producer_event_status_mask &
1896 		    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM;
1897 		if (event_status == (portp->port_event_status &
1898 		    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM)) {
1899 			mutex_exit(&ibcm_sm_notice_serialize_lock);
1900 			return;	/* no change */
1901 		}
1902 		portp->port_event_status = event_status;
1903 		if (event_status == IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM)
1904 			code = IBT_SM_EVENT_AVAILABLE;
1905 		else
1906 			code = IBT_SM_EVENT_UNAVAILABLE;
1907 		break;
1908 	default:
1909 		mutex_exit(&ibcm_sm_notice_serialize_lock);
1910 		return;
1911 	}
1912 
1913 	mutex_enter(&ibcm_global_hca_lock);
1914 
1915 	/* don't send the event if we're tearing down */
1916 	if (!IBCM_ACCESS_HCA_OK(portp->port_hcap)) {
1917 		mutex_exit(&ibcm_global_hca_lock);
1918 		mutex_exit(&ibcm_sm_notice_serialize_lock);
1919 		return;
1920 	}
1921 
1922 	++(portp->port_hcap->hca_acc_cnt);
1923 	mutex_exit(&ibcm_global_hca_lock);
1924 
1925 	event.sm_notice_gid = saa_event_details->ie_gid;
1926 	ibtl_cm_sm_notice_handler(portp->port_sgid0, code, &event);
1927 
1928 	mutex_exit(&ibcm_sm_notice_serialize_lock);
1929 
1930 	ibcm_dec_hca_acc_cnt(portp->port_hcap);
1931 }
1932 
1933 void
1934 ibt_register_subnet_notices(ibt_clnt_hdl_t ibt_hdl,
1935     ibt_sm_notice_handler_t sm_notice_handler, void *private)
1936 {
1937 	ibcm_port_info_t	*portp;
1938 	ibcm_hca_info_t		*hcap;
1939 	uint8_t			port;
1940 	int			num_failed_sgids;
1941 	ibtl_cm_sm_init_fail_t	*ifail;
1942 	ib_gid_t		*sgidp;
1943 
1944 	IBTF_DPRINTF_L3(cmlog, "ibt_register_subnet_notices: ibt_hdl = %p",
1945 	    ibt_hdl);
1946 
1947 	mutex_enter(&ibcm_sm_notice_serialize_lock);
1948 
1949 	ibtl_cm_set_sm_notice_handler(ibt_hdl, sm_notice_handler, private);
1950 	if (sm_notice_handler == NULL) {
1951 		mutex_exit(&ibcm_sm_notice_serialize_lock);
1952 		return;
1953 	}
1954 
1955 	/* for each port without SM/SA service, note its sgid and notify the client */
1956 	mutex_enter(&ibcm_global_hca_lock);
1957 	num_failed_sgids = 0;
1958 	hcap = ibcm_hca_listp;
1959 	while (hcap != NULL) {
1960 		portp = hcap->hca_port_info;
1961 		for (port = 0; port < hcap->hca_num_ports; port++) {
1962 			if (!(portp->port_event_status &
1963 			    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM))
1964 				num_failed_sgids++;
1965 			portp++;
1966 		}
1967 		hcap = hcap->hca_next;
1968 	}
1969 	if (num_failed_sgids != 0) {
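		/*
		 * The ibtl_cm_sm_init_fail_t structure already holds room
		 * for one gid in smf_sgid[], hence the "num_failed_sgids - 1"
		 * in the allocation size below.
		 */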
1970 		ifail = kmem_alloc(sizeof (*ifail) +
1971 		    (num_failed_sgids - 1) * sizeof (ib_gid_t), KM_SLEEP);
1972 		ifail->smf_num_sgids = num_failed_sgids;
1973 		ifail->smf_ibt_hdl = ibt_hdl;
1974 		sgidp = &ifail->smf_sgid[0];
1975 		hcap = ibcm_hca_listp;
1976 		while (hcap != NULL) {
1977 			portp = hcap->hca_port_info;
1978 			for (port = 0; port < hcap->hca_num_ports; port++) {
1979 				if (!(portp->port_event_status &
1980 				    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM))
1981 					*sgidp++ = portp->port_sgid0;
1982 				portp++;
1983 			}
1984 			hcap = hcap->hca_next;
1985 		}
1986 	}
1987 	mutex_exit(&ibcm_global_hca_lock);
1988 
1989 	if (num_failed_sgids != 0) {
1990 		ibtl_cm_sm_notice_init_failure(ifail);
1991 		kmem_free(ifail, sizeof (*ifail) +
1992 		    (num_failed_sgids - 1) * sizeof (ib_gid_t));
1993 	}
1994 	mutex_exit(&ibcm_sm_notice_serialize_lock);
1995 }
1996 
1997 /* Run from a taskq; running it inline has been seen to overflow the stack. */
1998 static void
1999 ibcm_init_saa(void *arg)
2000 {
2001 	ibcm_port_info_t		*portp = (ibcm_port_info_t *)arg;
2002 	int				status;
2003 	ib_guid_t			port_guid;
2004 	ibmf_saa_subnet_event_args_t	event_args;
2005 
2006 	port_guid = portp->port_sgid0.gid_guid;
2007 
2008 	IBTF_DPRINTF_L3(cmlog, "ibcm_init_saa: port guid %llX", port_guid);
2009 
2010 	event_args.is_event_callback_arg = portp;
2011 	event_args.is_event_callback = ibcm_sm_notice_handler;
2012 
2013 	if ((status = ibmf_sa_session_open(port_guid, 0, &event_args,
2014 	    IBMF_VERSION, 0, &portp->port_ibmf_saa_hdl)) != IBMF_SUCCESS) {
2015 		IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa: "
2016 		    "ibmf_sa_session_open failed for port guid %llX "
2017 		    "status = %d", port_guid, status);
2018 	} else {
2019 		IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa: "
2020 		    "registered sa_hdl 0x%p for port guid %llX",
2021 		    portp->port_ibmf_saa_hdl, port_guid);
2022 	}
2023 
2024 	mutex_enter(&ibcm_sa_open_lock);
2025 	portp->port_saa_open_in_progress = 0;
2026 	cv_broadcast(&ibcm_sa_open_cv);
2027 	mutex_exit(&ibcm_sa_open_lock);
2028 }
2029 
2030 void
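/*
 * ibcm_init_saa_handle() kicks off opening an ibmf SAA session for the
 * given port on ibcm_taskq, unless a session is already open or an open
 * is already in progress.  Opens in progress are synchronized through
 * ibcm_sa_open_lock and ibcm_sa_open_cv.
 */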
2031 ibcm_init_saa_handle(ibcm_hca_info_t *hcap, uint8_t port)
2032 {
2033 	ibmf_saa_handle_t	saa_handle;
2034 	uint8_t			port_index = port - 1;
2035 	ibcm_port_info_t	*portp = &hcap->hca_port_info[port_index];
2036 	ibt_status_t		ibt_status;
2037 
2038 	if (port_index >= hcap->hca_num_ports)
2039 		return;
2040 
2041 	mutex_enter(&ibcm_sa_open_lock);
2042 	if (portp->port_saa_open_in_progress) {
2043 		mutex_exit(&ibcm_sa_open_lock);
2044 		return;
2045 	}
2046 
2047 	saa_handle = portp->port_ibmf_saa_hdl;
2048 	if (saa_handle != NULL) {
2049 		mutex_exit(&ibcm_sa_open_lock);
2050 		return;
2051 	}
2052 
2053 	portp->port_saa_open_in_progress = 1;
2054 	mutex_exit(&ibcm_sa_open_lock);
2055 
2056 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(portp->port_event_status))
2057 
2058 	/* The assumption is that we're getting event notifications */
2059 	portp->port_event_status = IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM;
2060 
2061 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(portp->port_event_status))
2062 
2063 	ibt_status = ibt_get_port_state_byguid(portp->port_hcap->hca_guid,
2064 	    portp->port_num, &portp->port_sgid0, NULL);
2065 	if (ibt_status != IBT_SUCCESS) {
2066 		IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa_handle: "
2067 		    "ibt_get_port_state_byguid failed for guid %llX "
2068 		    "with status %d", portp->port_hcap->hca_guid, ibt_status);
2069 		mutex_enter(&ibcm_sa_open_lock);
2070 		portp->port_saa_open_in_progress = 0;
2071 		cv_broadcast(&ibcm_sa_open_cv);
2072 		mutex_exit(&ibcm_sa_open_lock);
2073 		return;
2074 	}
2075 	/* if the port is UP, try sa_session_open */
2076 	(void) taskq_dispatch(ibcm_taskq, ibcm_init_saa, portp, TQ_SLEEP);
2077 }
2078 
2079 
2080 ibmf_saa_handle_t
2081 ibcm_get_saa_handle(ibcm_hca_info_t *hcap, uint8_t port)
2082 {
2083 	ibmf_saa_handle_t	saa_handle;
2084 	uint8_t			port_index = port - 1;
2085 	ibcm_port_info_t	*portp = &hcap->hca_port_info[port_index];
2086 	ibt_status_t		ibt_status;
2087 
2088 	if (port_index >= hcap->hca_num_ports)
2089 		return (NULL);
2090 
2091 	mutex_enter(&ibcm_sa_open_lock);
2092 	while (portp->port_saa_open_in_progress) {
2093 		cv_wait(&ibcm_sa_open_cv, &ibcm_sa_open_lock);
2094 	}
2095 
2096 	saa_handle = portp->port_ibmf_saa_hdl;
2097 	if (saa_handle != NULL) {
2098 		mutex_exit(&ibcm_sa_open_lock);
2099 		return (saa_handle);
2100 	}
2101 
2102 	portp->port_saa_open_in_progress = 1;
2103 	mutex_exit(&ibcm_sa_open_lock);
2104 
2105 	ibt_status = ibt_get_port_state_byguid(portp->port_hcap->hca_guid,
2106 	    portp->port_num, &portp->port_sgid0, NULL);
2107 	if (ibt_status != IBT_SUCCESS) {
2108 		IBTF_DPRINTF_L2(cmlog, "ibcm_get_saa_handle: "
2109 		    "ibt_get_port_state_byguid failed for guid %llX "
2110 		    "with status %d", portp->port_hcap->hca_guid, ibt_status);
2111 		mutex_enter(&ibcm_sa_open_lock);
2112 		portp->port_saa_open_in_progress = 0;
2113 		cv_broadcast(&ibcm_sa_open_cv);
2114 		mutex_exit(&ibcm_sa_open_lock);
2115 		return (NULL);
2116 	}
2117 	/* if the port is UP, try sa_session_open */
2118 	(void) taskq_dispatch(ibcm_taskq, ibcm_init_saa, portp, TQ_SLEEP);
2119 
2120 	mutex_enter(&ibcm_sa_open_lock);
2121 	while (portp->port_saa_open_in_progress) {
2122 		cv_wait(&ibcm_sa_open_cv, &ibcm_sa_open_lock);
2123 	}
2124 	saa_handle = portp->port_ibmf_saa_hdl;
2125 	mutex_exit(&ibcm_sa_open_lock);
2126 	return (saa_handle);
2127 }
2128 
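/*
 * Illustrative sketch (not part of the original driver) of the intended
 * calling pattern for the routines above: throttle concurrent SA access,
 * look up the port's SAA handle, issue the query, then release the
 * throttle.  ibcm_example_saa_query() and the query placeholder are
 * hypothetical.
 */
#if 0
static void
ibcm_example_saa_query(ibcm_hca_info_t *hcap, uint8_t port)
{
	ibmf_saa_handle_t	saa_handle;

	ibcm_sa_access_enter();		/* flow-control concurrent SA access */
	saa_handle = ibcm_get_saa_handle(hcap, port);
	if (saa_handle != NULL) {
		/* ... issue the desired ibmf SAA query here ... */
	}
	ibcm_sa_access_exit();
}
#endif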
2129 
2130 /*
2131  * ibcm_hca_init_port():
2132  * 	- Register the port with IBMF and set up its receive callback
2133  *
2134  * Arguments:
2135  *	hcap		- pointer to CM's per-HCA info structure
2136  *	port_index	- port number minus 1
2137  *
2138  * Return values:
2139  *	IBT_SUCCESS - success
2140  */
2141 ibt_status_t
2142 ibcm_hca_init_port(ibcm_hca_info_t *hcap, uint8_t port_index)
2143 {
2144 	int			status;
2145 	ibmf_register_info_t	*ibmf_reg;
2146 
2147 	IBTF_DPRINTF_L4(cmlog, "ibcm_hca_init_port: hcap = 0x%p port_num %d",
2148 	    hcap, port_index + 1);
2149 
2150 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
2151 
2152 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(hcap->hca_port_info))
2153 
2154 	if (hcap->hca_port_info[port_index].port_ibmf_hdl == NULL) {
2155 		/* Register with IBMF */
2156 		ibmf_reg = &hcap->hca_port_info[port_index].port_ibmf_reg;
2157 		ibmf_reg->ir_ci_guid = hcap->hca_guid;
2158 		ibmf_reg->ir_port_num = port_index + 1;
2159 		ibmf_reg->ir_client_class = COMM_MGT_MANAGER_AGENT;
2160 
2161 		/*
2162 		 * register with management framework
2163 		 */
2164 		status = ibmf_register(ibmf_reg, IBMF_VERSION,
2165 		    IBMF_REG_FLAG_NO_OFFLOAD, NULL, NULL,
2166 		    &(hcap->hca_port_info[port_index].port_ibmf_hdl),
2167 		    &(hcap->hca_port_info[port_index].port_ibmf_caps));
2168 
2169 		if (status != IBMF_SUCCESS) {
2170 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_init_port: "
2171 			    "ibmf_register failed for port_num %x, "
2172 			    "status = %x", port_index + 1, status);
2173 			return (ibcm_ibmf_analyze_error(status));
2174 		}
2175 
2176 		hcap->hca_port_info[port_index].port_qp1.qp_cm =
2177 		    IBMF_QP_HANDLE_DEFAULT;
2178 		hcap->hca_port_info[port_index].port_qp1.qp_port =
2179 		    &(hcap->hca_port_info[port_index]);
2180 
2181 		/*
2182 		 * Register the receive callback with IBMF.
2183 		 * Since we just did an ibmf_register, the handle is
2184 		 * valid and ibcm_recv_cb() is valid, so we can
2185 		 * safely assert for success of ibmf_setup_async_cb().
2186 		 *
2187 		 * Depending on the "state" of the HCA,
2188 		 * CM may drop incoming packets
2189 		 */
2190 		status = ibmf_setup_async_cb(
2191 		    hcap->hca_port_info[port_index].port_ibmf_hdl,
2192 		    IBMF_QP_HANDLE_DEFAULT, ibcm_recv_cb,
2193 		    &(hcap->hca_port_info[port_index].port_qp1), 0);
2194 		ASSERT(status == IBMF_SUCCESS);
2195 
2196 		IBTF_DPRINTF_L5(cmlog, "ibcm_hca_init_port: "
2197 		    "IBMF hdl[%x] = 0x%p", port_index,
2198 		    hcap->hca_port_info[port_index].port_ibmf_hdl);
2199 
2200 		/* Attempt to get the saa_handle for this port */
2201 		ibcm_init_saa_handle(hcap, port_index + 1);
2202 	}
2203 
2204 	return (IBT_SUCCESS);
2205 }
2206 
2207 /*
2208  * Used to reattempt initialization of a port's IBMF handles from
2209  * elsewhere in the CM code.
2210  */
2211 ibt_status_t
2212 ibcm_hca_reinit_port(ibcm_hca_info_t *hcap, uint8_t port_index)
2213 {
2214 	ibt_status_t	status;
2215 
2216 	IBTF_DPRINTF_L5(cmlog, "ibcm_hca_reinit_port: hcap 0x%p port_num %d",
2217 	    hcap, port_index + 1);
2218 
2219 	mutex_enter(&ibcm_global_hca_lock);
2220 	status = ibcm_hca_init_port(hcap, port_index);
2221 	mutex_exit(&ibcm_global_hca_lock);
2222 	return (status);
2223 }
2224 
2225 
2226 /*
2227  * ibcm_hca_fini_port():
2228  * 	- Close the port's SA session and deregister it from IBMF
2229  *
2230  * Arguments:
2231  *	hcap		- pointer to CM's per-HCA info structure
2232  *	port_index	- port number minus 1
2233  *
2234  * Return values:
2235  *	IBCM_SUCCESS - success
2236  */
2237 static ibcm_status_t
2238 ibcm_hca_fini_port(ibcm_hca_info_t *hcap, uint8_t port_index)
2239 {
2240 	int			ibmf_status;
2241 	ibcm_status_t		ibcm_status;
2242 
2243 	IBTF_DPRINTF_L4(cmlog, "ibcm_hca_fini_port: hcap = 0x%p port_num %d ",
2244 	    hcap, port_index + 1);
2245 
2246 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
2247 
2248 	if (hcap->hca_port_info[port_index].port_ibmf_saa_hdl != NULL) {
2249 		IBTF_DPRINTF_L5(cmlog, "ibcm_hca_fini_port: "
2250 		    "ibmf_sa_session_close IBMF SAA hdl %p",
2251 		    hcap->hca_port_info[port_index].port_ibmf_saa_hdl);
2252 
2253 		ibmf_status = ibmf_sa_session_close(
2254 		    &hcap->hca_port_info[port_index].port_ibmf_saa_hdl, 0);
2255 		if (ibmf_status != IBMF_SUCCESS) {
2256 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2257 			    "ibmf_sa_session_close of port %d returned %x",
2258 			    port_index + 1, ibmf_status);
2259 			return (IBCM_FAILURE);
2260 		}
2261 	}
2262 
2263 	if (hcap->hca_port_info[port_index].port_ibmf_hdl != NULL) {
2264 		IBTF_DPRINTF_L5(cmlog, "ibcm_hca_fini_port: "
2265 		    "ibmf_unregister IBMF Hdl %p",
2266 		    hcap->hca_port_info[port_index].port_ibmf_hdl);
2267 
2268 		/* clean-up all the ibmf qp's allocated on this port */
2269 		/* clean up all the IBMF QPs allocated on this port */
2270 
2271 		if (ibcm_status != IBCM_SUCCESS) {
2272 
2273 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2274 			    "ibcm_free_allqps failed for port_num %d",
2275 			    port_index + 1);
2276 			return (IBCM_FAILURE);
2277 		}
2278 
2279 		/* Tear down the receive callback */
2280 		ibmf_status = ibmf_tear_down_async_cb(
2281 		    hcap->hca_port_info[port_index].port_ibmf_hdl,
2282 		    IBMF_QP_HANDLE_DEFAULT, 0);
2283 
2284 		if (ibmf_status != IBMF_SUCCESS) {
2285 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2286 			    "ibmf_tear_down_async_cb failed %d port_num %d",
2287 			    ibmf_status, port_index + 1);
2288 			return (IBCM_FAILURE);
2289 		}
2290 
2291 		/* Now, unregister with IBMF */
2292 		ibmf_status = ibmf_unregister(
2293 		    &hcap->hca_port_info[port_index].port_ibmf_hdl, 0);
2294 		IBTF_DPRINTF_L4(cmlog, "ibcm_hca_fini_port: "
2295 		    "ibmf_unregister of port_num %x returned %x",
2296 		    port_index + 1, ibmf_status);
2297 
2298 		if (ibmf_status == IBMF_SUCCESS)
2299 			hcap->hca_port_info[port_index].port_ibmf_hdl =
2300 			    NULL;
2301 		else {
2302 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2303 			    "ibmf_unregister failed %d port_num %d",
2304 			    ibmf_status, port_index + 1);
2305 			return (IBCM_FAILURE);
2306 		}
2307 	}
2308 	return (IBCM_SUCCESS);
2309 }
2310 
2311 /*
2312  * ibcm_comm_est_handler():
2313  *	Handle a COM_EST async event as an implicit RTU for the given channel
2314  *
2315  * Arguments:
2316  *	eventp	- A pointer to an ibt_async_event_t struct
2317  *
2318  * Return values: NONE
2319  */
2320 static void
2321 ibcm_comm_est_handler(ibt_async_event_t *eventp)
2322 {
2323 	ibcm_state_data_t	*statep;
2324 
2325 	IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler:");
2326 
2327 	/* The QP and EEC handles cannot both be NULL */
2328 	if (eventp->ev_chan_hdl == NULL) {
2329 		IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: "
2330 		    "both QP and EEC handles are NULL");
2331 		return;
2332 	}
2333 
2334 	/* get the "statep" from qp/eec handles */
2335 	IBCM_GET_CHAN_PRIVATE(eventp->ev_chan_hdl, statep);
2336 	if (statep == NULL) {
2337 		IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: statep is NULL");
2338 		return;
2339 	}
2340 
2341 	mutex_enter(&statep->state_mutex);
2342 
2343 	IBCM_RELEASE_CHAN_PRIVATE(eventp->ev_chan_hdl);
2344 
2345 	IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler: statep = %p", statep);
2346 
2347 	IBCM_REF_CNT_INCR(statep);
2348 
2349 	if ((statep->state == IBCM_STATE_REP_SENT) ||
2350 	    (statep->state == IBCM_STATE_MRA_REP_RCVD)) {
2351 		timeout_id_t	timer_val = statep->timerid;
2352 
2353 		statep->state = IBCM_STATE_TRANSIENT_ESTABLISHED;
2354 
2355 		if (timer_val) {
2356 			statep->timerid = 0;
2357 			mutex_exit(&statep->state_mutex);
2358 			(void) untimeout(timer_val);
2359 		} else
2360 			mutex_exit(&statep->state_mutex);
2361 
2362 		/* CM does not have an actual RTU MAD here, so pass NULL */
2363 		ibcm_cep_state_rtu(statep, NULL);
2364 
2365 	} else {
2366 		if (statep->state == IBCM_STATE_ESTABLISHED ||
2367 		    statep->state == IBCM_STATE_TRANSIENT_ESTABLISHED) {
2368 			IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler: "
2369 			    "Channel already in ESTABLISHED state");
2370 		} else {
2371 			/* An unexpected behavior from remote */
2372 			IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: "
2373 			    "Unexpected in state = %d", statep->state);
2374 		}
2375 		mutex_exit(&statep->state_mutex);
2376 
2377 		ibcm_insert_trace(statep, IBCM_TRACE_INCOMING_COMEST);
2378 	}
2379 
2380 	mutex_enter(&statep->state_mutex);
2381 	IBCM_REF_CNT_DECR(statep);
2382 	mutex_exit(&statep->state_mutex);
2383 }
2384 
2385 
2386 /*
2387  * ibcm_async_handler():
2388  *	CM's Async Handler
2389  *	(Handles ATTACH, DETACH, COM_EST events)
2390  *
2391  * Arguments:
2392  *	eventp	- A pointer to an ibt_async_event_t struct
2393  *
2394  * Return values: None
2395  *
2396  * NOTE: CM assumes that all HCA DR events are delivered sequentially,
2397  * i.e., until ibcm_async_handler() completes for a given HCA DR event,
2398  * the framework will not invoke ibcm_async_handler() with another DR
2399  * event for the same HCA.
2400  */
2401 /* ARGSUSED */
2402 void
2403 ibcm_async_handler(void *clnt_hdl, ibt_hca_hdl_t hca_hdl,
2404     ibt_async_code_t code, ibt_async_event_t *eventp)
2405 {
2406 	ibcm_hca_info_t		*hcap;
2407 	ibcm_port_up_t		*pup;
2408 
2409 	IBTF_DPRINTF_L3(cmlog, "ibcm_async_handler: "
2410 	    "clnt_hdl = %p, code = 0x%x, eventp = 0x%p",
2411 	    clnt_hdl, code, eventp);
2412 
2413 	mutex_enter(&ibcm_global_hca_lock);
2414 
2415 	/* If fini is going to complete successfully, then return */
2416 	if (ibcm_finit_state != IBCM_FINIT_IDLE) {
2417 
2418 		/*
2419 		 * This finit state implies one of the following:
2420 		 * Init either didn't start or didn't complete OR
2421 		 * Fini is about to return SUCCESS and release the global lock.
2422 		 * In all these cases, it is safe to ignore the async.
2423 		 */
2424 
2425 		IBTF_DPRINTF_L2(cmlog, "ibcm_async_handler: ignoring event %x, "
2426 		    "as either init didn't complete or fini about to succeed",
2427 		    code);
2428 		mutex_exit(&ibcm_global_hca_lock);
2429 		return;
2430 	}
2431 
2432 	switch (code) {
2433 	case IBT_EVENT_PORT_UP:
2434 		mutex_exit(&ibcm_global_hca_lock);
2435 		pup = kmem_alloc(sizeof (ibcm_port_up_t), KM_SLEEP);
2436 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pup))
2437 		pup->pup_hca_guid = eventp->ev_hca_guid;
2438 		pup->pup_port = eventp->ev_port;
2439 		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*pup))
2440 		(void) taskq_dispatch(ibcm_taskq,
2441 		    ibcm_service_record_rewrite_task, pup, TQ_SLEEP);
2442 		ibcm_path_cache_purge();
2443 		return;
2444 
2445 	case IBT_HCA_ATTACH_EVENT:
2446 
2447 		/* eventp->ev_hca_guid is the HCA GUID of interest */
2448 		ibcm_hca_attach(eventp->ev_hca_guid);
2449 		break;
2450 
2451 	case IBT_HCA_DETACH_EVENT:
2452 
2453 		/* eventp->ev_hca_guid is the HCA GUID of interest */
2454 		if ((hcap = ibcm_find_hcap_entry(eventp->ev_hca_guid)) ==
2455 		    NULL) {
2456 			IBTF_DPRINTF_L2(cmlog, "ibcm_async_handler:"
2457 			    " hca %llX doesn't exist", eventp->ev_hca_guid);
2458 			break;
2459 		}
2460 
2461 		(void) ibcm_hca_detach(hcap);
2462 		break;
2463 
2464 	case IBT_EVENT_COM_EST_QP:
2465 		/* eventp->ev_qp_hdl is the ibt_qp_hdl_t of interest */
2466 	case IBT_EVENT_COM_EST_EEC:
2467 		/* eventp->ev_eec_hdl is the ibt_eec_hdl_t of interest */
2468 		ibcm_comm_est_handler(eventp);
2469 		break;
2470 	default:
2471 		break;
2472 	}
2473 
2474 	/* Unblock, any blocked fini/init operations */
2475 	mutex_exit(&ibcm_global_hca_lock);
2476 }
2477