xref: /illumos-gate/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_impl.c (revision fde3102f1c8dab43af9075a6e9cdabedec6ca9d7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * ibcm_impl.c
31  *
32  * Contains the internal functions of the IB CM module.
33  *
34  * TBD:
35  * 1. HCA CATASTROPHIC/RECOVERED not handled yet
36  */
37 
38 #include <sys/ib/mgt/ibcm/ibcm_impl.h>
39 #include <sys/disp.h>
40 
41 
42 /* function prototypes */
43 static ibcm_status_t	ibcm_init(void);
44 static ibcm_status_t	ibcm_fini(void);
45 
46 /* Routines to initialize and destroy CM global locks and CVs */
47 static void		ibcm_init_locks(void);
48 static void		ibcm_fini_locks(void);
49 
50 /* Routines that initialize/teardown CM's global hca structures */
51 static void		ibcm_init_hcas();
52 static ibcm_status_t	ibcm_fini_hcas();
53 
54 static void		ibcm_init_classportinfo();
55 static void		ibcm_stop_timeout_thread();
56 
57 /* Routines that handle HCA attach/detach asyncs */
58 static void		ibcm_hca_attach(ib_guid_t);
59 static ibcm_status_t	ibcm_hca_detach(ibcm_hca_info_t *);
60 
61 /* Routines that initialize the HCA's port related fields */
62 static ibt_status_t	ibcm_hca_init_port(ibcm_hca_info_t *hcap,
63 			    uint8_t port_index);
64 static ibcm_status_t	ibcm_hca_fini_port(ibcm_hca_info_t *hcap,
65 			    uint8_t port_index);
66 
67 static void ibcm_rc_flow_control_init(void);
68 static void ibcm_rc_flow_control_fini(void);
69 
70 /*
71  * Routines that check if hca's avl trees and sidr lists are free of any
72  * active client resources, i.e., RC or UD state structures in certain states
73  */
74 static ibcm_status_t	ibcm_check_avl_clean(ibcm_hca_info_t *hcap);
75 static ibcm_status_t	ibcm_check_sidr_clean(ibcm_hca_info_t *hcap);
76 
77 /* Add a new hca structure to CM's global hca list */
78 static ibcm_hca_info_t	*ibcm_add_hca_entry(ib_guid_t hcaguid, uint_t nports);
79 
80 static void		ibcm_comm_est_handler(ibt_async_event_t *);
81 void			ibcm_async_handler(void *, ibt_hca_hdl_t,
82 			    ibt_async_code_t, ibt_async_event_t *);
83 
84 /* Global variables */
85 char			cmlog[] = "ibcm";	/* for debug log messages */
86 ibt_clnt_hdl_t		ibcm_ibt_handle;	/* IBT handle */
87 kmutex_t		ibcm_svc_info_lock;	/* list lock */
88 kcondvar_t		ibcm_svc_info_cv;	/* cv for deregister */
89 kmutex_t		ibcm_recv_mutex;
90 avl_tree_t		ibcm_svc_avl_tree;
91 taskq_t			*ibcm_taskq = NULL;
92 int			taskq_dispatch_fail_cnt;
93 
94 kmutex_t		ibcm_trace_mutex;	/* Trace mutex */
95 kmutex_t		ibcm_trace_print_mutex;	/* Trace print mutex */
96 int			ibcm_conn_max_trcnt = IBCM_MAX_CONN_TRCNT;
97 
98 int			ibcm_enable_trace = 4;	/* Trace level 4 by default */
99 
100 _NOTE(MUTEX_PROTECTS_DATA(ibcm_svc_info_lock, ibcm_svc_info_s::{svc_bind_list
101     svc_ref_cnt svc_to_delete}))
102 
103 _NOTE(MUTEX_PROTECTS_DATA(ibcm_svc_info_lock, ibcm_svc_bind_s::{sbind_link}))
104 
105 _NOTE(MUTEX_PROTECTS_DATA(ibcm_trace_mutex, ibcm_conn_trace_s))
106 
107 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_conn_trace_s))
108 
109 _NOTE(MUTEX_PROTECTS_DATA(ibcm_trace_print_mutex, ibcm_debug_buf))
110 
111 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_debug_buf))
112 
113 /*
114  * Initial state is INIT. All hca DR events return success immediately in
115  * this state, without adding or deleting any hca's in CM.
116  */
117 ibcm_finit_state_t	ibcm_finit_state = IBCM_FINIT_INIT;
118 
119 /* mutex and cv to manage hca's reference and resource count(s) */
120 kmutex_t		ibcm_global_hca_lock;
121 kcondvar_t		ibcm_global_hca_cv;
122 
123 /* mutex and cv for sa session open */
124 kmutex_t		ibcm_sa_open_lock;
125 kcondvar_t		ibcm_sa_open_cv;
126 int			ibcm_sa_timeout_delay = 1;		/* in ticks */
127 _NOTE(MUTEX_PROTECTS_DATA(ibcm_sa_open_lock,
128     ibcm_port_info_s::{port_ibmf_saa_hdl port_saa_open_in_progress}))
129 
130 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_port_info_s::{port_ibmf_saa_hdl}))
131 
132 /* serialize sm notice callbacks */
133 kmutex_t		ibcm_sm_notice_serialize_lock;
134 
135 _NOTE(LOCK_ORDER(ibcm_sm_notice_serialize_lock ibcm_global_hca_lock))
136 
137 _NOTE(MUTEX_PROTECTS_DATA(ibcm_global_hca_lock, ibcm_hca_info_s::{hca_state
138     hca_svc_cnt hca_acc_cnt hca_res_cnt hca_next}))
139 
140 _NOTE(MUTEX_PROTECTS_DATA(ibcm_global_hca_lock,
141     ibcm_port_info_s::{port_ibmf_hdl}))
142 
143 _NOTE(MUTEX_PROTECTS_DATA(ibcm_sm_notice_serialize_lock,
144     ibcm_port_info_s::{port_event_status}))
145 
146 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_hca_info_s::{hca_state}))
147 _NOTE(DATA_READABLE_WITHOUT_LOCK(
148     ibcm_hca_info_s::{hca_port_info.port_ibmf_hdl}))
149 
150 /* mutex for CM's qp list management */
151 kmutex_t		ibcm_qp_list_lock;
152 
153 _NOTE(MUTEX_PROTECTS_DATA(ibcm_qp_list_lock, ibcm_port_info_s::{port_qplist}))
154 _NOTE(MUTEX_PROTECTS_DATA(ibcm_qp_list_lock, ibcm_qp_list_s))
156 
157 kcondvar_t		ibcm_timeout_list_cv;
158 kcondvar_t		ibcm_timeout_thread_done_cv;
159 kt_did_t		ibcm_timeout_thread_did;
160 ibcm_state_data_t	*ibcm_timeout_list_hdr, *ibcm_timeout_list_tail;
161 ibcm_ud_state_data_t	*ibcm_ud_timeout_list_hdr, *ibcm_ud_timeout_list_tail;
162 kmutex_t		ibcm_timeout_list_lock;
163 uint8_t			ibcm_timeout_list_flags = 0;
164 pri_t			ibcm_timeout_thread_pri = MINCLSYSPRI;
165 
166 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock,
167     ibcm_state_data_s::timeout_next))
168 
169 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock,
170     ibcm_ud_state_data_s::ud_timeout_next))
171 
172 static ibt_clnt_modinfo_t ibcm_ibt_modinfo = {	/* Client's modinfop */
173 	IBTI_V1,
174 	IBT_CM,
175 	ibcm_async_handler,
176 	NULL,
177 	"IBCM"
178 };
179 
180 /* IBCM's list of HCAs registered with it */
181 static ibcm_hca_info_t	*ibcm_hca_listp = NULL;	/* CM's HCA list */
182 
183 /* Array of CM state call table functions */
184 ibcm_state_handler_t	ibcm_sm_funcs_tbl[] = {
185 	ibcm_process_req_msg,
186 	ibcm_process_mra_msg,
187 	ibcm_process_rej_msg,
188 	ibcm_process_rep_msg,
189 	ibcm_process_rtu_msg,
190 	ibcm_process_dreq_msg,
191 	ibcm_process_drep_msg,
192 	ibcm_process_sidr_req_msg,
193 	ibcm_process_sidr_rep_msg,
194 	ibcm_process_lap_msg,
195 	ibcm_process_apr_msg
196 };
197 
198 /* the following globals are CM tunables */
199 ibt_rnr_nak_time_t	ibcm_default_rnr_nak_time = IBT_RNR_NAK_655ms;
200 
201 uint32_t	ibcm_max_retries = IBCM_MAX_RETRIES;
202 clock_t		ibcm_local_processing_time = IBCM_LOCAL_RESPONSE_TIME;
203 clock_t		ibcm_remote_response_time = IBCM_REMOTE_RESPONSE_TIME;
204 ib_time_t	ibcm_max_sidr_rep_proctime = IBCM_MAX_SIDR_PROCESS_TIME;
205 ib_time_t	ibcm_max_sidr_pktlife_time = IBCM_MAX_SIDR_PKT_LIFE_TIME;
206 
207 ib_time_t	ibcm_max_sidr_rep_store_time = 18;
208 uint32_t	ibcm_wait_for_acc_cnt_timeout = 500000;	/* 500 ms */
209 uint32_t	ibcm_wait_for_res_cnt_timeout = 500000;	/* 500 ms */
210 
211 ib_time_t	ibcm_max_ib_pkt_lt = IBCM_MAX_IB_PKT_LT;
212 ib_time_t	ibcm_max_ib_mad_pkt_lt = IBCM_MAX_IB_MAD_PKT_LT;
213 
214 /*
215  * This delay accounts for time involved in various activities as follows:
216  *
217  * IBMF delays for posting the MADs in non-blocking mode
218  * IBMF delays for receiving the MADs and delivering them to CM
219  * CM delays in processing the MADs before invoking client handlers
220  * Any other delays associated with the HCA driver in processing the MADs and
221  * 	other subsystems that CM may invoke (e.g., SA, HCA driver)
222  */
223 uint32_t	ibcm_sw_delay	= 1000;	/* 1000us / 1ms */
224 uint32_t	ibcm_max_sa_retries = IBCM_MAX_SA_RETRIES + 1;
225 
226 /*	approx boot time */
227 uint32_t	ibcm_adj_btime = 4;	/* 4 seconds */
228 
229 /*
230  * The information in ibcm_clpinfo is kept in wire format and is set up at
231  * init time, and used read-only after that
232  */
233 ibcm_classportinfo_msg_t	ibcm_clpinfo;
234 
235 char	*event_str[] = {
236 	"NEVER SEE THIS             ",
237 	"SESSION_ID                 ",
238 	"CHAN_HDL                   ",
239 	"LOCAL_COMID/HCA/PORT       ",
240 	"LOCAL_QPN                  ",
241 	"REMOTE_COMID/HCA           ",
242 	"REMOTE_QPN                 ",
243 	"BASE_TIME                  ",
244 	"INCOMING_REQ               ",
245 	"INCOMING_REP               ",
246 	"INCOMING_RTU               ",
247 	"INCOMING_COMEST            ",
248 	"INCOMING_MRA               ",
249 	"INCOMING_REJ               ",
250 	"INCOMING_LAP               ",
251 	"INCOMING_APR               ",
252 	"INCOMING_DREQ              ",
253 	"INCOMING_DREP              ",
254 	"OUTGOING_REQ               ",
255 	"OUTGOING_REP               ",
256 	"OUTGOING_RTU               ",
257 	"OUTGOING_LAP               ",
258 	"OUTGOING_APR               ",
259 	"OUTGOING_MRA               ",
260 	"OUTGOING_REJ               ",
261 	"OUTGOING_DREQ              ",
262 	"OUTGOING_DREP              ",
263 	"REQ_POST_COMPLETE          ",
264 	"REP_POST_COMPLETE          ",
265 	"RTU_POST_COMPLETE          ",
266 	"MRA_POST_COMPLETE          ",
267 	"REJ_POST_COMPLETE          ",
268 	"LAP_POST_COMPLETE          ",
269 	"APR_POST_COMPLETE          ",
270 	"DREQ_POST_COMPLETE         ",
271 	"DREP_POST_COMPLETE         ",
272 	"TIMEOUT_REP                ",
273 	"CALLED_REQ_RCVD_EVENT      ",
274 	"RET_REQ_RCVD_EVENT         ",
275 	"CALLED_REP_RCVD_EVENT      ",
276 	"RET_REP_RCVD_EVENT         ",
277 	"CALLED_CONN_EST_EVENT      ",
278 	"RET_CONN_EST_EVENT         ",
279 	"CALLED_CONN_FAIL_EVENT     ",
280 	"RET_CONN_FAIL_EVENT        ",
281 	"CALLED_CONN_CLOSE_EVENT    ",
282 	"RET_CONN_CLOSE_EVENT       ",
283 	"INIT_INIT                  ",
284 	"INIT_INIT_FAIL             ",
285 	"INIT_RTR                   ",
286 	"INIT_RTR_FAIL              ",
287 	"RTR_RTS                    ",
288 	"RTR_RTS_FAIL               ",
289 	"RTS_RTS                    ",
290 	"RTS_RTS_FAIL               ",
291 	"TO_ERROR                   ",
292 	"ERROR_FAIL                 ",
293 	"SET_ALT                    ",
294 	"SET_ALT_FAIL               ",
295 	"STALE_DETECT               ",
296 	"NEVER SEE THIS             "
297 };
298 
299 char	ibcm_debug_buf[IBCM_DEBUG_BUF_SIZE];
300 
301 _NOTE(SCHEME_PROTECTS_DATA("used in a localized function consistently",
302     ibcm_debug_buf))
303 _NOTE(READ_ONLY_DATA(ibcm_taskq))
304 
305 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_timeout_list_flags))
306 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_timeout_list_hdr))
307 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_ud_timeout_list_hdr))
308 
309 #ifdef DEBUG
310 int		ibcm_test_mode = 0;	/* set to 1, if running tests */
311 #endif
312 
313 
314 /* Module Driver Info */
315 static struct modlmisc ibcm_modlmisc = {
316 	&mod_miscops,
317 	"IB Communication Manager %I%"
318 };
319 
320 /* Module Linkage */
321 static struct modlinkage ibcm_modlinkage = {
322 	MODREV_1,
323 	&ibcm_modlmisc,
324 	NULL
325 };
326 
327 
328 int
329 _init(void)
330 {
331 	int		rval;
332 	ibcm_status_t	status;
333 
334 	status = ibcm_init();
335 	if (status != IBCM_SUCCESS) {
336 		IBTF_DPRINTF_L2(cmlog, "_init: ibcm failed %d", status);
337 		return (EINVAL);
338 	}
339 
340 	rval = mod_install(&ibcm_modlinkage);
341 	if (rval != 0) {
342 		IBTF_DPRINTF_L2(cmlog, "_init: ibcm mod_install failed %d",
343 		    rval);
344 		(void) ibcm_fini();
345 	}
346 
347 	IBTF_DPRINTF_L5(cmlog, "_init: ibcm successful");
348 	return (rval);
349 
350 }
351 
352 
353 int
354 _info(struct modinfo *modinfop)
355 {
356 	return (mod_info(&ibcm_modlinkage, modinfop));
357 }
358 
359 
360 int
361 _fini(void)
362 {
363 	int status;
364 
365 	if (ibcm_fini() != IBCM_SUCCESS)
366 		return (EBUSY);
367 
368 	if ((status = mod_remove(&ibcm_modlinkage)) != 0) {
369 		IBTF_DPRINTF_L2(cmlog, "_fini: ibcm mod_remove failed %d",
370 		    status);
371 		return (status);
372 	}
373 
374 	IBTF_DPRINTF_L5(cmlog, "_fini: ibcm successful");
375 
376 	return (status);
377 }
378 
379 /* Initializes all global mutexes and CVs in the cm module */
380 static void
381 ibcm_init_locks()
382 {
383 
384 	/* Verify CM MAD sizes */
385 #ifdef DEBUG
386 
387 	if (ibcm_test_mode > 1) {
388 
389 		IBTF_DPRINTF_L1(cmlog, "REQ MAD SIZE %d",
390 		    sizeof (ibcm_req_msg_t));
391 		IBTF_DPRINTF_L1(cmlog, "REP MAD SIZE %d",
392 		    sizeof (ibcm_rep_msg_t));
393 		IBTF_DPRINTF_L1(cmlog, "RTU MAD SIZE %d",
394 		    sizeof (ibcm_rtu_msg_t));
395 		IBTF_DPRINTF_L1(cmlog, "MRA MAD SIZE %d",
396 		    sizeof (ibcm_mra_msg_t));
397 		IBTF_DPRINTF_L1(cmlog, "REJ MAD SIZE %d",
398 		    sizeof (ibcm_rej_msg_t));
399 		IBTF_DPRINTF_L1(cmlog, "LAP MAD SIZE %d",
400 		    sizeof (ibcm_lap_msg_t));
401 		IBTF_DPRINTF_L1(cmlog, "APR MAD SIZE %d",
402 		    sizeof (ibcm_apr_msg_t));
403 		IBTF_DPRINTF_L1(cmlog, "DREQ MAD SIZE %d",
404 		    sizeof (ibcm_dreq_msg_t));
405 		IBTF_DPRINTF_L1(cmlog, "DREP MAD SIZE %d",
406 		    sizeof (ibcm_drep_msg_t));
407 		IBTF_DPRINTF_L1(cmlog, "SIDR REQ MAD SIZE %d",
408 		    sizeof (ibcm_sidr_req_msg_t));
409 		IBTF_DPRINTF_L1(cmlog, "SIDR REP MAD SIZE %d",
410 		    sizeof (ibcm_sidr_rep_msg_t));
411 	}
412 
413 #endif
414 
415 	/* Create all global locks within cm module */
416 	mutex_init(&ibcm_svc_info_lock, NULL, MUTEX_DEFAULT, NULL);
417 	mutex_init(&ibcm_timeout_list_lock, NULL, MUTEX_DEFAULT, NULL);
418 	mutex_init(&ibcm_global_hca_lock, NULL, MUTEX_DEFAULT, NULL);
419 	mutex_init(&ibcm_sa_open_lock, NULL, MUTEX_DEFAULT, NULL);
420 	mutex_init(&ibcm_recv_mutex, NULL, MUTEX_DEFAULT, NULL);
421 	mutex_init(&ibcm_sm_notice_serialize_lock, NULL, MUTEX_DEFAULT, NULL);
422 	mutex_init(&ibcm_qp_list_lock, NULL, MUTEX_DEFAULT, NULL);
423 	mutex_init(&ibcm_trace_mutex, NULL, MUTEX_DEFAULT, NULL);
424 	mutex_init(&ibcm_trace_print_mutex, NULL, MUTEX_DEFAULT, NULL);
425 	cv_init(&ibcm_svc_info_cv, NULL, CV_DRIVER, NULL);
426 	cv_init(&ibcm_timeout_list_cv, NULL, CV_DRIVER, NULL);
427 	cv_init(&ibcm_timeout_thread_done_cv, NULL, CV_DRIVER, NULL);
428 	cv_init(&ibcm_global_hca_cv, NULL, CV_DRIVER, NULL);
429 	cv_init(&ibcm_sa_open_cv, NULL, CV_DRIVER, NULL);
430 	avl_create(&ibcm_svc_avl_tree, ibcm_svc_compare,
431 	    sizeof (ibcm_svc_info_t),
432 	    offsetof(struct ibcm_svc_info_s, svc_link));
433 
434 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_locks: done");
435 }
436 
437 /* Destroys all global mutexes and CVs in the cm module */
438 static void
439 ibcm_fini_locks()
440 {
441 	/* Destroy all global locks within cm module */
442 	mutex_destroy(&ibcm_svc_info_lock);
443 	mutex_destroy(&ibcm_timeout_list_lock);
444 	mutex_destroy(&ibcm_global_hca_lock);
445 	mutex_destroy(&ibcm_sa_open_lock);
446 	mutex_destroy(&ibcm_recv_mutex);
447 	mutex_destroy(&ibcm_sm_notice_serialize_lock);
448 	mutex_destroy(&ibcm_qp_list_lock);
449 	mutex_destroy(&ibcm_trace_mutex);
450 	mutex_destroy(&ibcm_trace_print_mutex);
451 	cv_destroy(&ibcm_svc_info_cv);
452 	cv_destroy(&ibcm_timeout_list_cv);
453 	cv_destroy(&ibcm_timeout_thread_done_cv);
454 	cv_destroy(&ibcm_global_hca_cv);
455 	cv_destroy(&ibcm_sa_open_cv);
456 	avl_destroy(&ibcm_svc_avl_tree);
457 
458 	IBTF_DPRINTF_L5(cmlog, "ibcm_fini_locks: done");
459 }
460 
461 
462 /* Initialize CM's classport info */
463 static void
464 ibcm_init_classportinfo()
465 {
466 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_clpinfo));
467 
468 	ibcm_clpinfo.BaseVersion = IBCM_MAD_BASE_VERSION;
469 	ibcm_clpinfo.ClassVersion = IBCM_MAD_CLASS_VERSION;
470 
471 	/* For now, CM supports same capabilities at all ports */
472 	ibcm_clpinfo.CapabilityMask =
473 	    h2b16(IBCM_CPINFO_CAP_RC | IBCM_CPINFO_CAP_SIDR);
474 
475 	/* Bits 0-7 are all 0 for Communication Mgmt Class */
476 
477 	/* For now, CM has the same respvalue at all ports */
478 	ibcm_clpinfo.RespTimeValue_plus =
479 	    h2b32(ibt_usec2ib(ibcm_local_processing_time) & 0x1f);
480 
481 	/* For now, redirect fields are set to 0 */
482 	/* Trap fields are not applicable to CM, hence set to 0 */
483 
484 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_clpinfo));
485 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_classportinfo: done");
486 }
487 
488 /*
489  * ibcm_init():
490  * 	- call ibt_attach()
491  * 	- create AVL trees
492  *	- Attach any HCAs that were already present before
493  *	CM got loaded.
494  *
495  * Arguments:	NONE
496  *
497  * Return values:
498  *	IBCM_SUCCESS - success
499  */
500 static ibcm_status_t
501 ibcm_init(void)
502 {
503 	ibt_status_t	status;
504 	kthread_t	*t;
505 
506 	IBTF_DPRINTF_L3(cmlog, "ibcm_init:");
507 
508 	ibcm_init_classportinfo();
509 
510 	if (ibcm_init_ids() != IBCM_SUCCESS) {
511 		IBTF_DPRINTF_L1(cmlog, "ibcm_init: "
512 		    "fatal error: vmem_create() failed");
513 		return (IBCM_FAILURE);
514 	}
515 	ibcm_init_locks();
516 
517 	if (ibcm_ar_init() != IBCM_SUCCESS) {
518 		IBTF_DPRINTF_L1(cmlog, "ibcm_init: "
519 		    "fatal error: ibcm_ar_init() failed");
520 		ibcm_fini_ids();
521 		ibcm_fini_locks();
522 		return (IBCM_FAILURE);
523 	}
524 
525 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_taskq))
526 	ibcm_taskq = system_taskq;
527 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_taskq))
528 
529 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_flags))
530 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_thread_did))
531 
532 	/* Start the timeout list processing thread */
533 	ibcm_timeout_list_flags = 0;
534 	t = thread_create(NULL, 0, ibcm_process_tlist, 0, 0, &p0, TS_RUN,
535 	    ibcm_timeout_thread_pri);
536 	ibcm_timeout_thread_did = t->t_did;
537 
538 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_flags))
539 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_thread_did))
540 
541 	/*
542 	 * NOTE : if ibt_attach is done after ibcm_init_hcas, then some
543 	 * HCA DR events may be lost. CM could call re-init hca list
544 	 * again, but it is more complicated. Some HCA's DR's lost may
545 	 * be HCA detach, which makes hca list re-syncing and locking more
546 	 * complex
547 	 */
548 	status = ibt_attach(&ibcm_ibt_modinfo, NULL, NULL, &ibcm_ibt_handle);
549 	if (status != IBT_SUCCESS) {
550 		IBTF_DPRINTF_L2(cmlog, "ibcm_init(): ibt_attach failed %d",
551 		    status);
552 		(void) ibcm_ar_fini();
553 		ibcm_fini_ids();
554 		ibcm_fini_locks();
555 		ibcm_stop_timeout_thread();
556 		return (IBCM_FAILURE);
557 	}
558 
559 	/* Block all HCA attach/detach asyncs */
560 	mutex_enter(&ibcm_global_hca_lock);
561 
562 	ibcm_init_hcas();
563 	ibcm_finit_state = IBCM_FINIT_IDLE;
564 
565 	ibcm_path_cache_init();
566 
567 	/* Unblock any waiting HCA DR asyncs in CM */
568 	mutex_exit(&ibcm_global_hca_lock);
569 
570 	ibcm_rc_flow_control_init();
571 
572 	IBTF_DPRINTF_L4(cmlog, "ibcm_init: done");
573 	return (IBCM_SUCCESS);
574 }
575 
576 /* Allocates and initializes the "per hca" global data in CM */
577 static void
578 ibcm_init_hcas()
579 {
580 	uint_t	num_hcas = 0;
581 	ib_guid_t *guid_array;
582 	int i;
583 
584 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_hcas:");
585 
586 	/* Get the number of HCAs */
587 	num_hcas = ibt_get_hca_list(&guid_array);
588 	IBTF_DPRINTF_L4(cmlog, "ibcm_init_hcas: ibt_get_hca_list() "
589 	    "returned %d hcas", num_hcas);
590 
591 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
592 
593 	for (i = 0; i < num_hcas; i++)
594 		ibcm_hca_attach(guid_array[i]);
595 
596 	if (num_hcas)
597 		ibt_free_hca_list(guid_array, num_hcas);
598 
599 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_hcas: done");
600 }
601 
602 
603 /*
604  * ibcm_fini():
605  * 	- Deregister w/ ibt
606  * 	- Cleanup IBCM HCA listp
607  * 	- Destroy mutexes
608  *
609  * Arguments:	NONE
610  *
611  * Return values:
612  *	IBCM_SUCCESS - success
613  */
614 static ibcm_status_t
615 ibcm_fini(void)
616 {
617 	ibt_status_t	status;
618 
619 	IBTF_DPRINTF_L3(cmlog, "ibcm_fini:");
620 
621 	/*
622 	 * CM assumes that all general clients have torn down all their
623 	 * established connections and service registrations, and completed
624 	 * all pending SIDR operations, before a call to ibcm_fini()
625 	 */
626 
627 	if (ibcm_ar_fini() != IBCM_SUCCESS) {
628 		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: ibcm_ar_fini failed");
629 		return (IBCM_FAILURE);
630 	}
631 
632 	/* cleanup the svcinfo list */
633 	mutex_enter(&ibcm_svc_info_lock);
634 	if (avl_first(&ibcm_svc_avl_tree) != NULL) {
635 		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: "
636 		    "ibcm_svc_avl_tree is not empty");
637 		mutex_exit(&ibcm_svc_info_lock);
638 		return (IBCM_FAILURE);
639 	}
640 	mutex_exit(&ibcm_svc_info_lock);
641 
642 	/* disables any new hca attach/detaches */
643 	mutex_enter(&ibcm_global_hca_lock);
644 
645 	ibcm_finit_state = IBCM_FINIT_BUSY;
646 
647 	if (ibcm_fini_hcas() != IBCM_SUCCESS) {
648 		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: "
649 		    "some hca's still have client resources");
650 
651 		/* First, re-initialize the hcas */
652 		ibcm_init_hcas();
653 		/* and then enable the HCA asyncs */
654 		ibcm_finit_state = IBCM_FINIT_IDLE;
655 		mutex_exit(&ibcm_global_hca_lock);
656 		if (ibcm_ar_init() != IBCM_SUCCESS) {
657 			IBTF_DPRINTF_L1(cmlog, "ibcm_fini:ibcm_ar_init failed");
658 		}
659 		return (IBCM_FAILURE);
660 	}
661 
662 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_hdr))
663 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_ud_timeout_list_hdr))
664 
665 	ASSERT(ibcm_timeout_list_hdr == NULL);
666 	ASSERT(ibcm_ud_timeout_list_hdr == NULL);
667 
668 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_hdr))
669 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_ud_timeout_list_hdr))
670 
671 	/* Release any pending asyncs on ibcm_global_hca_lock */
672 	ibcm_finit_state = IBCM_FINIT_SUCCESS;
673 	mutex_exit(&ibcm_global_hca_lock);
674 
675 	ibcm_stop_timeout_thread();
676 
677 	/*
678 	 * Detach from IBTL. Waits until all pending asyncs are complete.
679 	 * Above cv_broadcast wakes up any waiting hca attach/detach asyncs
680 	 */
681 	status = ibt_detach(ibcm_ibt_handle);
682 
683 	/* if detach fails, CM did not free up some resources; just log it */
684 	if (status != IBT_SUCCESS)
685 		IBTF_DPRINTF_L1(cmlog, "ibcm_fini: ibt_detach failed %d",
		    status);
686 
687 	ibcm_rc_flow_control_fini();
688 
689 	ibcm_path_cache_fini();
690 
691 	ibcm_fini_ids();
692 	ibcm_fini_locks();
693 	IBTF_DPRINTF_L3(cmlog, "ibcm_fini: done");
694 	return (IBCM_SUCCESS);
695 }
696 
697 /* This routine exits the ibcm timeout thread */
698 static void
699 ibcm_stop_timeout_thread()
700 {
701 	mutex_enter(&ibcm_timeout_list_lock);
702 
703 	/* Stop the timeout list processing thread */
704 	ibcm_timeout_list_flags =
705 	    ibcm_timeout_list_flags | IBCM_TIMEOUT_THREAD_EXIT;
706 
707 	/* Wake up, if the timeout thread is on a cv_wait */
708 	cv_signal(&ibcm_timeout_list_cv);
709 
710 	mutex_exit(&ibcm_timeout_list_lock);
711 	thread_join(ibcm_timeout_thread_did);
712 
713 	IBTF_DPRINTF_L5(cmlog, "ibcm_stop_timeout_thread: done");
714 }
715 
716 
717 /* Attempts to release all the hca's associated with CM */
718 static ibcm_status_t
719 ibcm_fini_hcas()
720 {
721 	ibcm_hca_info_t *hcap, *next;
722 
723 	IBTF_DPRINTF_L4(cmlog, "ibcm_fini_hcas:");
724 
725 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
726 
727 	hcap = ibcm_hca_listp;
728 	while (hcap != NULL) {
729 		next = hcap->hca_next;
730 		if (ibcm_hca_detach(hcap) != IBCM_SUCCESS) {
731 			ibcm_hca_listp = hcap;
732 			return (IBCM_FAILURE);
733 		}
734 		hcap = next;
735 	}
736 
737 	IBTF_DPRINTF_L4(cmlog, "ibcm_fini_hcas: SUCCEEDED");
738 	return (IBCM_SUCCESS);
739 }
740 
741 
742 /*
743  * ibcm_hca_attach():
744  *	Called as an asynchronous event to notify CM of an attach of HCA.
745  *	Here ibcm_hca_info_t is initialized and all fields are
746  *	filled in along with SA Access handles and IBMA handles.
747  *	Also called from ibcm_init to initialize ibcm_hca_info_t's for each
748  *	hca.
749  *
750  * Arguments: (WILL CHANGE BASED ON ASYNC EVENT CODE)
751  *	hca_guid	- HCA's guid
752  *
753  * Return values: NONE
754  */
755 static void
756 ibcm_hca_attach(ib_guid_t hcaguid)
757 {
758 	int			i;
759 	ibt_status_t		status;
760 	uint_t			nports = 0;
761 	ibcm_hca_info_t		*hcap;
762 	ibt_hca_attr_t		hca_attrs;
763 
764 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_attach: guid = 0x%llX", hcaguid);
765 
766 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
767 
768 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*hcap))
769 
770 	status = ibt_query_hca_byguid(hcaguid, &hca_attrs);
771 	if (status != IBT_SUCCESS) {
772 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
773 		    "ibt_query_hca_byguid failed = %d", status);
774 		return;
775 	}
776 	nports = hca_attrs.hca_nports;
777 
778 	IBTF_DPRINTF_L4(cmlog, "ibcm_hca_attach: num ports = %x", nports);
779 
780 	if ((hcap = ibcm_add_hca_entry(hcaguid, nports)) == NULL)
781 		return;
782 
783 	hcap->hca_guid = hcaguid;	/* Set GUID */
784 	hcap->hca_num_ports = nports;	/* Set number of ports */
785 
786 	if (ibcm_init_hca_ids(hcap) != IBCM_SUCCESS) {
787 		ibcm_delete_hca_entry(hcap);
788 		return;
789 	}
790 
791 	/* Store the static hca attribute data */
792 	hcap->hca_caps = hca_attrs.hca_flags;
793 	hcap->hca_ack_delay = hca_attrs.hca_local_ack_delay;
794 	hcap->hca_max_rdma_in_qp = hca_attrs.hca_max_rdma_in_qp;
795 	hcap->hca_max_rdma_out_qp = hca_attrs.hca_max_rdma_out_qp;
796 
797 	/* loop thru nports and initialize IBMF handles */
798 	for (i = 0; i < hcap->hca_num_ports; i++) {
799 		status = ibt_get_port_state_byguid(hcaguid, i + 1, NULL, NULL);
800 		if (status != IBT_SUCCESS) {
801 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
802 			    "port_num %d state DOWN", i + 1);
803 		}
804 
805 		hcap->hca_port_info[i].port_hcap = hcap;
806 		hcap->hca_port_info[i].port_num = i + 1;
807 
808 		if ((status = ibcm_hca_init_port(hcap, i)) != IBT_SUCCESS)
809 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
810 			    "ibcm_hca_init_port failed %d port_num %d",
811 			    status, i + 1);
812 	}
813 
814 	/* create the "active" CM AVL tree */
815 	avl_create(&hcap->hca_active_tree, ibcm_active_node_compare,
816 	    sizeof (ibcm_state_data_t),
817 	    offsetof(struct ibcm_state_data_s, avl_active_link));
818 
819 	/* create the "passive" CM AVL tree */
820 	avl_create(&hcap->hca_passive_tree, ibcm_passive_node_compare,
821 	    sizeof (ibcm_state_data_t),
822 	    offsetof(struct ibcm_state_data_s, avl_passive_link));
823 
824 	/* create the "passive comid" CM AVL tree */
825 	avl_create(&hcap->hca_passive_comid_tree,
826 	    ibcm_passive_comid_node_compare,
827 	    sizeof (ibcm_state_data_t),
828 	    offsetof(struct ibcm_state_data_s, avl_passive_comid_link));
829 
830 	/*
831 	 * Mark the state of the HCA to "attach" only at the end
832 	 * Now CM starts accepting incoming MADs and client API calls
833 	 */
834 	hcap->hca_state = IBCM_HCA_ACTIVE;
835 
836 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*hcap))
837 
838 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_attach: ATTACH Done");
839 }
840 
841 /*
842  * ibcm_hca_detach():
843  *	Called as an asynchronous event to notify CM of a detach of HCA.
844  *	Here ibcm_hca_info_t is freed up and all fields that
845  *	were initialized earlier are cleaned up
846  *
847  * Arguments: (WILL CHANGE BASED ON ASYNC EVENT CODE)
848  *	hca_guid    - HCA's guid
849  *
850  * Return values:
851  *	IBCM_SUCCESS	- able to detach HCA
852  *	IBCM_FAILURE	- failed to detach HCA
853  */
854 static ibcm_status_t
855 ibcm_hca_detach(ibcm_hca_info_t *hcap)
856 {
857 	int		port_index, i;
858 	ibcm_status_t	status = IBCM_SUCCESS;
859 	clock_t		absolute_time;
860 
861 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach: hcap = 0x%p guid = 0x%llX",
862 	    hcap, hcap->hca_guid);
863 
864 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
865 
866 	/*
867 	 * Declare hca is going away to all CM clients. Wait until the
868 	 * access count becomes zero.
869 	 */
870 	hcap->hca_state = IBCM_HCA_NOT_ACTIVE;
871 
872 	/* wait on the response CV for up to 500ms */
873 	absolute_time = ddi_get_lbolt() +
874 	    drv_usectohz(ibcm_wait_for_acc_cnt_timeout);
875 
876 	while (hcap->hca_acc_cnt > 0)
877 		if (cv_timedwait(&ibcm_global_hca_cv, &ibcm_global_hca_lock,
878 		    absolute_time) == -1)
879 			break;
880 
881 	if (hcap->hca_acc_cnt != 0) {
882 		/* We got a timeout */
883 #ifdef DEBUG
884 		if (ibcm_test_mode > 0)
885 			IBTF_DPRINTF_L1(cmlog, "ibcm_hca_detach: Unexpected "
886 			    "abort due to timeout on acc_cnt %u",
887 			    hcap->hca_acc_cnt);
888 		else
889 #endif
890 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: Aborting due"
891 			    " to timeout on acc_cnt %u", hcap->hca_acc_cnt);
892 		hcap->hca_state = IBCM_HCA_ACTIVE;
893 		return (IBCM_FAILURE);
894 	}
895 
896 	/*
897 	 * First make sure, there are no active users of ibma handles,
898 	 * and then de-register handles.
899 	 */
900 
901 	/* make sure that there are no "Service"s registered w/ this HCA. */
902 	if (hcap->hca_svc_cnt != 0) {
903 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
904 		    "Active services still there %d", hcap->hca_svc_cnt);
905 		hcap->hca_state = IBCM_HCA_ACTIVE;
906 		return (IBCM_FAILURE);
907 	}
908 
909 	if (ibcm_check_sidr_clean(hcap) != IBCM_SUCCESS) {
910 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach:"
911 		    "There are active SIDR operations");
912 		hcap->hca_state = IBCM_HCA_ACTIVE;
913 		return (IBCM_FAILURE);
914 	}
915 
916 	if (ibcm_check_avl_clean(hcap) != IBCM_SUCCESS) {
917 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
918 		    "There are active RC connections");
919 		hcap->hca_state = IBCM_HCA_ACTIVE;
920 		return (IBCM_FAILURE);
921 	}
922 
923 	/*
924 	 * Now, wait until all rc and sidr stateps go away
925 	 * All these stateps must be short lived ones, waiting to be cleaned
926 	 * up after some timeout value, based on the current state.
927 	 */
928 	IBTF_DPRINTF_L5(cmlog, "hca_guid = 0x%llX res_cnt = %d",
929 	    hcap->hca_guid, hcap->hca_res_cnt);
930 
931 	/* wait on the response CV for up to 500ms */
932 	absolute_time = ddi_get_lbolt() +
933 	    drv_usectohz(ibcm_wait_for_res_cnt_timeout);
934 
935 	while (hcap->hca_res_cnt > 0)
936 		if (cv_timedwait(&ibcm_global_hca_cv, &ibcm_global_hca_lock,
937 		    absolute_time) == -1)
938 			break;
939 
940 	if (hcap->hca_res_cnt != 0) {
941 		/* We got a timeout waiting for hca_res_cnt to become 0 */
942 #ifdef DEBUG
943 		if (ibcm_test_mode > 0)
944 			IBTF_DPRINTF_L1(cmlog, "ibcm_hca_detach: Unexpected "
945 			    "abort due to timeout on res_cnt %d",
946 			    hcap->hca_res_cnt);
947 		else
948 #endif
949 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: Aborting due"
950 			    " to timeout on res_cnt %d", hcap->hca_res_cnt);
951 		hcap->hca_state = IBCM_HCA_ACTIVE;
952 		return (IBCM_FAILURE);
953 	}
954 
955 	/* Re-assert the while loop step above */
956 	ASSERT(hcap->hca_sidr_list == NULL);
957 	avl_destroy(&hcap->hca_active_tree);
958 	avl_destroy(&hcap->hca_passive_tree);
959 	avl_destroy(&hcap->hca_passive_comid_tree);
960 
961 	/*
962 	 * Unregister all ports from IBMA
963 	 * If there is a failure, re-initialize any free'd ibma handles. This
964 	 * is required to receive the incoming mads
965 	 */
966 	status = IBCM_SUCCESS;
967 	for (port_index = 0; port_index < hcap->hca_num_ports; port_index++) {
968 		if ((status = ibcm_hca_fini_port(hcap, port_index)) !=
969 		    IBCM_SUCCESS) {
970 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
971 			    "Failed to free IBMA Handle for port_num %d",
972 			    port_index + 1);
973 			break;
974 		}
975 	}
976 
977 	/* If detach fails, re-initialize ibma handles for incoming mads */
978 	if (status != IBCM_SUCCESS) {
979 		for (i = 0; i < port_index; i++) {
980 			if (ibcm_hca_init_port(hcap, i) != IBT_SUCCESS)
981 				IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
982 				    "Failed to re-allocate IBMA Handles for"
983 				    " port_num %d", i + 1);
984 		}
985 		hcap->hca_state = IBCM_HCA_ACTIVE;
986 		return (IBCM_FAILURE);
987 	}
988 
989 	ibcm_fini_hca_ids(hcap);
990 	ibcm_delete_hca_entry(hcap);
991 
992 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach: DETACH succeeded");
993 	return (IBCM_SUCCESS);
994 }
995 
996 /* Checks if there are any active sidr state entries in the specified hca */
997 static ibcm_status_t
998 ibcm_check_sidr_clean(ibcm_hca_info_t *hcap)
999 {
1000 	ibcm_ud_state_data_t	*usp;
1001 	uint32_t		transient_cnt = 0;
1002 
1003 	IBTF_DPRINTF_L5(cmlog, "ibcm_check_sidr_clean:");
1004 
1005 	rw_enter(&hcap->hca_sidr_list_lock, RW_WRITER);
1006 	usp = hcap->hca_sidr_list;	/* Point to the list */
1007 	while (usp != NULL) {
1008 		mutex_enter(&usp->ud_state_mutex);
1009 		if ((usp->ud_state != IBCM_STATE_SIDR_REP_SENT) &&
1010 		    (usp->ud_state != IBCM_STATE_TIMED_OUT) &&
1011 		    (usp->ud_state != IBCM_STATE_DELETE)) {
1012 
1013 			IBTF_DPRINTF_L3(cmlog, "ibcm_check_sidr_clean: "
1014 			    "usp = %p not in transient state = %d", usp,
1015 			    usp->ud_state);
1016 
1017 			mutex_exit(&usp->ud_state_mutex);
1018 			rw_exit(&hcap->hca_sidr_list_lock);
1019 			return (IBCM_FAILURE);
1020 		} else {
1021 			mutex_exit(&usp->ud_state_mutex);
1022 			++transient_cnt;
1023 		}
1024 
1025 		usp = usp->ud_nextp;
1026 	}
1027 	rw_exit(&hcap->hca_sidr_list_lock);
1028 
1029 	IBTF_DPRINTF_L4(cmlog, "ibcm_check_sidr_clean: transient_cnt %d",
1030 	    transient_cnt);
1031 
1032 	return (IBCM_SUCCESS);
1033 }
1034 
1035 /* Checks if there are any active rc state entries in the specified hca */
1036 static ibcm_status_t
1037 ibcm_check_avl_clean(ibcm_hca_info_t *hcap)
1038 
1039 {
1040 	ibcm_state_data_t	*sp;
1041 	avl_tree_t		*avl_tree;
1042 	uint32_t		transient_cnt = 0;
1043 
1044 	IBTF_DPRINTF_L5(cmlog, "ibcm_check_avl_clean:");
1045 	/*
1046 	 * Both trees, i.e., active and passive, must reference all
1047 	 * statep's, so use either one
1048 	 */
1049 	avl_tree = &hcap->hca_active_tree;
1050 
1051 	rw_enter(&hcap->hca_state_rwlock, RW_WRITER);
1052 
1053 	for (sp = avl_first(avl_tree); sp != NULL;
1054 	    sp = avl_walk(avl_tree, sp, AVL_AFTER)) {
1055 		mutex_enter(&sp->state_mutex);
1056 		if ((sp->state != IBCM_STATE_TIMEWAIT) &&
1057 		    (sp->state != IBCM_STATE_REJ_SENT) &&
1058 		    (sp->state != IBCM_STATE_DELETE)) {
1059 			IBTF_DPRINTF_L3(cmlog, "ibcm_check_avl_clean: "
1060 			    "sp = %p not in transient state = %d", sp,
1061 			    sp->state);
1062 			mutex_exit(&sp->state_mutex);
1063 			rw_exit(&hcap->hca_state_rwlock);
1064 			return (IBCM_FAILURE);
1065 		} else {
1066 			mutex_exit(&sp->state_mutex);
1067 			++transient_cnt;
1068 		}
1069 	}
1070 
1071 	rw_exit(&hcap->hca_state_rwlock);
1072 
1073 	IBTF_DPRINTF_L4(cmlog, "ibcm_check_avl_clean: transient_cnt %d",
1074 	    transient_cnt);
1075 
1076 	return (IBCM_SUCCESS);
1077 }
1078 
1079 /* Adds a new entry into CM's global hca list, if hca_guid is not there yet */
1080 static ibcm_hca_info_t *
1081 ibcm_add_hca_entry(ib_guid_t hcaguid, uint_t nports)
1082 {
1083 	ibcm_hca_info_t	*hcap;
1084 
1085 	IBTF_DPRINTF_L5(cmlog, "ibcm_add_hca_entry: guid = 0x%llX",
1086 	    hcaguid);
1087 
1088 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
1089 
1090 	/*
1091 	 * Check if this hca_guid is already in the list.
1092 	 * If yes, then ignore this and return NULL
1093 	 */
1094 
1095 	hcap = ibcm_hca_listp;
1096 
1097 	/* search for this HCA */
1098 	while (hcap != NULL) {
1099 		if (hcap->hca_guid == hcaguid) {
1100 			/* already exists */
1101 			IBTF_DPRINTF_L2(cmlog, "ibcm_add_hca_entry: "
1102 			    "hcap %p guid 0x%llX, entry already exists !!",
1103 			    hcap, hcap->hca_guid);
1104 			return (NULL);
1105 		}
1106 		hcap = hcap->hca_next;
1107 	}
1108 
1109 	/* Allocate storage for the new HCA entry found */
1110 	hcap = kmem_zalloc(sizeof (ibcm_hca_info_t) +
1111 	    (nports - 1) * sizeof (ibcm_port_info_t), KM_SLEEP);
1112 
1113 	/* initialize RW lock */
1114 	rw_init(&hcap->hca_state_rwlock, NULL, RW_DRIVER, NULL);
1115 	/* initialize SIDR list lock */
1116 	rw_init(&hcap->hca_sidr_list_lock, NULL, RW_DRIVER, NULL);
1117 	/* Insert "hcap" into the global HCA list maintained by CM */
1118 	hcap->hca_next = ibcm_hca_listp;
1119 	ibcm_hca_listp = hcap;
1120 
1121 	IBTF_DPRINTF_L5(cmlog, "ibcm_add_hca_entry: done hcap = 0x%p", hcap);
1122 
1123 	return (hcap);
1124 
1125 }
1126 
1127 /* deletes the given ibcm_hca_info_t from CM's global hca list */
1128 void
1129 ibcm_delete_hca_entry(ibcm_hca_info_t *hcap)
1130 {
1131 
1132 	ibcm_hca_info_t	*headp, *prevp = NULL;
1133 
1134 	/* ibcm_hca_global_lock is held */
1135 	IBTF_DPRINTF_L5(cmlog, "ibcm_delete_hca_entry: guid = 0x%llX "
1136 	    "hcap = 0x%p", hcap->hca_guid, hcap);
1137 
1138 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
1139 
1140 	headp = ibcm_hca_listp;
1141 	while (headp != NULL) {
1142 		if (headp == hcap) {
1143 			IBTF_DPRINTF_L3(cmlog, "ibcm_delete_hca_entry: "
1144 			    "deleting hcap %p hcaguid %llX", hcap,
1145 			    hcap->hca_guid);
1146 			if (prevp) {
1147 				prevp->hca_next = headp->hca_next;
1148 			} else {
1149 				prevp = headp->hca_next;
1150 				ibcm_hca_listp = prevp;
1151 			}
1152 			rw_destroy(&hcap->hca_state_rwlock);
1153 			rw_destroy(&hcap->hca_sidr_list_lock);
1154 			kmem_free(hcap, sizeof (ibcm_hca_info_t) +
1155 			    (hcap->hca_num_ports - 1) *
1156 			    sizeof (ibcm_port_info_t));
1157 			return;
1158 		}
1159 
1160 		prevp = headp;
1161 		headp = headp->hca_next;
1162 	}
1163 }
1164 
1165 /*
1166  * ibcm_find_hca_entry:
1167  *	Given an HCA's GUID, find the ibcm_hca_info_t entry for that HCA.
1168  *	This entry can then be used to access the AVL trees/SIDR list etc.
1169  *	If the entry exists and is in the ACTIVE state, then the hca's ref
1170  *	cnt is incremented and the entry returned. Else NULL is returned.
1171  *
1172  *	All functions that use ibcm_find_hca_entry and get a non-NULL
1173  *	return values must call ibcm_dec_hca_acc_cnt to decrement the
1174  *	respective hca ref cnt. There shouldn't be any usage of
1175  *	ibcm_hca_info_t * returned from ibcm_find_hca_entry,
1176  *	after decrementing the hca_acc_cnt
1177  *
1178  * INPUTS:
1179  *	hca_guid	- HCA's guid
1180  *
1181  * RETURN VALUE:
1182  *	hcap		- if a match is found, else NULL
1183  */
1184 ibcm_hca_info_t *
1185 ibcm_find_hca_entry(ib_guid_t hca_guid)
1186 {
1187 	ibcm_hca_info_t *hcap;
1188 
1189 	IBTF_DPRINTF_L5(cmlog, "ibcm_find_hca_entry: guid = 0x%llX", hca_guid);
1190 
1191 	mutex_enter(&ibcm_global_hca_lock);
1192 
1193 	hcap = ibcm_hca_listp;
1194 	/* search for this HCA */
1195 	while (hcap != NULL) {
1196 		if (hcap->hca_guid == hca_guid)
1197 			break;
1198 		hcap = hcap->hca_next;
1199 	}
1200 
1201 	/* if no hcap for the hca_guid, return NULL */
1202 	if (hcap == NULL) {
1203 		mutex_exit(&ibcm_global_hca_lock);
1204 		return (NULL);
1205 	}
1206 
1207 	/* return hcap only if it is valid to use */
1208 	if (hcap->hca_state == IBCM_HCA_ACTIVE) {
1209 		++(hcap->hca_acc_cnt);
1210 
1211 		IBTF_DPRINTF_L5(cmlog, "ibcm_find_hca_entry: "
1212 		    "found hcap = 0x%p hca_acc_cnt %u", hcap,
1213 		    hcap->hca_acc_cnt);
1214 
1215 		mutex_exit(&ibcm_global_hca_lock);
1216 		return (hcap);
1217 	} else {
1218 		mutex_exit(&ibcm_global_hca_lock);
1219 
1220 		IBTF_DPRINTF_L2(cmlog, "ibcm_find_hca_entry: "
1221 		    "found hcap = 0x%p not in active state", hcap);
1222 		return (NULL);
1223 	}
1224 }
1225 
1226 /*
1227  * Searches for ibcm_hca_info_t entry based on hca_guid, but doesn't increment
1228  * the hca's reference count. This function is used where the calling context
1229  * is attempting to delete hcap itself and hence acc_cnt cannot be incremented,
1230  * OR where it assumes that a valid hcap must be available in ibcm's global
1231  * hca list.
1231  */
1232 ibcm_hca_info_t *
1233 ibcm_find_hcap_entry(ib_guid_t hca_guid)
1234 {
1235 	ibcm_hca_info_t *hcap;
1236 
1237 	IBTF_DPRINTF_L5(cmlog, "ibcm_find_hcap_entry: guid = 0x%llX", hca_guid);
1238 
1239 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
1240 
1241 	hcap = ibcm_hca_listp;
1242 	/* search for this HCA */
1243 	while (hcap != NULL) {
1244 		if (hcap->hca_guid == hca_guid)
1245 			break;
1246 		hcap = hcap->hca_next;
1247 	}
1248 
1249 	if (hcap == NULL)
1250 		IBTF_DPRINTF_L2(cmlog, "ibcm_find_hcap_entry: No hcap found for"
1251 		    " hca_guid 0x%llX", hca_guid);
1252 	else
1253 		IBTF_DPRINTF_L5(cmlog, "ibcm_find_hcap_entry: hcap found for"
1254 		    " hca_guid 0x%llX", hca_guid);
1255 
1256 	return (hcap);
1257 }
1258 
1259 /* increment the hca's temporary reference count */
1260 ibcm_status_t
1261 ibcm_inc_hca_acc_cnt(ibcm_hca_info_t *hcap)
1262 {
1263 	mutex_enter(&ibcm_global_hca_lock);
1264 	if (hcap->hca_state == IBCM_HCA_ACTIVE) {
1265 		++(hcap->hca_acc_cnt);
1266 		IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_acc_cnt: "
1267 		    "hcap = 0x%p  acc_cnt = %d ", hcap, hcap->hca_acc_cnt);
1268 		mutex_exit(&ibcm_global_hca_lock);
1269 		return (IBCM_SUCCESS);
1270 	} else {
1271 		IBTF_DPRINTF_L2(cmlog, "ibcm_inc_hca_acc_cnt: "
1272 		    "hcap INACTIVE 0x%p  acc_cnt = %d ", hcap,
1273 		    hcap->hca_acc_cnt);
1274 		mutex_exit(&ibcm_global_hca_lock);
1275 		return (IBCM_FAILURE);
1276 	}
1277 }
1278 
1279 /* decrement the hca's ref count, and wake up any waiting threads */
1280 void
1281 ibcm_dec_hca_acc_cnt(ibcm_hca_info_t *hcap)
1282 {
1283 	mutex_enter(&ibcm_global_hca_lock);
1284 	ASSERT(hcap->hca_acc_cnt > 0);
1285 	--(hcap->hca_acc_cnt);
1286 	IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_acc_cnt: hcap = 0x%p "
1287 	    "acc_cnt = %d", hcap, hcap->hca_acc_cnt);
1288 	if ((hcap->hca_state == IBCM_HCA_NOT_ACTIVE) &&
1289 	    (hcap->hca_acc_cnt == 0)) {
1290 		IBTF_DPRINTF_L3(cmlog, "ibcm_dec_hca_acc_cnt: "
1291 		    "cv_broadcast for hcap = 0x%p", hcap);
1292 		cv_broadcast(&ibcm_global_hca_cv);
1293 	}
1294 	mutex_exit(&ibcm_global_hca_lock);
1295 }
1296 
1297 /* increment the hca's resource count */
1298 void
1299 ibcm_inc_hca_res_cnt(ibcm_hca_info_t *hcap)
1300 
1301 {
1302 	mutex_enter(&ibcm_global_hca_lock);
1303 	++(hcap->hca_res_cnt);
1304 	IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_res_cnt: hcap = 0x%p "
1305 	    "res_cnt = %d", hcap, hcap->hca_res_cnt);
1306 	mutex_exit(&ibcm_global_hca_lock);
1307 }
1308 
1309 /* decrement the hca's resource count, and wake up any waiting threads */
1310 void
1311 ibcm_dec_hca_res_cnt(ibcm_hca_info_t *hcap)
1312 {
1313 	mutex_enter(&ibcm_global_hca_lock);
1314 	ASSERT(hcap->hca_res_cnt > 0);
1315 	--(hcap->hca_res_cnt);
1316 	IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_res_cnt: hcap = 0x%p "
1317 	    "res_cnt = %d", hcap, hcap->hca_res_cnt);
1318 	if ((hcap->hca_state == IBCM_HCA_NOT_ACTIVE) &&
1319 	    (hcap->hca_res_cnt == 0)) {
1320 		IBTF_DPRINTF_L3(cmlog, "ibcm_dec_hca_res_cnt: "
1321 		    "cv_broadcast for hcap = 0x%p", hcap);
1322 		cv_broadcast(&ibcm_global_hca_cv);
1323 	}
1324 	mutex_exit(&ibcm_global_hca_lock);
1325 }
1326 
1327 /* increment the hca's service count */
1328 void
1329 ibcm_inc_hca_svc_cnt(ibcm_hca_info_t *hcap)
1330 
1331 {
1332 	mutex_enter(&ibcm_global_hca_lock);
1333 	++(hcap->hca_svc_cnt);
1334 	IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_svc_cnt: hcap = 0x%p "
1335 	    "svc_cnt = %d", hcap, hcap->hca_svc_cnt);
1336 	mutex_exit(&ibcm_global_hca_lock);
1337 }
1338 
1339 /* decrement the hca's service count */
1340 void
1341 ibcm_dec_hca_svc_cnt(ibcm_hca_info_t *hcap)
1342 {
1343 	mutex_enter(&ibcm_global_hca_lock);
1344 	ASSERT(hcap->hca_svc_cnt > 0);
1345 	--(hcap->hca_svc_cnt);
1346 	IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_svc_cnt: hcap = 0x%p "
1347 	    "svc_cnt = %d", hcap, hcap->hca_svc_cnt);
1348 	mutex_exit(&ibcm_global_hca_lock);
1349 }
1350 
1351 /*
1352  * Flow control logic for open_rc_channel and close_rc_channel follows.
1353  * We use one instance of the same data structure to control each of
1354  * "open" and "close".  We allow up to IBCM_FLOW_SIMUL_MAX requests to be
1355  * initiated at one time.  We initiate the next group whenever the number
1356  * outstanding drops below the low-water mark (flow->lowat).
1357  */
1358 
1359 /* These variables are for both open_rc_channel and close_rc_channel */
1360 #define	IBCM_FLOW_SIMUL_MAX	32
1361 int ibcm_flow_simul_max = IBCM_FLOW_SIMUL_MAX;
1362 #define	IBCM_SAA_SIMUL_MAX	8
1363 int ibcm_saa_simul_max = IBCM_SAA_SIMUL_MAX;
1364 
1365 typedef struct ibcm_flow1_s {
1366 	struct ibcm_flow1_s	*link;
1367 	kcondvar_t		cv;
1368 	uint8_t			waiters;	/* 1 to IBCM_FLOW_SIMUL_MAX */
1369 } ibcm_flow1_t;
1370 
1371 typedef struct ibcm_flow_s {
1372 	ibcm_flow1_t	*list;
1373 	uint_t		simul;		/* #requests currently outstanding */
1374 	uint_t		simul_max;
1375 	uint_t		waiters_per_chunk;
1376 	uint_t		lowat;
1377 	uint_t		lowat_default;
1378 	/* statistics */
1379 	uint_t		total;
1380 	uint_t		enable_queuing;
1381 } ibcm_flow_t;
1382 
1383 kmutex_t ibcm_rc_flow_mutex;
1384 ibcm_flow_t ibcm_open_flow;
1385 ibcm_flow_t ibcm_close_flow;
1386 ibcm_flow_t ibcm_saa_flow;
1387 
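/* Initialize a flow struct to allow up to simul_max outstanding requests */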
1388 static void
1389 ibcm_flow_init(ibcm_flow_t *flow, uint_t simul_max)
1390 {
1391 	flow->list = NULL;
1392 	flow->simul = 0;
1393 	flow->waiters_per_chunk = 4;
1394 	flow->simul_max = simul_max;
1395 	flow->lowat = flow->simul_max - flow->waiters_per_chunk;
1396 	flow->lowat_default = flow->lowat;
1397 	/* stats */
1398 	flow->total = 0;
1399 	flow->enable_queuing = 0;
1400 }
1401 
1402 static void
1403 ibcm_rc_flow_control_init(void)
1404 {
1405 	mutex_init(&ibcm_rc_flow_mutex, NULL, MUTEX_DEFAULT, NULL);
1406 	mutex_enter(&ibcm_rc_flow_mutex);
1407 	ibcm_flow_init(&ibcm_open_flow, ibcm_flow_simul_max);
1408 	ibcm_flow_init(&ibcm_close_flow, ibcm_flow_simul_max);
1409 	ibcm_flow_init(&ibcm_saa_flow, ibcm_saa_simul_max);
1410 	mutex_exit(&ibcm_rc_flow_mutex);
1411 }
1412 
1413 static void
1414 ibcm_rc_flow_control_fini(void)
1415 {
1416 	mutex_destroy(&ibcm_rc_flow_mutex);
1417 }
1418 
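/*
 * Find (or allocate) a flow1 chunk with room for another waiter. Note
 * that ibcm_rc_flow_mutex may be dropped and re-acquired here while a
 * new chunk is being allocated.
 */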
1419 static ibcm_flow1_t *
1420 ibcm_flow_find(ibcm_flow_t *flow)
1421 {
1422 	ibcm_flow1_t *flow1;
1423 	ibcm_flow1_t *f;
1424 
1425 	f = flow->list;
1426 	if (f) {	/* most likely code path */
1427 		while (f->link != NULL)
1428 			f = f->link;
1429 		if (f->waiters < flow->waiters_per_chunk)
1430 			return (f);
1431 	}
1432 
1433 	/* There was no flow1 list element ready for another waiter */
1434 	mutex_exit(&ibcm_rc_flow_mutex);
1435 	flow1 = kmem_alloc(sizeof (*flow1), KM_SLEEP);
1436 	mutex_enter(&ibcm_rc_flow_mutex);
1437 
1438 	f = flow->list;
1439 	if (f) {
1440 		while (f->link != NULL)
1441 			f = f->link;
1442 		if (f->waiters < flow->waiters_per_chunk) {
1443 			kmem_free(flow1, sizeof (*flow1));
1444 			return (f);
1445 		}
1446 		f->link = flow1;
1447 	} else {
1448 		flow->list = flow1;
1449 	}
1450 	cv_init(&flow1->cv, NULL, CV_DRIVER, NULL);
1451 	flow1->waiters = 0;
1452 	flow1->link = NULL;
1453 	return (flow1);
1454 }
1455 
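/*
 * Enter the flow: proceed immediately if fewer than simul_max requests
 * are outstanding and nobody is queued; otherwise wait on a flow1 chunk
 * until ibcm_flow_exit() wakes up the next group of waiters.
 */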
1456 static void
1457 ibcm_flow_enter(ibcm_flow_t *flow)
1458 {
1459 	mutex_enter(&ibcm_rc_flow_mutex);
1460 	if (flow->list == NULL && flow->simul < flow->simul_max) {
1461 		flow->simul++;
1462 		flow->total++;
1463 	} else {
1464 		ibcm_flow1_t *flow1;
1465 
1466 		flow1 = ibcm_flow_find(flow);
1467 		flow1->waiters++;
1468 		cv_wait(&flow1->cv, &ibcm_rc_flow_mutex);
1469 		if (--flow1->waiters == 0) {
1470 			cv_destroy(&flow1->cv);
1471 			kmem_free(flow1, sizeof (*flow1));
1472 		}
1473 	}
1474 	mutex_exit(&ibcm_rc_flow_mutex);
1475 }
1476 
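/*
 * Exit the flow: when the number of outstanding requests drops below the
 * low-water mark, wake up the next chunk of waiters and let them proceed.
 */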
1477 static void
1478 ibcm_flow_exit(ibcm_flow_t *flow)
1479 {
1480 	mutex_enter(&ibcm_rc_flow_mutex);
1481 	if (--flow->simul < flow->lowat) {
1482 		flow->lowat += flow->waiters_per_chunk;
1483 		if (flow->lowat > flow->lowat_default)
1484 			flow->lowat = flow->lowat_default;
1485 		if (flow->list) {
1486 			ibcm_flow1_t *flow1;
1487 
1488 			flow1 = flow->list;
1489 			flow->list = flow1->link;	/* unlink */
1490 			flow1->link = NULL;		/* be clean */
1491 			flow->total += flow1->waiters;
1492 			flow->simul += flow1->waiters;
1493 			cv_broadcast(&flow1->cv);
1494 		}
1495 	}
1496 	mutex_exit(&ibcm_rc_flow_mutex);
1497 }
1498 
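/* Halve the low-water mark to slow down new requests when a stall is seen */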
1499 static void
1500 ibcm_flow_stall(ibcm_flow_t *flow)
1501 {
1502 	mutex_enter(&ibcm_rc_flow_mutex);
1503 	if (flow->lowat > 1) {
1504 		flow->lowat >>= 1;
1505 		IBTF_DPRINTF_L2(cmlog, "stall - lowat = %d", flow->lowat);
1506 	}
1507 	mutex_exit(&ibcm_rc_flow_mutex);
1508 }
1509 
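/* Wrappers applying the above flow control to opens, closes and SA access */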
1510 void
1511 ibcm_rc_flow_control_enter(void)
1512 {
1513 	ibcm_flow_enter(&ibcm_open_flow);
1514 }
1515 
1516 void
1517 ibcm_rc_flow_control_exit(void)
1518 {
1519 	ibcm_flow_exit(&ibcm_open_flow);
1520 }
1521 
1522 void
1523 ibcm_close_flow_control_enter()
1524 {
1525 	ibcm_flow_enter(&ibcm_close_flow);
1526 }
1527 
1528 void
1529 ibcm_close_flow_control_exit()
1530 {
1531 	ibcm_flow_exit(&ibcm_close_flow);
1532 }
1533 
1534 void
1535 ibcm_rc_flow_control_stall()
1536 {
1537 	ibcm_flow_stall(&ibcm_open_flow);
1538 }
1539 
1540 void
1541 ibcm_sa_access_enter()
1542 {
1543 	ibcm_flow_enter(&ibcm_saa_flow);
1544 }
1545 
1546 void
1547 ibcm_sa_access_exit()
1548 {
1549 	ibcm_flow_exit(&ibcm_saa_flow);
1550 }
1551 
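/*
 * SA event subscription callback: map IBMF SAA subnet events to IBT
 * subnet event codes and forward them to IBTL, skipping HCAs that are
 * being torn down.
 */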
1552 static void
1553 ibcm_sm_notice_handler(ibmf_saa_handle_t saa_handle,
1554     ibmf_saa_subnet_event_t saa_event_code,
1555     ibmf_saa_event_details_t *saa_event_details,
1556     void *callback_arg)
1557 {
1558 	ibcm_port_info_t	*portp = (ibcm_port_info_t *)callback_arg;
1559 	ibt_subnet_event_code_t code;
1560 	ibt_subnet_event_t	event;
1561 	uint8_t			event_status;
1562 
1563 	IBTF_DPRINTF_L3(cmlog, "ibcm_sm_notice_handler: saa_hdl %p, code = %d",
1564 	    saa_handle, saa_event_code);
1565 
1566 	mutex_enter(&ibcm_sm_notice_serialize_lock);
1567 
1568 	switch (saa_event_code) {
1569 	case IBMF_SAA_EVENT_MCG_CREATED:
1570 		code = IBT_SM_EVENT_MCG_CREATED;
1571 		break;
1572 	case IBMF_SAA_EVENT_MCG_DELETED:
1573 		code = IBT_SM_EVENT_MCG_DELETED;
1574 		break;
1575 	case IBMF_SAA_EVENT_GID_AVAILABLE:
1576 		code = IBT_SM_EVENT_GID_AVAIL;
1577 		ibcm_path_cache_purge();
1578 		break;
1579 	case IBMF_SAA_EVENT_GID_UNAVAILABLE:
1580 		code = IBT_SM_EVENT_GID_UNAVAIL;
1581 		ibcm_path_cache_purge();
1582 		break;
1583 	case IBMF_SAA_EVENT_SUBSCRIBER_STATUS_CHG:
1584 		event_status =
1585 		    saa_event_details->ie_producer_event_status_mask &
1586 		    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM;
1587 		if (event_status == (portp->port_event_status &
1588 		    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM)) {
1589 			mutex_exit(&ibcm_sm_notice_serialize_lock);
1590 			return;	/* no change */
1591 		}
1592 		portp->port_event_status = event_status;
1593 		if (event_status == IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM)
1594 			code = IBT_SM_EVENT_AVAILABLE;
1595 		else
1596 			code = IBT_SM_EVENT_UNAVAILABLE;
1597 		break;
1598 	default:
1599 		mutex_exit(&ibcm_sm_notice_serialize_lock);
1600 		return;
1601 	}
1602 
1603 	mutex_enter(&ibcm_global_hca_lock);
1604 
1605 	/* don't send the event if we're tearing down */
1606 	if (!IBCM_ACCESS_HCA_OK(portp->port_hcap)) {
1607 		mutex_exit(&ibcm_global_hca_lock);
1608 		mutex_exit(&ibcm_sm_notice_serialize_lock);
1609 		return;
1610 	}
1611 
1612 	++(portp->port_hcap->hca_acc_cnt);
1613 	mutex_exit(&ibcm_global_hca_lock);
1614 
1615 	event.sm_notice_gid = saa_event_details->ie_gid;
1616 	ibtl_cm_sm_notice_handler(portp->port_sgid0, code, &event);
1617 
1618 	mutex_exit(&ibcm_sm_notice_serialize_lock);
1619 
1620 	ibcm_dec_hca_acc_cnt(portp->port_hcap);
1621 }
1622 
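/*
 * Register a client's subnet notice handler with IBTL; also report an
 * init failure for every port SGID whose SM event subscription is not
 * currently in place.
 */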
1623 void
1624 ibt_register_subnet_notices(ibt_clnt_hdl_t ibt_hdl,
1625     ibt_sm_notice_handler_t sm_notice_handler, void *private)
1626 {
1627 	ibcm_port_info_t	*portp;
1628 	ibcm_hca_info_t		*hcap;
1629 	uint8_t			port;
1630 	int			num_failed_sgids;
1631 	ibtl_cm_sm_init_fail_t	*ifail;
1632 	ib_gid_t		*sgidp;
1633 
1634 	IBTF_DPRINTF_L3(cmlog, "ibt_register_subnet_notices: ibt_hdl = %p",
1635 	    ibt_hdl);
1636 
1637 	mutex_enter(&ibcm_sm_notice_serialize_lock);
1638 
1639 	ibtl_cm_set_sm_notice_handler(ibt_hdl, sm_notice_handler, private);
1640 	if (sm_notice_handler == NULL) {
1641 		mutex_exit(&ibcm_sm_notice_serialize_lock);
1642 		return;
1643 	}
1644 
1645 	/* for each port, if service is not available, make a call */
1646 	mutex_enter(&ibcm_global_hca_lock);
1647 	num_failed_sgids = 0;
1648 	hcap = ibcm_hca_listp;
1649 	while (hcap != NULL) {
1650 		portp = hcap->hca_port_info;
1651 		for (port = 0; port < hcap->hca_num_ports; port++) {
1652 			if (!(portp->port_event_status &
1653 			    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM))
1654 				num_failed_sgids++;
1655 			portp++;
1656 		}
1657 		hcap = hcap->hca_next;
1658 	}
1659 	if (num_failed_sgids != 0) {
1660 		ifail = kmem_alloc(sizeof (*ifail) +
1661 		    (num_failed_sgids - 1) * sizeof (ib_gid_t), KM_SLEEP);
1662 		ifail->smf_num_sgids = num_failed_sgids;
1663 		ifail->smf_ibt_hdl = ibt_hdl;
1664 		sgidp = &ifail->smf_sgid[0];
1665 		hcap = ibcm_hca_listp;
1666 		while (hcap != NULL) {
1667 			portp = hcap->hca_port_info;
1668 			for (port = 0; port < hcap->hca_num_ports; port++) {
1669 				if (!(portp->port_event_status &
1670 				    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM))
1671 					*sgidp++ = portp->port_sgid0;
1672 				portp++;
1673 			}
1674 			hcap = hcap->hca_next;
1675 		}
1676 	}
1677 	mutex_exit(&ibcm_global_hca_lock);
1678 
1679 	if (num_failed_sgids != 0) {
1680 		ibtl_cm_sm_notice_init_failure(ifail);
1681 		kmem_free(ifail, sizeof (*ifail) +
1682 		    (num_failed_sgids - 1) * sizeof (ib_gid_t));
1683 	}
1684 	mutex_exit(&ibcm_sm_notice_serialize_lock);
1685 }
1686 
1687 /* The following is run from a taskq because we've seen stack overflows. */
1688 static void
1689 ibcm_init_saa(void *arg)
1690 {
1691 	ibcm_port_info_t		*portp = (ibcm_port_info_t *)arg;
1692 	int				status;
1693 	ib_guid_t			port_guid;
1694 	ibmf_saa_subnet_event_args_t	event_args;
1695 
1696 	port_guid = portp->port_sgid0.gid_guid;
1697 
1698 	IBTF_DPRINTF_L3(cmlog, "ibcm_init_saa: port guid %llX", port_guid);
1699 
1700 	event_args.is_event_callback_arg = portp;
1701 	event_args.is_event_callback = ibcm_sm_notice_handler;
1702 
1703 	if ((status = ibmf_sa_session_open(port_guid, 0, &event_args,
1704 	    IBMF_VERSION, 0, &portp->port_ibmf_saa_hdl)) != IBMF_SUCCESS) {
1705 		IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa "
1706 		    "ibmf_sa_session_open failed for port guid %llX "
1707 		    "status = %d", port_guid, status);
1708 	} else {
1709 		IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa "
1710 		    "registered sa_hdl 0x%p for port guid %llX",
1711 		    portp->port_ibmf_saa_hdl, port_guid);
1712 	}
1713 
1714 	mutex_enter(&ibcm_sa_open_lock);
1715 	portp->port_saa_open_in_progress = 0;
1716 	cv_broadcast(&ibcm_sa_open_cv);
1717 	mutex_exit(&ibcm_sa_open_lock);
1718 }
1719 
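/*
 * If no SA session is open (or being opened) for this port, look up the
 * port's SGID and dispatch ibcm_init_saa() on the taskq to open one.
 */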
1720 void
1721 ibcm_init_saa_handle(ibcm_hca_info_t *hcap, uint8_t port)
1722 {
1723 	ibmf_saa_handle_t	saa_handle;
1724 	uint8_t			port_index = port - 1;
1725 	ibcm_port_info_t	*portp = &hcap->hca_port_info[port_index];
1726 	ibt_status_t		ibt_status;
1727 
1728 	if (port_index >= hcap->hca_num_ports)
1729 		return;
1730 
1731 	mutex_enter(&ibcm_sa_open_lock);
1732 	if (portp->port_saa_open_in_progress) {
1733 		mutex_exit(&ibcm_sa_open_lock);
1734 		return;
1735 	}
1736 
1737 	saa_handle = portp->port_ibmf_saa_hdl;
1738 	if (saa_handle != NULL) {
1739 		mutex_exit(&ibcm_sa_open_lock);
1740 		return;
1741 	}
1742 
1743 	portp->port_saa_open_in_progress = 1;
1744 	mutex_exit(&ibcm_sa_open_lock);
1745 
1746 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(portp->port_event_status))
1747 
1748 	/* The assumption is that we're getting event notifications */
1749 	portp->port_event_status = IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM;
1750 
1751 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(portp->port_event_status))
1752 
1753 	ibt_status = ibt_get_port_state_byguid(portp->port_hcap->hca_guid,
1754 	    portp->port_num, &portp->port_sgid0, NULL);
1755 	if (ibt_status != IBT_SUCCESS) {
1756 		IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa_handle "
1757 		    "ibt_get_port_state_byguid failed for guid %llX "
1758 		    "with status %d", portp->port_hcap->hca_guid, ibt_status);
1759 		mutex_enter(&ibcm_sa_open_lock);
1760 		portp->port_saa_open_in_progress = 0;
1761 		cv_broadcast(&ibcm_sa_open_cv);
1762 		mutex_exit(&ibcm_sa_open_lock);
1763 		return;
1764 	}
1765 	/* if the port is UP, try sa_session_open */
1766 	(void) taskq_dispatch(ibcm_taskq, ibcm_init_saa, portp, TQ_SLEEP);
1767 }
1768 
1769 
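/*
 * Return the port's SA access handle, waiting for any open already in
 * progress; if no session exists yet, start one and wait for the result.
 */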
1770 ibmf_saa_handle_t
1771 ibcm_get_saa_handle(ibcm_hca_info_t *hcap, uint8_t port)
1772 {
1773 	ibmf_saa_handle_t	saa_handle;
1774 	uint8_t			port_index = port - 1;
1775 	ibcm_port_info_t	*portp = &hcap->hca_port_info[port_index];
1776 	ibt_status_t		ibt_status;
1777 
1778 	if (port_index >= hcap->hca_num_ports)
1779 		return (NULL);
1780 
1781 	mutex_enter(&ibcm_sa_open_lock);
1782 	while (portp->port_saa_open_in_progress) {
1783 		cv_wait(&ibcm_sa_open_cv, &ibcm_sa_open_lock);
1784 	}
1785 
1786 	saa_handle = portp->port_ibmf_saa_hdl;
1787 	if (saa_handle != NULL) {
1788 		mutex_exit(&ibcm_sa_open_lock);
1789 		return (saa_handle);
1790 	}
1791 
1792 	portp->port_saa_open_in_progress = 1;
1793 	mutex_exit(&ibcm_sa_open_lock);
1794 
1795 	ibt_status = ibt_get_port_state_byguid(portp->port_hcap->hca_guid,
1796 	    portp->port_num, &portp->port_sgid0, NULL);
1797 	if (ibt_status != IBT_SUCCESS) {
1798 		IBTF_DPRINTF_L2(cmlog, "ibcm_get_saa_handle "
1799 		    "ibt_get_port_state_byguid failed for guid %llX "
1800 		    "with status %d", portp->port_hcap->hca_guid, ibt_status);
1801 		mutex_enter(&ibcm_sa_open_lock);
1802 		portp->port_saa_open_in_progress = 0;
1803 		cv_broadcast(&ibcm_sa_open_cv);
1804 		mutex_exit(&ibcm_sa_open_lock);
1805 		return (NULL);
1806 	}
1807 	/* if the port is UP, try sa_session_open */
1808 	(void) taskq_dispatch(ibcm_taskq, ibcm_init_saa, portp, TQ_SLEEP);
1809 
1810 	mutex_enter(&ibcm_sa_open_lock);
1811 	while (portp->port_saa_open_in_progress) {
1812 		cv_wait(&ibcm_sa_open_cv, &ibcm_sa_open_lock);
1813 	}
1814 	saa_handle = portp->port_ibmf_saa_hdl;
1815 	mutex_exit(&ibcm_sa_open_lock);
1816 	return (saa_handle);
1817 }
1818 
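/*
 * Illustrative sketch (not part of the driver): a minimal example of how a
 * CM caller might obtain the SA access handle for a port before issuing an
 * SA query.  ibcm_example_need_sa() and the query step are hypothetical;
 * ibcm_get_saa_handle() above is the real routine.
 */
#if 0
static ibcm_status_t
ibcm_example_need_sa(ibcm_hca_info_t *hcap, uint8_t port)
{
	ibmf_saa_handle_t	saa_handle;

	/* Blocks while another thread's session open is in progress */
	saa_handle = ibcm_get_saa_handle(hcap, port);
	if (saa_handle == NULL)
		return (IBCM_FAILURE);	/* port not up or open failed */

	/* ... issue SA queries using saa_handle here ... */

	return (IBCM_SUCCESS);
}
#endif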
1819 
1820 /*
1821  * ibcm_hca_init_port():
1822  * 	- Register the port with IBMF (management framework)
1823  *
1824  * Arguments:
1825  *	hcap		- pointer to the CM's HCA info structure
1826  *	port_index	- port number minus 1
1827  *
1828  * Return values:
1829  *	IBT_SUCCESS - success
1830  */
1831 ibt_status_t
1832 ibcm_hca_init_port(ibcm_hca_info_t *hcap, uint8_t port_index)
1833 {
1834 	int			status;
1835 	ibmf_register_info_t	*ibmf_reg;
1836 
1837 	IBTF_DPRINTF_L4(cmlog, "ibcm_hca_init_port: hcap = 0x%p port_num %d",
1838 	    hcap, port_index + 1);
1839 
1840 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
1841 
1842 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(hcap->hca_port_info))
1843 
1844 	if (hcap->hca_port_info[port_index].port_ibmf_hdl == NULL) {
1845 		/* Register with IBMF */
1846 		ibmf_reg = &hcap->hca_port_info[port_index].port_ibmf_reg;
1847 		ibmf_reg->ir_ci_guid = hcap->hca_guid;
1848 		ibmf_reg->ir_port_num = port_index + 1;
1849 		ibmf_reg->ir_client_class = COMM_MGT_MANAGER_AGENT;
1850 
1851 		/*
1852 		 * register with management framework
1853 		 */
1854 		status = ibmf_register(ibmf_reg, IBMF_VERSION,
1855 		    IBMF_REG_FLAG_NO_OFFLOAD, NULL, NULL,
1856 		    &(hcap->hca_port_info[port_index].port_ibmf_hdl),
1857 		    &(hcap->hca_port_info[port_index].port_ibmf_caps));
1858 
1859 		if (status != IBMF_SUCCESS) {
1860 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_init_port: "
1861 			    "ibmf_register failed for port_num %x, "
1862 			    "status = %x", port_index + 1, status);
1863 			return (ibcm_ibmf_analyze_error(status));
1864 		}
1865 
1866 		hcap->hca_port_info[port_index].port_qp1.qp_cm =
1867 		    IBMF_QP_HANDLE_DEFAULT;
1868 		hcap->hca_port_info[port_index].port_qp1.qp_port =
1869 		    &(hcap->hca_port_info[port_index]);
1870 
1871 		/*
1872 		 * Register the receive callback with IBMF.
1873 		 * Since we just did an ibmf_register, the handle is
1874 		 * valid and ibcm_recv_cb() is valid, so we can
1875 		 * safely assert success of ibmf_setup_async_cb().
1876 		 *
1877 		 * Depending on the "state" of the HCA,
1878 		 * CM may drop incoming packets.
1879 		 */
1880 		status = ibmf_setup_async_cb(
1881 		    hcap->hca_port_info[port_index].port_ibmf_hdl,
1882 		    IBMF_QP_HANDLE_DEFAULT, ibcm_recv_cb,
1883 		    &(hcap->hca_port_info[port_index].port_qp1), 0);
1884 		ASSERT(status == IBMF_SUCCESS);
1885 
1886 		IBTF_DPRINTF_L5(cmlog, "ibcm_hca_init_port: "
1887 		    "IBMF hdl[%x] = 0x%p", port_index,
1888 		    hcap->hca_port_info[port_index].port_ibmf_hdl);
1889 
1890 		/* Attempt to get the saa_handle for this port */
1891 		ibcm_init_saa_handle(hcap, port_index + 1);
1892 	}
1893 
1894 	return (IBT_SUCCESS);
1895 }
1896 
1897 /*
1898  * Useful for reattempting initialization of a port's IBMF handles from
1899  * elsewhere in the CM code.
1900  */
1901 ibt_status_t
1902 ibcm_hca_reinit_port(ibcm_hca_info_t *hcap, uint8_t port_index)
1903 {
1904 	ibt_status_t	status;
1905 
1906 	IBTF_DPRINTF_L5(cmlog, "ibcm_hca_reinit_port: hcap 0x%p port_num %d",
1907 	    hcap, port_index + 1);
1908 
1909 	mutex_enter(&ibcm_global_hca_lock);
1910 	status = ibcm_hca_init_port(hcap, port_index);
1911 	mutex_exit(&ibcm_global_hca_lock);
1912 	return (status);
1913 }
1914 
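/*
 * Illustrative sketch (not part of the driver): one way CM code elsewhere
 * might use ibcm_hca_reinit_port() to retry IBMF registration for a port
 * whose earlier attempt failed.  The calling context and function name are
 * assumptions for illustration only.
 */
#if 0
static void
ibcm_example_retry_port(ibcm_hca_info_t *hcap, uint8_t port_num)
{
	/* ibcm_hca_reinit_port() expects the port number minus 1 */
	if (ibcm_hca_reinit_port(hcap, port_num - 1) != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_example_retry_port: "
		    "reinit of port %d failed", port_num);
	}
}
#endif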
1915 
1916 /*
1917  * ibcm_hca_fini_port():
1918  * 	- Close the port's SA session and deregister the port from IBMF
1919  *
1920  * Arguments:
1921  *	hcap		- pointer to the CM's HCA info structure
1922  *	port_index	- port number minus 1
1923  *
1924  * Return values:
1925  *	IBCM_SUCCESS - success
1926  */
1927 static ibcm_status_t
1928 ibcm_hca_fini_port(ibcm_hca_info_t *hcap, uint8_t port_index)
1929 {
1930 	int			ibmf_status;
1931 	ibcm_status_t		ibcm_status;
1932 
1933 	IBTF_DPRINTF_L4(cmlog, "ibcm_hca_fini_port: hcap = 0x%p port_num %d ",
1934 	    hcap, port_index + 1);
1935 
1936 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
1937 
1938 	if (hcap->hca_port_info[port_index].port_ibmf_saa_hdl != NULL) {
1939 		IBTF_DPRINTF_L5(cmlog, "ibcm_hca_fini_port: "
1940 		    "ibmf_sa_session_close IBMF SAA hdl %p",
1941 		    hcap->hca_port_info[port_index].port_ibmf_saa_hdl);
1942 
1943 		ibmf_status = ibmf_sa_session_close(
1944 		    &hcap->hca_port_info[port_index].port_ibmf_saa_hdl, 0);
1945 		if (ibmf_status != IBMF_SUCCESS) {
1946 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port "
1947 			    "ibmf_sa_session_close of port %d returned %x",
1948 			    port_index + 1, ibmf_status);
1949 			return (IBCM_FAILURE);
1950 		}
1951 	}
1952 
1953 	if (hcap->hca_port_info[port_index].port_ibmf_hdl != NULL) {
1954 		IBTF_DPRINTF_L5(cmlog, "ibcm_hca_fini_port: "
1955 		    "ibmf_unregister IBMF Hdl %p",
1956 		    hcap->hca_port_info[port_index].port_ibmf_hdl);
1957 
1958 		/* clean-up all the ibmf qp's allocated on this port */
1959 		ibcm_status = ibcm_free_allqps(hcap, port_index + 1);
1960 
1961 		if (ibcm_status != IBCM_SUCCESS) {
1962 
1963 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port "
1964 			    "ibcm_free_allqps failed for port_num %d",
1965 			    port_index + 1);
1966 			return (IBCM_FAILURE);
1967 		}
1968 
1969 		/* Tear down the receive callback */
1970 		ibmf_status = ibmf_tear_down_async_cb(
1971 		    hcap->hca_port_info[port_index].port_ibmf_hdl,
1972 		    IBMF_QP_HANDLE_DEFAULT, 0);
1973 
1974 		if (ibmf_status != IBMF_SUCCESS) {
1975 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port "
1976 			    "ibmf_tear_down_async_cb failed %d port_num %d",
1977 			    ibmf_status, port_index + 1);
1978 			return (IBCM_FAILURE);
1979 		}
1980 
1981 		/* Now, unregister with IBMF */
1982 		ibmf_status = ibmf_unregister(
1983 		    &hcap->hca_port_info[port_index].port_ibmf_hdl, 0);
1984 		IBTF_DPRINTF_L4(cmlog, "ibcm_hca_fini_port: "
1985 		    "ibmf_unregister of port_num %x returned %x",
1986 		    port_index + 1, ibmf_status);
1987 
1988 		if (ibmf_status == IBMF_SUCCESS)
1989 			hcap->hca_port_info[port_index].port_ibmf_hdl =
1990 								NULL;
1991 		else {
1992 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port "
1993 			    "ibmf_unregister failed %d port_num %d",
1994 			    ibmf_status, port_index + 1);
1995 			return (IBCM_FAILURE);
1996 		}
1997 	}
1998 	return (IBCM_SUCCESS);
1999 }
2000 
2001 /*
2002  * ibcm_comm_est_handler():
2003  *	Handle a COM_EST event: move a connection awaiting an RTU to ESTABLISHED
2004  *
2005  * Arguments:
2006  *	eventp	- A pointer to an ibt_async_event_t struct
2007  *
2008  * Return values: NONE
2009  */
2010 static void
2011 ibcm_comm_est_handler(ibt_async_event_t *eventp)
2012 {
2013 	ibcm_state_data_t	*statep;
2014 
2015 	IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler:");
2016 
2017 	/* The channel handle (QP or EEC) must not be NULL */
2018 	if (eventp->ev_chan_hdl == NULL) {
2019 		IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: "
2020 		    "both QP and EEC handles are NULL");
2021 		return;
2022 	}
2023 
2024 	/* get the "statep" from qp/eec handles */
2025 	IBCM_GET_CHAN_PRIVATE(eventp->ev_chan_hdl, statep);
2026 	if (statep == NULL) {
2027 		IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: statep is NULL");
2028 		return;
2029 	}
2030 
2031 	mutex_enter(&statep->state_mutex);
2032 
2033 	IBCM_RELEASE_CHAN_PRIVATE(eventp->ev_chan_hdl);
2034 
2035 	IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler: statep = %p", statep);
2036 
2037 	IBCM_REF_CNT_INCR(statep);
2038 
2039 	if ((statep->state == IBCM_STATE_REP_SENT) ||
2040 	    (statep->state == IBCM_STATE_MRA_REP_RCVD)) {
2041 		timeout_id_t	timer_val = statep->timerid;
2042 
2043 		statep->state = IBCM_STATE_TRANSIENT_ESTABLISHED;
2044 
2045 		if (timer_val) {
2046 			statep->timerid = 0;
2047 			mutex_exit(&statep->state_mutex);
2048 			(void) untimeout(timer_val);
2049 		} else
2050 			mutex_exit(&statep->state_mutex);
2051 
2052 		/* CM doesn't have an actual RTU message here; pass NULL */
2053 		ibcm_cep_state_rtu(statep, NULL);
2054 
2055 	} else {
2056 		if (statep->state == IBCM_STATE_ESTABLISHED ||
2057 		    statep->state == IBCM_STATE_TRANSIENT_ESTABLISHED) {
2058 			IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler: "
2059 			    "Channel already in ESTABLISHED state");
2060 		} else {
2061 			/* An unexpected behavior from remote */
2062 			IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: "
2063 			    "Unexpected in state = %d", statep->state);
2064 		}
2065 		mutex_exit(&statep->state_mutex);
2066 
2067 		ibcm_insert_trace(statep, IBCM_TRACE_INCOMING_COMEST);
2068 	}
2069 
2070 	mutex_enter(&statep->state_mutex);
2071 	IBCM_REF_CNT_DECR(statep);
2072 	mutex_exit(&statep->state_mutex);
2073 }
2074 
2075 
2076 /*
2077  * ibcm_async_handler():
2078  *	CM's Async Handler
2079  *	(Handles ATTACH, DETACH, COM_EST events)
2080  *
2081  * Arguments:
2082  *	eventp	- A pointer to an ibt_async_event_t struct
2083  *
2084  * Return values: None
2085  *
2086  * NOTE: CM assumes that all HCA DR events are delivered sequentially,
2087  * i.e., until ibcm_async_handler completes for a given HCA DR event, the
2088  * framework shall not invoke ibcm_async_handler with another DR event
2089  * for the same HCA
2090  */
2091 /* ARGSUSED */
2092 void
2093 ibcm_async_handler(void *clnt_hdl, ibt_hca_hdl_t hca_hdl,
2094     ibt_async_code_t code, ibt_async_event_t *eventp)
2095 {
2096 	ibcm_hca_info_t		*hcap;
2097 	ibcm_port_up_t		*pup;
2098 
2099 	IBTF_DPRINTF_L3(cmlog, "ibcm_async_handler: "
2100 	    "clnt_hdl = %p, code = 0x%x, eventp = 0x%p",
2101 	    clnt_hdl, code, eventp);
2102 
2103 	mutex_enter(&ibcm_global_hca_lock);
2104 
2105 	/* If fini is going to complete successfully, then return */
2106 	if (ibcm_finit_state != IBCM_FINIT_IDLE) {
2107 
2108 		/*
2109 		 * This finit state implies one of the following:
2110 		 * Init either didn't start or didn't complete OR
2111 		 * Fini is about to return SUCCESS and release the global lock.
2112 		 * In all these cases, it is safe to ignore the async.
2113 		 */
2114 
2115 		IBTF_DPRINTF_L2(cmlog, "ibcm_async_handler: ignoring event %x, "
2116 		    "as either init didn't complete or fini about to succeed",
2117 		    code);
2118 		mutex_exit(&ibcm_global_hca_lock);
2119 		return;
2120 	}
2121 
2122 	switch (code) {
2123 	case IBT_EVENT_PORT_UP:
2124 		mutex_exit(&ibcm_global_hca_lock);
2125 		pup = kmem_alloc(sizeof (ibcm_port_up_t), KM_SLEEP);
2126 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pup))
2127 		pup->pup_hca_guid = eventp->ev_hca_guid;
2128 		pup->pup_port = eventp->ev_port;
2129 		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*pup))
2130 		(void) taskq_dispatch(ibcm_taskq,
2131 		    ibcm_service_record_rewrite_task, pup, TQ_SLEEP);
2132 		ibcm_path_cache_purge();
2133 		return;
2134 
2135 	case IBT_HCA_ATTACH_EVENT:
2136 
2137 		/* eventp->ev_hca_guid is the HCA GUID of interest */
2138 		ibcm_hca_attach(eventp->ev_hca_guid);
2139 		break;
2140 
2141 	case IBT_HCA_DETACH_EVENT:
2142 
2143 		/* eventp->ev_hca_guid is the HCA GUID of interest */
2144 		if ((hcap = ibcm_find_hcap_entry(eventp->ev_hca_guid)) ==
2145 		    NULL) {
2146 			IBTF_DPRINTF_L2(cmlog, "ibcm_async_handler:"
2147 			    " hca %llX doesn't exist", eventp->ev_hca_guid);
2148 			break;
2149 		}
2150 
2151 		(void) ibcm_hca_detach(hcap);
2152 		break;
2153 
2154 	case IBT_EVENT_COM_EST_QP:
2155 		/* eventp->ev_qp_hdl is the ibt_qp_hdl_t of interest */
2156 	case IBT_EVENT_COM_EST_EEC:
2157 		/* eventp->ev_eec_hdl is the ibt_eec_hdl_t of interest */
2158 		ibcm_comm_est_handler(eventp);
2159 		break;
2160 	default:
2161 		break;
2162 	}
2163 
2164 	/* Unblock any blocked fini/init operations */
2165 	mutex_exit(&ibcm_global_hca_lock);
2166 }
2167
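/*
 * Illustrative sketch (not part of the driver): ibcm_async_handler() above
 * is the async callback CM supplies to IBTF when it attaches as an IBT
 * client.  The modinfo below is a hedged example of such a registration;
 * the version constant, class, and client name shown are assumptions, not
 * necessarily this module's actual settings.
 */
#if 0
static ibt_clnt_modinfo_t ibcm_example_modinfo = {
	IBTI_V_CURR,		/* assumed IBTI interface version */
	IBT_CM,			/* assumed client class for the CM */
	ibcm_async_handler,	/* async handler defined in this file */
	NULL,
	"ibcm_example"		/* hypothetical client name */
};
#endif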