xref: /illumos-gate/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_impl.c (revision 9d3d2ed09c8e9ba0b2ba44fdd1dd300b2c3f9e8e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * ibcm_impl.c
31  *
32  * contains internal functions of IB CM module.
33  *
34  * TBD:
35  * 1. HCA CATASTROPHIC/RECOVERED not handled yet
36  */
37 
38 #include <sys/ib/mgt/ibcm/ibcm_impl.h>
39 #include <sys/disp.h>
40 
41 
42 /* function prototypes */
43 static ibcm_status_t	ibcm_init(void);
44 static ibcm_status_t	ibcm_fini(void);
45 
/* Routines to initialize and destroy CM global locks and CVs */
47 static void		ibcm_init_locks(void);
48 static void		ibcm_fini_locks(void);
49 
50 /* Routines that initialize/teardown CM's global hca structures */
51 static void		ibcm_init_hcas();
52 static ibcm_status_t	ibcm_fini_hcas();
53 
54 static void		ibcm_init_classportinfo();
55 static void		ibcm_stop_timeout_thread();
56 
57 /* Routines that handle HCA attach/detach asyncs */
58 static void		ibcm_hca_attach(ib_guid_t);
59 static ibcm_status_t	ibcm_hca_detach(ibcm_hca_info_t *);
60 
61 /* Routines that initialize the HCA's port related fields */
62 static ibt_status_t	ibcm_hca_init_port(ibcm_hca_info_t *hcap,
63 			    uint8_t port_index);
64 static ibcm_status_t	ibcm_hca_fini_port(ibcm_hca_info_t *hcap,
65 			    uint8_t port_index);
66 
67 static void ibcm_rc_flow_control_init(void);
68 static void ibcm_rc_flow_control_fini(void);
69 
70 /*
71  * Routines that check if hca's avl trees and sidr lists are free of any
72  * active client resources ie., RC or UD state structures in certain states
73  */
74 static ibcm_status_t	ibcm_check_avl_clean(ibcm_hca_info_t *hcap);
75 static ibcm_status_t	ibcm_check_sidr_clean(ibcm_hca_info_t *hcap);
76 
77 /* Add a new hca structure to CM's global hca list */
78 static ibcm_hca_info_t	*ibcm_add_hca_entry(ib_guid_t hcaguid, uint_t nports);
79 
80 static void		ibcm_comm_est_handler(ibt_async_event_t *);
81 void			ibcm_async_handler(void *, ibt_hca_hdl_t,
82 			    ibt_async_code_t, ibt_async_event_t *);
83 
84 /* Global variables */
85 char			cmlog[] = "ibcm";	/* for debug log messages */
86 ibt_clnt_hdl_t		ibcm_ibt_handle;	/* IBT handle */
87 kmutex_t		ibcm_svc_info_lock;	/* list lock */
88 kcondvar_t		ibcm_svc_info_cv;	/* cv for deregister */
89 kmutex_t		ibcm_recv_mutex;
90 avl_tree_t		ibcm_svc_avl_tree;
91 taskq_t			*ibcm_taskq = NULL;
92 int			taskq_dispatch_fail_cnt;
93 
94 kmutex_t		ibcm_trace_mutex;	/* Trace mutex */
95 kmutex_t		ibcm_trace_print_mutex;	/* Trace print mutex */
96 int			ibcm_conn_max_trcnt = IBCM_MAX_CONN_TRCNT;
97 
98 int			ibcm_enable_trace = 2;	/* Trace level 4 by default */
99 int			ibcm_dtrace = 0; /* conditionally enable more dtrace */
100 
101 _NOTE(MUTEX_PROTECTS_DATA(ibcm_svc_info_lock, ibcm_svc_info_s::{svc_bind_list
102     svc_ref_cnt svc_to_delete}))
103 
104 _NOTE(MUTEX_PROTECTS_DATA(ibcm_svc_info_lock, ibcm_svc_bind_s::{sbind_link}))
105 
106 _NOTE(MUTEX_PROTECTS_DATA(ibcm_trace_mutex, ibcm_conn_trace_s))
107 
108 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_conn_trace_s))
109 
110 _NOTE(MUTEX_PROTECTS_DATA(ibcm_trace_print_mutex, ibcm_debug_buf))
111 
112 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_debug_buf))
113 
114 /*
115  * Initial state is INIT. All hca dr's return success immediately in this
116  * state, without adding or deleting any hca's to CM.
117  */
118 ibcm_finit_state_t	ibcm_finit_state = IBCM_FINIT_INIT;
119 
120 /* mutex and cv to manage hca's reference and resource count(s) */
121 kmutex_t		ibcm_global_hca_lock;
122 kcondvar_t		ibcm_global_hca_cv;
123 
124 /* mutex and cv to sa session open */
125 kmutex_t		ibcm_sa_open_lock;
126 kcondvar_t		ibcm_sa_open_cv;
127 int			ibcm_sa_timeout_delay = 1;		/* in ticks */
128 _NOTE(MUTEX_PROTECTS_DATA(ibcm_sa_open_lock,
129     ibcm_port_info_s::{port_ibmf_saa_hdl port_saa_open_in_progress}))
130 
131 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_port_info_s::{port_ibmf_saa_hdl}))
132 
133 /* serialize sm notice callbacks */
134 kmutex_t		ibcm_sm_notice_serialize_lock;
135 
136 _NOTE(LOCK_ORDER(ibcm_sm_notice_serialize_lock ibcm_global_hca_lock))
137 
138 _NOTE(MUTEX_PROTECTS_DATA(ibcm_global_hca_lock, ibcm_hca_info_s::{hca_state
139     hca_svc_cnt hca_acc_cnt hca_res_cnt hca_next}))
140 
141 _NOTE(MUTEX_PROTECTS_DATA(ibcm_global_hca_lock,
142     ibcm_port_info_s::{port_ibmf_hdl}))
143 
144 _NOTE(MUTEX_PROTECTS_DATA(ibcm_sm_notice_serialize_lock,
145     ibcm_port_info_s::{port_event_status}))
146 
147 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_hca_info_s::{hca_state}))
148 _NOTE(DATA_READABLE_WITHOUT_LOCK(
149     ibcm_hca_info_s::{hca_port_info.port_ibmf_hdl}))
150 
151 /* mutex for CM's qp list management */
152 kmutex_t		ibcm_qp_list_lock;
153 
154 _NOTE(MUTEX_PROTECTS_DATA(ibcm_qp_list_lock, ibcm_port_info_s::{port_qplist}))
155 _NOTE(MUTEX_PROTECTS_DATA(ibcm_qp_list_lock, ibcm_qp_list_s))
156 _NOTE(MUTEX_PROTECTS_DATA(ibcm_qp_list_lock, ibcm_qp_list_s))
157 
158 kcondvar_t		ibcm_timeout_list_cv;
159 kcondvar_t		ibcm_timeout_thread_done_cv;
160 kt_did_t		ibcm_timeout_thread_did;
161 ibcm_state_data_t	*ibcm_timeout_list_hdr, *ibcm_timeout_list_tail;
162 ibcm_ud_state_data_t	*ibcm_ud_timeout_list_hdr, *ibcm_ud_timeout_list_tail;
163 kmutex_t		ibcm_timeout_list_lock;
164 uint8_t			ibcm_timeout_list_flags = 0;
165 pri_t			ibcm_timeout_thread_pri = MINCLSYSPRI;
166 
167 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock,
168     ibcm_state_data_s::timeout_next))
169 
170 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock,
171     ibcm_ud_state_data_s::ud_timeout_next))
172 
173 /*
174  * Flow control logic for open_rc_channel uses the following.
175  */
176 
/*
 * Global bookkeeping for open_rc_channel flow control (see comment above).
 * NOTE(review): fields appear to be protected by 'mutex' and drained by
 * ibcm_open_task() — confirm against open_rc_channel/ibcm_open_task users.
 */
struct ibcm_open_s {
	kmutex_t		mutex;		/* guards this structure */
	kcondvar_t		cv;
	uint8_t			task_running;	/* ibcm_open_task() active */
	uint_t			queued;		/* stateps queued on list */
	uint_t			exit_deferred;
	uint_t			in_progress;
	uint_t			in_progress_max;
	uint_t			sends;		/* outstanding sends */
	uint_t			sends_max;
	uint_t			sends_lowat;	/* low water mark */
	uint_t			sends_hiwat;	/* high water mark */
	ibcm_state_data_t	*tail;		/* queue tail */
	ibcm_state_data_t	head;		/* embedded list head */
} ibcm_open;
192 
193 static void ibcm_open_task(void *);
194 
195 /*
196  * Flow control logic for SA access and close_rc_channel calls follows.
197  */
198 
199 int ibcm_close_simul_max	= 12;
200 int ibcm_lapr_simul_max		= 12;
201 int ibcm_saa_simul_max		= 8;
202 
/*
 * One chunk of waiters for a flow-controlled operation; chunks are
 * chained via 'link' and each has its own CV to wake its waiters.
 */
typedef struct ibcm_flow1_s {
	struct ibcm_flow1_s	*link;		/* next chunk of waiters */
	kcondvar_t		cv;		/* waiters block here */
	uint8_t			waiters;	/* 1 to IBCM_FLOW_SIMUL_MAX */
} ibcm_flow1_t;
208 
/*
 * Per-operation flow-control state (one instance each for SA access,
 * close_rc_channel and LAP/APR below).
 */
typedef struct ibcm_flow_s {
	ibcm_flow1_t		*list;		/* chunks of waiters */
	uint_t			simul;	/* #requests currently outstanding */
	uint_t			simul_max;	/* cap on 'simul' */
	uint_t			waiters_per_chunk;
	uint_t			lowat;
	uint_t			lowat_default;
	/* statistics */
	uint_t			total;		/* total requests seen */
} ibcm_flow_t;
219 
220 ibcm_flow_t ibcm_saa_flow;
221 ibcm_flow_t ibcm_close_flow;
222 ibcm_flow_t ibcm_lapr_flow;
223 
/*
 * CM's registration information passed to ibt_attach(): IBTI version,
 * client class, async event handler and the client name string.
 */
static ibt_clnt_modinfo_t ibcm_ibt_modinfo = {	/* Client's modinfop */
	IBTI_V2,
	IBT_CM,
	ibcm_async_handler,
	NULL,
	"IBCM"
};
231 
232 /* IBCM's list of HCAs registered with it */
233 static ibcm_hca_info_t	*ibcm_hca_listp = NULL;	/* CM's HCA list */
234 
/*
 * Array of CM state call table functions.
 * NOTE(review): the entry order (REQ, MRA, REJ, REP, RTU, DREQ, DREP,
 * SIDR_REQ, SIDR_REP, LAP, APR) must stay in sync with the index the MAD
 * dispatch code derives from the CM attribute ID — confirm in ibcm_impl.h.
 */
ibcm_state_handler_t	ibcm_sm_funcs_tbl[] = {
	ibcm_process_req_msg,
	ibcm_process_mra_msg,
	ibcm_process_rej_msg,
	ibcm_process_rep_msg,
	ibcm_process_rtu_msg,
	ibcm_process_dreq_msg,
	ibcm_process_drep_msg,
	ibcm_process_sidr_req_msg,
	ibcm_process_sidr_rep_msg,
	ibcm_process_lap_msg,
	ibcm_process_apr_msg
};
249 
250 /* the following globals are CM tunables */
251 ibt_rnr_nak_time_t	ibcm_default_rnr_nak_time = IBT_RNR_NAK_655ms;
252 
253 uint32_t	ibcm_max_retries = IBCM_MAX_RETRIES;
254 clock_t		ibcm_local_processing_time = IBCM_LOCAL_RESPONSE_TIME;
255 clock_t		ibcm_remote_response_time = IBCM_REMOTE_RESPONSE_TIME;
256 ib_time_t	ibcm_max_sidr_rep_proctime = IBCM_MAX_SIDR_PROCESS_TIME;
257 ib_time_t	ibcm_max_sidr_pktlife_time = IBCM_MAX_SIDR_PKT_LIFE_TIME;
258 
259 ib_time_t	ibcm_max_sidr_rep_store_time = 18;
260 uint32_t	ibcm_wait_for_acc_cnt_timeout = 500000;	/* 500 ms */
261 uint32_t	ibcm_wait_for_res_cnt_timeout = 500000;	/* 500 ms */
262 
263 ib_time_t	ibcm_max_ib_pkt_lt = IBCM_MAX_IB_PKT_LT;
264 ib_time_t	ibcm_max_ib_mad_pkt_lt = IBCM_MAX_IB_MAD_PKT_LT;
265 
266 /*
267  * This delay accounts for time involved in various activities as follows :
268  *
269  * IBMF delays for posting the MADs in non-blocking mode
270  * IBMF delays for receiving the MADs and delivering to CM
271  * CM delays in processing the MADs before invoking client handlers,
272  * Any other delays associated with HCA driver in processing the MADs and
273  * 	other subsystems that CM may invoke (ex : SA, HCA driver)
274  */
275 uint32_t	ibcm_sw_delay	= 1000;	/* 1000us / 1ms */
276 uint32_t	ibcm_max_sa_retries = IBCM_MAX_SA_RETRIES + 1;
277 
278 /*	approx boot time */
279 uint32_t	ibcm_adj_btime = 4;	/* 4 seconds */
280 
281 /*
282  * The information in ibcm_clpinfo is kept in wireformat and is setup at
283  * init time, and used read-only after that
284  */
285 ibcm_classportinfo_msg_t	ibcm_clpinfo;
286 
/*
 * Human-readable labels for the connection trace facility.  The first and
 * last entries are out-of-range guards ("NEVER SEE THIS").
 * NOTE(review): entry order must stay in sync with the trace event codes
 * that index this array — confirm against the ibcm trace definitions.
 */
char	*event_str[] = {
	"NEVER SEE THIS             ",
	"SESSION_ID                 ",
	"CHAN_HDL                   ",
	"LOCAL_COMID/HCA/PORT       ",
	"LOCAL_QPN                  ",
	"REMOTE_COMID/HCA           ",
	"REMOTE_QPN                 ",
	"BASE_TIME                  ",
	"INCOMING_REQ               ",
	"INCOMING_REP               ",
	"INCOMING_RTU               ",
	"INCOMING_COMEST            ",
	"INCOMING_MRA               ",
	"INCOMING_REJ               ",
	"INCOMING_LAP               ",
	"INCOMING_APR               ",
	"INCOMING_DREQ              ",
	"INCOMING_DREP              ",
	"OUTGOING_REQ               ",
	"OUTGOING_REP               ",
	"OUTGOING_RTU               ",
	"OUTGOING_LAP               ",
	"OUTGOING_APR               ",
	"OUTGOING_MRA               ",
	"OUTGOING_REJ               ",
	"OUTGOING_DREQ              ",
	"OUTGOING_DREP              ",
	"REQ_POST_COMPLETE          ",
	"REP_POST_COMPLETE          ",
	"RTU_POST_COMPLETE          ",
	"MRA_POST_COMPLETE          ",
	"REJ_POST_COMPLETE          ",
	"LAP_POST_COMPLETE          ",
	"APR_POST_COMPLETE          ",
	"DREQ_POST_COMPLETE         ",
	"DREP_POST_COMPLETE         ",
	"TIMEOUT_REP                ",
	"CALLED_REQ_RCVD_EVENT      ",
	"RET_REQ_RCVD_EVENT         ",
	"CALLED_REP_RCVD_EVENT      ",
	"RET_REP_RCVD_EVENT         ",
	"CALLED_CONN_EST_EVENT      ",
	"RET_CONN_EST_EVENT         ",
	"CALLED_CONN_FAIL_EVENT     ",
	"RET_CONN_FAIL_EVENT        ",
	"CALLED_CONN_CLOSE_EVENT    ",
	"RET_CONN_CLOSE_EVENT       ",
	"INIT_INIT                  ",
	"INIT_INIT_FAIL             ",
	"INIT_RTR                   ",
	"INIT_RTR_FAIL              ",
	"RTR_RTS                    ",
	"RTR_RTS_FAIL               ",
	"RTS_RTS                    ",
	"RTS_RTS_FAIL               ",
	"TO_ERROR                   ",
	"ERROR_FAIL                 ",
	"SET_ALT                    ",
	"SET_ALT_FAIL               ",
	"STALE_DETECT               ",
	"OUTGOING_REQ_RETRY         ",
	"OUTGOING_REP_RETRY         ",
	"OUTGOING_LAP_RETRY         ",
	"OUTGOING_MRA_RETRY         ",
	"OUTGOING_DREQ_RETRY        ",
	"NEVER SEE THIS             "
};
355 
356 char	ibcm_debug_buf[IBCM_DEBUG_BUF_SIZE];
357 
358 _NOTE(SCHEME_PROTECTS_DATA("used in a localized function consistently",
359     ibcm_debug_buf))
360 _NOTE(READ_ONLY_DATA(ibcm_taskq))
361 
362 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_timeout_list_flags))
363 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_timeout_list_hdr))
364 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_ud_timeout_list_hdr))
365 
366 #ifdef DEBUG
367 int		ibcm_test_mode = 0;	/* set to 1, if running tests */
368 #endif
369 
370 
371 /* Module Driver Info */
372 static struct modlmisc ibcm_modlmisc = {
373 	&mod_miscops,
374 	"IB Communication Manager %I%"
375 };
376 
377 /* Module Linkage */
378 static struct modlinkage ibcm_modlinkage = {
379 	MODREV_1,
380 	&ibcm_modlmisc,
381 	NULL
382 };
383 
384 
385 int
386 _init(void)
387 {
388 	int		rval;
389 	ibcm_status_t	status;
390 
391 	status = ibcm_init();
392 	if (status != IBCM_SUCCESS) {
393 		IBTF_DPRINTF_L2(cmlog, "_init: ibcm failed %d", status);
394 		return (EINVAL);
395 	}
396 
397 	rval = mod_install(&ibcm_modlinkage);
398 	if (rval != 0) {
399 		IBTF_DPRINTF_L2(cmlog, "_init: ibcm mod_install failed %d",
400 		    rval);
401 		(void) ibcm_fini();
402 	}
403 
404 	IBTF_DPRINTF_L5(cmlog, "_init: ibcm successful");
405 	return (rval);
406 
407 }
408 
409 
410 int
411 _info(struct modinfo *modinfop)
412 {
413 	return (mod_info(&ibcm_modlinkage, modinfop));
414 }
415 
416 
417 int
418 _fini(void)
419 {
420 	int status;
421 
422 	if (ibcm_fini() != IBCM_SUCCESS)
423 		return (EBUSY);
424 
425 	if ((status = mod_remove(&ibcm_modlinkage)) != 0) {
426 		IBTF_DPRINTF_L2(cmlog, "_fini: ibcm mod_remove failed %d",
427 		    status);
428 		return (status);
429 	}
430 
431 	IBTF_DPRINTF_L5(cmlog, "_fini: ibcm successful");
432 
433 	return (status);
434 }
435 
/*
 * Initializes all global mutex and CV in cm module.
 * Called once from ibcm_init(); ibcm_fini_locks() is its inverse and must
 * only run after everything using these locks has stopped.
 */
static void
ibcm_init_locks()
{

	/*
	 * Verify CM MAD sizes: in test mode, dump the compiled size of each
	 * CM MAD structure so it can be checked against the expected
	 * wire-format sizes.
	 */
#ifdef DEBUG

	if (ibcm_test_mode > 1) {

		IBTF_DPRINTF_L1(cmlog, "REQ MAD SIZE %d",
		    sizeof (ibcm_req_msg_t));
		IBTF_DPRINTF_L1(cmlog, "REP MAD SIZE %d",
		    sizeof (ibcm_rep_msg_t));
		IBTF_DPRINTF_L1(cmlog, "RTU MAD SIZE %d",
		    sizeof (ibcm_rtu_msg_t));
		IBTF_DPRINTF_L1(cmlog, "MRA MAD SIZE %d",
		    sizeof (ibcm_mra_msg_t));
		IBTF_DPRINTF_L1(cmlog, "REJ MAD SIZE %d",
		    sizeof (ibcm_rej_msg_t));
		IBTF_DPRINTF_L1(cmlog, "LAP MAD SIZE %d",
		    sizeof (ibcm_lap_msg_t));
		IBTF_DPRINTF_L1(cmlog, "APR MAD SIZE %d",
		    sizeof (ibcm_apr_msg_t));
		IBTF_DPRINTF_L1(cmlog, "DREQ MAD SIZE %d",
		    sizeof (ibcm_dreq_msg_t));
		IBTF_DPRINTF_L1(cmlog, "DREP MAD SIZE %d",
		    sizeof (ibcm_drep_msg_t));
		IBTF_DPRINTF_L1(cmlog, "SIDR REQ MAD SIZE %d",
		    sizeof (ibcm_sidr_req_msg_t));
		IBTF_DPRINTF_L1(cmlog, "SIDR REP MAD SIZE %d",
		    sizeof (ibcm_sidr_rep_msg_t));
	}

#endif

	/* Create all global locks within cm module */
	mutex_init(&ibcm_svc_info_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_timeout_list_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_global_hca_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_sa_open_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_recv_mutex, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_sm_notice_serialize_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_qp_list_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_trace_mutex, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_trace_print_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&ibcm_svc_info_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ibcm_timeout_list_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ibcm_timeout_thread_done_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ibcm_global_hca_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ibcm_sa_open_cv, NULL, CV_DRIVER, NULL);
	/* AVL tree of registered services, ordered by ibcm_svc_compare() */
	avl_create(&ibcm_svc_avl_tree, ibcm_svc_compare,
	    sizeof (ibcm_svc_info_t),
	    offsetof(struct ibcm_svc_info_s, svc_link));

	IBTF_DPRINTF_L5(cmlog, "ibcm_init_locks: done");
}
493 
/*
 * Destroys all global mutex and CV in cm module.
 * Inverse of ibcm_init_locks(); callers must guarantee nothing still
 * uses these locks/CVs (e.g. the timeout thread must have been stopped).
 */
static void
ibcm_fini_locks()
{
	/* Destroy all global locks within cm module */
	mutex_destroy(&ibcm_svc_info_lock);
	mutex_destroy(&ibcm_timeout_list_lock);
	mutex_destroy(&ibcm_global_hca_lock);
	mutex_destroy(&ibcm_sa_open_lock);
	mutex_destroy(&ibcm_recv_mutex);
	mutex_destroy(&ibcm_sm_notice_serialize_lock);
	mutex_destroy(&ibcm_qp_list_lock);
	mutex_destroy(&ibcm_trace_mutex);
	mutex_destroy(&ibcm_trace_print_mutex);
	cv_destroy(&ibcm_svc_info_cv);
	cv_destroy(&ibcm_timeout_list_cv);
	cv_destroy(&ibcm_timeout_thread_done_cv);
	cv_destroy(&ibcm_global_hca_cv);
	cv_destroy(&ibcm_sa_open_cv);
	avl_destroy(&ibcm_svc_avl_tree);

	IBTF_DPRINTF_L5(cmlog, "ibcm_fini_locks: done");
}
517 
518 
/*
 * Initialize CM's classport info.
 *
 * ibcm_clpinfo is kept in wire format (fields converted via h2b16/h2b32)
 * and is set up once here at init time; it is used read-only afterwards
 * (see the comment at its declaration above).
 */
static void
ibcm_init_classportinfo()
{
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_clpinfo));

	ibcm_clpinfo.BaseVersion = IBCM_MAD_BASE_VERSION;
	ibcm_clpinfo.ClassVersion = IBCM_MAD_CLASS_VERSION;

	/* For now, CM supports same capabilities at all ports */
	ibcm_clpinfo.CapabilityMask =
	    h2b16(IBCM_CPINFO_CAP_RC | IBCM_CPINFO_CAP_SIDR);

	/* Bits 0-7 are all 0 for Communication Mgmt Class */

	/* For now, CM has the same respvalue at all ports */
	ibcm_clpinfo.RespTimeValue_plus =
	    h2b32(ibt_usec2ib(ibcm_local_processing_time) & 0x1f);

	/* For now, redirect fields are set to 0 */
	/* Trap fields are not applicable to CM, hence set to 0 */

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_clpinfo));
	IBTF_DPRINTF_L5(cmlog, "ibcm_init_classportinfo: done");
}
544 
545 /*
546  * ibcm_init():
547  * 	- call ibt_attach()
548  * 	- create AVL trees
549  *	- Attach HCA handlers that are already present before
550  *	CM got loaded.
551  *
552  * Arguments:	NONE
553  *
554  * Return values:
555  *	IBCM_SUCCESS - success
556  */
557 static ibcm_status_t
558 ibcm_init(void)
559 {
560 	ibt_status_t	status;
561 	kthread_t	*t;
562 
563 	IBTF_DPRINTF_L3(cmlog, "ibcm_init:");
564 
565 	ibcm_init_classportinfo();
566 
567 	if (ibcm_init_ids() != IBCM_SUCCESS) {
568 		IBTF_DPRINTF_L1(cmlog, "ibcm_init: "
569 		    "fatal error: vmem_create() failed");
570 		return (IBCM_FAILURE);
571 	}
572 	ibcm_init_locks();
573 
574 	if (ibcm_ar_init() != IBCM_SUCCESS) {
575 		IBTF_DPRINTF_L1(cmlog, "ibcm_init: "
576 		    "fatal error: ibcm_ar_init() failed");
577 		ibcm_fini_ids();
578 		ibcm_fini_locks();
579 		return (IBCM_FAILURE);
580 	}
581 
582 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_taskq))
583 	ibcm_taskq = system_taskq;
584 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_taskq))
585 
586 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_flags))
587 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_thread_did))
588 
589 	/* Start the timeout list processing thread */
590 	ibcm_timeout_list_flags = 0;
591 	t = thread_create(NULL, 0, ibcm_process_tlist, 0, 0, &p0, TS_RUN,
592 	    ibcm_timeout_thread_pri);
593 	ibcm_timeout_thread_did = t->t_did;
594 
595 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_flags))
596 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_thread_did))
597 
598 	/*
599 	 * NOTE : if ibt_attach is done after ibcm_init_hcas, then some
600 	 * HCA DR events may be lost. CM could call re-init hca list
601 	 * again, but it is more complicated. Some HCA's DR's lost may
602 	 * be HCA detach, which makes hca list re-syncing and locking more
603 	 * complex
604 	 */
605 	status = ibt_attach(&ibcm_ibt_modinfo, NULL, NULL, &ibcm_ibt_handle);
606 	if (status != IBT_SUCCESS) {
607 		IBTF_DPRINTF_L2(cmlog, "ibcm_init(): ibt_attach failed %d",
608 		    status);
609 		(void) ibcm_ar_fini();
610 		ibcm_fini_ids();
611 		ibcm_fini_locks();
612 		ibcm_stop_timeout_thread();
613 		return (IBCM_FAILURE);
614 	}
615 
616 	/* Block all HCA attach/detach asyncs */
617 	mutex_enter(&ibcm_global_hca_lock);
618 
619 	ibcm_init_hcas();
620 	ibcm_finit_state = IBCM_FINIT_IDLE;
621 
622 	ibcm_path_cache_init();
623 
624 	/* Unblock any waiting HCA DR asyncs in CM */
625 	mutex_exit(&ibcm_global_hca_lock);
626 
627 	ibcm_rc_flow_control_init();
628 
629 	IBTF_DPRINTF_L4(cmlog, "ibcm_init: done");
630 	return (IBCM_SUCCESS);
631 }
632 
633 /* Allocates and initializes the "per hca" global data in CM */
634 static void
635 ibcm_init_hcas()
636 {
637 	uint_t	num_hcas = 0;
638 	ib_guid_t *guid_array;
639 	int i;
640 
641 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_hcas:");
642 
643 	/* Get the number of HCAs */
644 	num_hcas = ibt_get_hca_list(&guid_array);
645 	IBTF_DPRINTF_L4(cmlog, "ibcm_init_hcas: ibt_get_hca_list() "
646 	    "returned %d hcas", num_hcas);
647 
648 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
649 
650 	for (i = 0; i < num_hcas; i++)
651 		ibcm_hca_attach(guid_array[i]);
652 
653 	if (num_hcas)
654 		ibt_free_hca_list(guid_array, num_hcas);
655 
656 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_hcas: done");
657 }
658 
659 
/*
 * ibcm_fini():
 * 	- Deregister w/ ibt
 * 	- Cleanup IBCM HCA listp
 * 	- Destroy mutexes
 *
 * Arguments:	NONE
 *
 * Return values:
 *	IBCM_SUCCESS - success
 *	IBCM_FAILURE - CM resources (services, connections, HCAs) are
 *	still in use; CM is restored to its working state and the module
 *	cannot be unloaded yet
 */
static ibcm_status_t
ibcm_fini(void)
{
	ibt_status_t	status;

	IBTF_DPRINTF_L3(cmlog, "ibcm_fini:");

	/*
	 * CM assumes that the all general clients got rid of all the
	 * established connections and service registrations, completed all
	 * pending SIDR operations before a call to ibcm_fini()
	 */

	if (ibcm_ar_fini() != IBCM_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: ibcm_ar_fini failed");
		return (IBCM_FAILURE);
	}

	/* cleanup the svcinfo list */
	mutex_enter(&ibcm_svc_info_lock);
	if (avl_first(&ibcm_svc_avl_tree) != NULL) {
		/* a client still has a service registered; refuse to unload */
		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: "
		    "ibcm_svc_avl_tree is not empty");
		mutex_exit(&ibcm_svc_info_lock);
		return (IBCM_FAILURE);
	}
	mutex_exit(&ibcm_svc_info_lock);

	/* disables any new hca attach/detaches */
	mutex_enter(&ibcm_global_hca_lock);

	ibcm_finit_state = IBCM_FINIT_BUSY;

	if (ibcm_fini_hcas() != IBCM_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: "
		    "some hca's still have client resources");

		/* First, re-initialize the hcas */
		ibcm_init_hcas();
		/* and then enable the HCA asyncs */
		ibcm_finit_state = IBCM_FINIT_IDLE;
		mutex_exit(&ibcm_global_hca_lock);
		/* re-arm what ibcm_ar_fini() above tore down */
		if (ibcm_ar_init() != IBCM_SUCCESS) {
			IBTF_DPRINTF_L1(cmlog, "ibcm_fini:ibcm_ar_init failed");
		}
		return (IBCM_FAILURE);
	}

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_hdr))
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_ud_timeout_list_hdr))

	/* with all HCAs gone, no timeout work may remain queued */
	ASSERT(ibcm_timeout_list_hdr == NULL);
	ASSERT(ibcm_ud_timeout_list_hdr == NULL);

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_hdr))
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_ud_timeout_list_hdr))

	/* Release any pending asyncs on ibcm_global_hca_lock */
	ibcm_finit_state = IBCM_FINIT_SUCCESS;
	mutex_exit(&ibcm_global_hca_lock);

	/* must happen before ibcm_fini_locks() destroys the thread's lock */
	ibcm_stop_timeout_thread();

	/*
	 * Detach from IBTL. Waits until all pending asyncs are complete.
	 * Above cv_broadcast wakes up any waiting hca attach/detach asyncs
	 */
	status = ibt_detach(ibcm_ibt_handle);

	/* if detach fails, CM didn't free up some resources, so assert */
	if (status != IBT_SUCCESS)
	    IBTF_DPRINTF_L1(cmlog, "ibcm_fini: ibt_detach failed %d", status);

	ibcm_rc_flow_control_fini();

	ibcm_path_cache_fini();

	ibcm_fini_ids();
	ibcm_fini_locks();
	IBTF_DPRINTF_L3(cmlog, "ibcm_fini: done");
	return (IBCM_SUCCESS);
}
753 
754 /* This routine exit's the ibcm timeout thread  */
755 static void
756 ibcm_stop_timeout_thread()
757 {
758 	mutex_enter(&ibcm_timeout_list_lock);
759 
760 	/* Stop the timeout list processing thread */
761 	ibcm_timeout_list_flags =
762 	    ibcm_timeout_list_flags | IBCM_TIMEOUT_THREAD_EXIT;
763 
764 	/* Wake up, if the timeout thread is on a cv_wait */
765 	cv_signal(&ibcm_timeout_list_cv);
766 
767 	mutex_exit(&ibcm_timeout_list_lock);
768 	thread_join(ibcm_timeout_thread_did);
769 
770 	IBTF_DPRINTF_L5(cmlog, "ibcm_stop_timeout_thread: done");
771 }
772 
773 
774 /* Attempts to release all the hca's associated with CM */
775 static ibcm_status_t
776 ibcm_fini_hcas()
777 {
778 	ibcm_hca_info_t *hcap, *next;
779 
780 	IBTF_DPRINTF_L4(cmlog, "ibcm_fini_hcas:");
781 
782 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
783 
784 	hcap = ibcm_hca_listp;
785 	while (hcap != NULL) {
786 		next = hcap->hca_next;
787 		if (ibcm_hca_detach(hcap) != IBCM_SUCCESS) {
788 			ibcm_hca_listp = hcap;
789 			return (IBCM_FAILURE);
790 		}
791 		hcap = next;
792 	}
793 
794 	IBTF_DPRINTF_L4(cmlog, "ibcm_fini_hcas: SUCCEEDED");
795 	return (IBCM_SUCCESS);
796 }
797 
798 
799 /*
800  * ibcm_hca_attach():
801  *	Called as an asynchronous event to notify CM of an attach of HCA.
802  *	Here ibcm_hca_info_t is initialized and all fields are
803  *	filled in along with SA Access handles and IBMA handles.
804  *	Also called from ibcm_init to initialize ibcm_hca_info_t's for each
805  *	hca's
806  *
807  * Arguments: (WILL CHANGE BASED ON ASYNC EVENT CODE)
808  *	hca_guid	- HCA's guid
809  *
810  * Return values: NONE
811  */
812 static void
813 ibcm_hca_attach(ib_guid_t hcaguid)
814 {
815 	int			i;
816 	ibt_status_t		status;
817 	uint_t			nports = 0;
818 	ibcm_hca_info_t		*hcap;
819 	ibt_hca_attr_t		hca_attrs;
820 
821 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_attach: guid = 0x%llX", hcaguid);
822 
823 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
824 
825 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*hcap))
826 
827 	status = ibt_query_hca_byguid(hcaguid, &hca_attrs);
828 	if (status != IBT_SUCCESS) {
829 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
830 		    "ibt_query_hca_byguid failed = %d", status);
831 		return;
832 	}
833 	nports = hca_attrs.hca_nports;
834 
835 	IBTF_DPRINTF_L4(cmlog, "ibcm_hca_attach: num ports = %x", nports);
836 
837 	if ((hcap = ibcm_add_hca_entry(hcaguid, nports)) == NULL)
838 		return;
839 
840 	hcap->hca_guid = hcaguid;	/* Set GUID */
841 	hcap->hca_num_ports = nports;	/* Set number of ports */
842 
843 	if (ibcm_init_hca_ids(hcap) != IBCM_SUCCESS) {
844 		ibcm_delete_hca_entry(hcap);
845 		return;
846 	}
847 
848 	/* Store the static hca attribute data */
849 	hcap->hca_caps = hca_attrs.hca_flags;
850 	hcap->hca_ack_delay = hca_attrs.hca_local_ack_delay;
851 	hcap->hca_max_rdma_in_qp = hca_attrs.hca_max_rdma_in_qp;
852 	hcap->hca_max_rdma_out_qp = hca_attrs.hca_max_rdma_out_qp;
853 
854 	/* loop thru nports and initialize IBMF handles */
855 	for (i = 0; i < hcap->hca_num_ports; i++) {
856 		status = ibt_get_port_state_byguid(hcaguid, i + 1, NULL, NULL);
857 		if (status != IBT_SUCCESS) {
858 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
859 			    "port_num %d state DOWN", i + 1);
860 		}
861 
862 		hcap->hca_port_info[i].port_hcap = hcap;
863 		hcap->hca_port_info[i].port_num = i+1;
864 
865 		if (ibcm_hca_init_port(hcap, i) != IBT_SUCCESS)
866 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
867 			    "ibcm_hca_init_port failed %d port_num %d",
868 			    status, i+1);
869 	}
870 
871 	/* create the "active" CM AVL tree */
872 	avl_create(&hcap->hca_active_tree, ibcm_active_node_compare,
873 	    sizeof (ibcm_state_data_t),
874 	    offsetof(struct ibcm_state_data_s, avl_active_link));
875 
876 	/* create the "passive" CM AVL tree */
877 	avl_create(&hcap->hca_passive_tree, ibcm_passive_node_compare,
878 	    sizeof (ibcm_state_data_t),
879 	    offsetof(struct ibcm_state_data_s, avl_passive_link));
880 
881 	/* create the "passive comid" CM AVL tree */
882 	avl_create(&hcap->hca_passive_comid_tree,
883 	    ibcm_passive_comid_node_compare,
884 	    sizeof (ibcm_state_data_t),
885 	    offsetof(struct ibcm_state_data_s, avl_passive_comid_link));
886 
887 	/*
888 	 * Mark the state of the HCA to "attach" only at the end
889 	 * Now CM starts accepting incoming MADs and client API calls
890 	 */
891 	hcap->hca_state = IBCM_HCA_ACTIVE;
892 
893 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*hcap))
894 
895 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_attach: ATTACH Done");
896 }
897 
898 /*
899  * ibcm_hca_detach():
900  *	Called as an asynchronous event to notify CM of a detach of HCA.
901  *	Here ibcm_hca_info_t is freed up and all fields that
902  *	were initialized earlier are cleaned up
903  *
904  * Arguments: (WILL CHANGE BASED ON ASYNC EVENT CODE)
905  *	hca_guid    - HCA's guid
906  *
907  * Return values:
908  *	IBCM_SUCCESS	- able to detach HCA
909  *	IBCM_FAILURE	- failed to detach HCA
910  */
911 static ibcm_status_t
912 ibcm_hca_detach(ibcm_hca_info_t *hcap)
913 {
914 	int		port_index, i;
915 	ibcm_status_t	status = IBCM_SUCCESS;
916 	clock_t		absolute_time;
917 
918 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach: hcap = 0x%p guid = 0x%llX",
919 	    hcap, hcap->hca_guid);
920 
921 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
922 
923 	/*
924 	 * Declare hca is going away to all CM clients. Wait until the
925 	 * access count becomes zero.
926 	 */
927 	hcap->hca_state = IBCM_HCA_NOT_ACTIVE;
928 
929 	/* wait on response CV to 500mS */
930 	absolute_time = ddi_get_lbolt() +
931 	    drv_usectohz(ibcm_wait_for_acc_cnt_timeout);
932 
933 	while (hcap->hca_acc_cnt > 0)
934 		if (cv_timedwait(&ibcm_global_hca_cv, &ibcm_global_hca_lock,
935 		    absolute_time) == -1)
936 			break;
937 
938 	if (hcap->hca_acc_cnt != 0) {
939 		/* We got a timeout */
940 #ifdef DEBUG
941 		if (ibcm_test_mode > 0)
942 			IBTF_DPRINTF_L1(cmlog, "ibcm_hca_detach: Unexpected "
943 			    "abort due to timeout on acc_cnt %u",
944 			    hcap->hca_acc_cnt);
945 		else
946 #endif
947 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: Aborting due"
948 			    " to timeout on acc_cnt %u", hcap->hca_acc_cnt);
949 		hcap->hca_state = IBCM_HCA_ACTIVE;
950 		return (IBCM_FAILURE);
951 	}
952 
953 	/*
954 	 * First make sure, there are no active users of ibma handles,
955 	 * and then de-register handles.
956 	 */
957 
958 	/* make sure that there are no "Service"s registered w/ this HCA. */
959 	if (hcap->hca_svc_cnt != 0) {
960 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
961 		    "Active services still there %d", hcap->hca_svc_cnt);
962 		hcap->hca_state = IBCM_HCA_ACTIVE;
963 		return (IBCM_FAILURE);
964 	}
965 
966 	if (ibcm_check_sidr_clean(hcap) != IBCM_SUCCESS) {
967 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach:"
968 		    "There are active SIDR operations");
969 		hcap->hca_state = IBCM_HCA_ACTIVE;
970 		return (IBCM_FAILURE);
971 	}
972 
973 	if (ibcm_check_avl_clean(hcap) != IBCM_SUCCESS) {
974 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
975 		    "There are active RC connections");
976 		hcap->hca_state = IBCM_HCA_ACTIVE;
977 		return (IBCM_FAILURE);
978 	}
979 
980 	/*
981 	 * Now, wait until all rc and sidr stateps go away
982 	 * All these stateps must be short lived ones, waiting to be cleaned
983 	 * up after some timeout value, based on the current state.
984 	 */
985 	IBTF_DPRINTF_L5(cmlog, "ibcm_hca_detach:hca_guid = 0x%llX res_cnt = %d",
986 	    hcap->hca_guid, hcap->hca_res_cnt);
987 
988 	/* wait on response CV to 500mS */
989 	absolute_time = ddi_get_lbolt() +
990 	    drv_usectohz(ibcm_wait_for_res_cnt_timeout);
991 
992 	while (hcap->hca_res_cnt > 0)
993 		if (cv_timedwait(&ibcm_global_hca_cv, &ibcm_global_hca_lock,
994 		    absolute_time) == -1)
995 			break;
996 
997 	if (hcap->hca_res_cnt != 0) {
998 		/* We got a timeout waiting for hca_res_cnt to become 0 */
999 #ifdef DEBUG
1000 		if (ibcm_test_mode > 0)
1001 			IBTF_DPRINTF_L1(cmlog, "ibcm_hca_detach: Unexpected "
1002 			    "abort due to timeout on res_cnt %d",
1003 			    hcap->hca_res_cnt);
1004 		else
1005 #endif
1006 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: Aborting due"
1007 			    " to timeout on res_cnt %d", hcap->hca_res_cnt);
1008 		hcap->hca_state = IBCM_HCA_ACTIVE;
1009 		return (IBCM_FAILURE);
1010 	}
1011 
1012 	/* Re-assert the while loop step above */
1013 	ASSERT(hcap->hca_sidr_list == NULL);
1014 	avl_destroy(&hcap->hca_active_tree);
1015 	avl_destroy(&hcap->hca_passive_tree);
1016 	avl_destroy(&hcap->hca_passive_comid_tree);
1017 
1018 	/*
1019 	 * Unregister all ports from IBMA
1020 	 * If there is a failure, re-initialize any free'd ibma handles. This
1021 	 * is required to receive the incoming mads
1022 	 */
1023 	status = IBCM_SUCCESS;
1024 	for (port_index = 0; port_index < hcap->hca_num_ports; port_index++) {
1025 		if ((status = ibcm_hca_fini_port(hcap, port_index)) !=
1026 		    IBCM_SUCCESS) {
1027 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
1028 			    "Failed to free IBMA Handle for port_num %d",
1029 			    port_index + 1);
1030 			break;
1031 		}
1032 	}
1033 
1034 	/* If detach fails, re-initialize ibma handles for incoming mads */
1035 	if (status != IBCM_SUCCESS)  {
1036 		for (i = 0; i < port_index; i++) {
1037 			if (ibcm_hca_init_port(hcap, i) != IBT_SUCCESS)
1038 				IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
1039 				    "Failed to re-allocate IBMA Handles for"
1040 				    " port_num %d", port_index + 1);
1041 		}
1042 		hcap->hca_state = IBCM_HCA_ACTIVE;
1043 		return (IBCM_FAILURE);
1044 	}
1045 
1046 	ibcm_fini_hca_ids(hcap);
1047 	ibcm_delete_hca_entry(hcap);
1048 
1049 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach: DETACH succeeded");
1050 	return (IBCM_SUCCESS);
1051 }
1052 
1053 /* Checks, if there are any active sidr state entries in the specified hca */
1054 static ibcm_status_t
1055 ibcm_check_sidr_clean(ibcm_hca_info_t *hcap)
1056 {
1057 	ibcm_ud_state_data_t	*usp;
1058 	uint32_t		transient_cnt = 0;
1059 
1060 	IBTF_DPRINTF_L5(cmlog, "ibcm_check_sidr_clean:");
1061 
1062 	rw_enter(&hcap->hca_sidr_list_lock, RW_WRITER);
1063 	usp = hcap->hca_sidr_list;	/* Point to the list */
1064 	while (usp != NULL) {
1065 		mutex_enter(&usp->ud_state_mutex);
1066 		if ((usp->ud_state != IBCM_STATE_SIDR_REP_SENT) &&
1067 		    (usp->ud_state != IBCM_STATE_TIMED_OUT) &&
1068 		    (usp->ud_state != IBCM_STATE_DELETE)) {
1069 
1070 			IBTF_DPRINTF_L3(cmlog, "ibcm_check_sidr_clean:"
1071 			    "usp = %p not in transient state = %d", usp,
1072 			    usp->ud_state);
1073 
1074 			mutex_exit(&usp->ud_state_mutex);
1075 			rw_exit(&hcap->hca_sidr_list_lock);
1076 			return (IBCM_FAILURE);
1077 		} else {
1078 			mutex_exit(&usp->ud_state_mutex);
1079 			++transient_cnt;
1080 		}
1081 
1082 		usp = usp->ud_nextp;
1083 	}
1084 	rw_exit(&hcap->hca_sidr_list_lock);
1085 
1086 	IBTF_DPRINTF_L4(cmlog, "ibcm_check_sidr_clean: transient_cnt %d",
1087 	    transient_cnt);
1088 
1089 	return (IBCM_SUCCESS);
1090 }
1091 
1092 /* Checks, if there are any active rc state entries, in the specified hca */
1093 static ibcm_status_t
1094 ibcm_check_avl_clean(ibcm_hca_info_t *hcap)
1095 
1096 {
1097 	ibcm_state_data_t	*sp;
1098 	avl_tree_t		*avl_tree;
1099 	uint32_t		transient_cnt = 0;
1100 
1101 	IBTF_DPRINTF_L5(cmlog, "ibcm_check_avl_clean:");
1102 	/*
1103 	 * Both the trees ie., active and passive must reference to all
1104 	 * statep's, so let's use one
1105 	 */
1106 	avl_tree = &hcap->hca_active_tree;
1107 
1108 	rw_enter(&hcap->hca_state_rwlock, RW_WRITER);
1109 
1110 	for (sp = avl_first(avl_tree); sp != NULL;
1111 	    sp = avl_walk(avl_tree, sp, AVL_AFTER)) {
1112 		mutex_enter(&sp->state_mutex);
1113 		if ((sp->state != IBCM_STATE_TIMEWAIT) &&
1114 		    (sp->state != IBCM_STATE_REJ_SENT) &&
1115 		    (sp->state != IBCM_STATE_DELETE)) {
1116 			IBTF_DPRINTF_L3(cmlog, "ibcm_check_avl_clean: "
1117 			    "sp = %p not in transient state = %d", sp,
1118 			    sp->state);
1119 			mutex_exit(&sp->state_mutex);
1120 			rw_exit(&hcap->hca_state_rwlock);
1121 			return (IBCM_FAILURE);
1122 		} else {
1123 			mutex_exit(&sp->state_mutex);
1124 			++transient_cnt;
1125 		}
1126 	}
1127 
1128 	rw_exit(&hcap->hca_state_rwlock);
1129 
1130 	IBTF_DPRINTF_L4(cmlog, "ibcm_check_avl_clean: transient_cnt %d",
1131 	    transient_cnt);
1132 
1133 	return (IBCM_SUCCESS);
1134 }
1135 
1136 /* Adds a new entry into CM's global hca list, if hca_guid is not there yet */
1137 static ibcm_hca_info_t *
1138 ibcm_add_hca_entry(ib_guid_t hcaguid, uint_t nports)
1139 {
1140 	ibcm_hca_info_t	*hcap;
1141 
1142 	IBTF_DPRINTF_L5(cmlog, "ibcm_add_hca_entry: guid = 0x%llX",
1143 	    hcaguid);
1144 
1145 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
1146 
1147 	/*
1148 	 * Check if this hca_guid already in the list
1149 	 * If yes, then ignore this and return NULL
1150 	 */
1151 
1152 	hcap = ibcm_hca_listp;
1153 
1154 	/* search for this HCA */
1155 	while (hcap != NULL) {
1156 		if (hcap->hca_guid == hcaguid) {
1157 			/* already exists */
1158 			IBTF_DPRINTF_L2(cmlog, "ibcm_add_hca_entry: "
1159 			    "hcap %p guid 0x%llX, entry already exists !!",
1160 			    hcap, hcap->hca_guid);
1161 			return (NULL);
1162 		}
1163 		hcap = hcap->hca_next;
1164 	}
1165 
1166 	/* Allocate storage for the new HCA entry found */
1167 	hcap = kmem_zalloc(sizeof (ibcm_hca_info_t) +
1168 	    (nports - 1) * sizeof (ibcm_port_info_t), KM_SLEEP);
1169 
1170 	/* initialize RW lock */
1171 	rw_init(&hcap->hca_state_rwlock, NULL, RW_DRIVER, NULL);
1172 	/* initialize SIDR list lock */
1173 	rw_init(&hcap->hca_sidr_list_lock, NULL, RW_DRIVER, NULL);
1174 	/* Insert "hcap" into the global HCA list maintained by CM */
1175 	hcap->hca_next = ibcm_hca_listp;
1176 	ibcm_hca_listp = hcap;
1177 
1178 	IBTF_DPRINTF_L5(cmlog, "ibcm_add_hca_entry: done hcap = 0x%p", hcap);
1179 
1180 	return (hcap);
1181 
1182 }
1183 
1184 /* deletes the given ibcm_hca_info_t from CM's global hca list */
1185 void
1186 ibcm_delete_hca_entry(ibcm_hca_info_t *hcap)
1187 {
1188 	ibcm_hca_info_t	*headp, *prevp = NULL;
1189 
1190 	/* ibcm_hca_global_lock is held */
1191 	IBTF_DPRINTF_L5(cmlog, "ibcm_delete_hca_entry: guid = 0x%llX "
1192 	    "hcap = 0x%p", hcap->hca_guid, hcap);
1193 
1194 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
1195 
1196 	headp = ibcm_hca_listp;
1197 	while (headp != NULL) {
1198 		if (headp == hcap) {
1199 			IBTF_DPRINTF_L3(cmlog, "ibcm_delete_hca_entry: "
1200 			    "deleting hcap %p hcaguid %llX", hcap,
1201 			    hcap->hca_guid);
1202 			if (prevp) {
1203 				prevp->hca_next = headp->hca_next;
1204 			} else {
1205 				prevp = headp->hca_next;
1206 				ibcm_hca_listp = prevp;
1207 			}
1208 			rw_destroy(&hcap->hca_state_rwlock);
1209 			rw_destroy(&hcap->hca_sidr_list_lock);
1210 			kmem_free(hcap, sizeof (ibcm_hca_info_t) +
1211 			    (hcap->hca_num_ports - 1) *
1212 			    sizeof (ibcm_port_info_t));
1213 			return;
1214 		}
1215 
1216 		prevp = headp;
1217 		headp = headp->hca_next;
1218 	}
1219 }
1220 
1221 /*
1222  * ibcm_find_hca_entry:
1223  *	Given a HCA's GUID find out ibcm_hca_info_t entry for that HCA
1224  *	This entry can be then used to access AVL tree/SIDR list etc.
1225  *	If entry exists and in HCA ATTACH state, then hca's ref cnt is
1226  *	incremented and entry returned. Else NULL returned.
1227  *
1228  *	All functions that use ibcm_find_hca_entry and get a non-NULL
1229  *	return values must call ibcm_dec_hca_acc_cnt to decrement the
1230  *	respective hca ref cnt. There shouldn't be any usage of
1231  *	ibcm_hca_info_t * returned from ibcm_find_hca_entry,
1232  *	after decrementing the hca_acc_cnt
1233  *
1234  * INPUTS:
1235  *	hca_guid	- HCA's guid
1236  *
1237  * RETURN VALUE:
1238  *	hcap		- if a match is found, else NULL
1239  */
1240 ibcm_hca_info_t *
1241 ibcm_find_hca_entry(ib_guid_t hca_guid)
1242 {
1243 	ibcm_hca_info_t *hcap;
1244 
1245 	IBTF_DPRINTF_L5(cmlog, "ibcm_find_hca_entry: guid = 0x%llX", hca_guid);
1246 
1247 	mutex_enter(&ibcm_global_hca_lock);
1248 
1249 	hcap = ibcm_hca_listp;
1250 	/* search for this HCA */
1251 	while (hcap != NULL) {
1252 		if (hcap->hca_guid == hca_guid)
1253 			break;
1254 		hcap = hcap->hca_next;
1255 	}
1256 
1257 	/* if no hcap for the hca_guid, return NULL */
1258 	if (hcap == NULL) {
1259 		mutex_exit(&ibcm_global_hca_lock);
1260 		return (NULL);
1261 	}
1262 
1263 	/* return hcap, only if it valid to use */
1264 	if (hcap->hca_state == IBCM_HCA_ACTIVE) {
1265 		++(hcap->hca_acc_cnt);
1266 
1267 		IBTF_DPRINTF_L5(cmlog, "ibcm_find_hca_entry: "
1268 		    "found hcap = 0x%p hca_acc_cnt %u", hcap,
1269 		    hcap->hca_acc_cnt);
1270 
1271 		mutex_exit(&ibcm_global_hca_lock);
1272 		return (hcap);
1273 	} else {
1274 		mutex_exit(&ibcm_global_hca_lock);
1275 
1276 		IBTF_DPRINTF_L2(cmlog, "ibcm_find_hca_entry: "
1277 		    "found hcap = 0x%p not in active state", hcap);
1278 		return (NULL);
1279 	}
1280 }
1281 
1282 /*
1283  * Searches for ibcm_hca_info_t entry based on hca_guid, but doesn't increment
1284  * the hca's reference count. This function is used, where the calling context
1285  * is attempting to delete hcap itself and hence acc_cnt cannot be incremented
1286  * OR assumes that valid hcap must be available in ibcm's global hca list.
1287  */
1288 ibcm_hca_info_t *
1289 ibcm_find_hcap_entry(ib_guid_t hca_guid)
1290 {
1291 	ibcm_hca_info_t *hcap;
1292 
1293 	IBTF_DPRINTF_L5(cmlog, "ibcm_find_hcap_entry: guid = 0x%llX", hca_guid);
1294 
1295 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
1296 
1297 	hcap = ibcm_hca_listp;
1298 	/* search for this HCA */
1299 	while (hcap != NULL) {
1300 		if (hcap->hca_guid == hca_guid)
1301 			break;
1302 		hcap = hcap->hca_next;
1303 	}
1304 
1305 	if (hcap == NULL)
1306 		IBTF_DPRINTF_L2(cmlog, "ibcm_find_hcap_entry: No hcap found for"
1307 		    " hca_guid 0x%llX", hca_guid);
1308 	else
1309 		IBTF_DPRINTF_L5(cmlog, "ibcm_find_hcap_entry: hcap found for"
1310 		    " hca_guid 0x%llX", hca_guid);
1311 
1312 	return (hcap);
1313 }
1314 
1315 /* increment the hca's temporary reference count */
1316 ibcm_status_t
1317 ibcm_inc_hca_acc_cnt(ibcm_hca_info_t *hcap)
1318 {
1319 	mutex_enter(&ibcm_global_hca_lock);
1320 	if (hcap->hca_state == IBCM_HCA_ACTIVE) {
1321 		++(hcap->hca_acc_cnt);
1322 		IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_acc_cnt: "
1323 		    "hcap = 0x%p  acc_cnt = %d ", hcap, hcap->hca_acc_cnt);
1324 		mutex_exit(&ibcm_global_hca_lock);
1325 		return (IBCM_SUCCESS);
1326 	} else {
1327 		IBTF_DPRINTF_L2(cmlog, "ibcm_inc_hca_acc_cnt: "
1328 		    "hcap INACTIVE 0x%p  acc_cnt = %d ", hcap,
1329 		    hcap->hca_acc_cnt);
1330 		mutex_exit(&ibcm_global_hca_lock);
1331 		return (IBCM_FAILURE);
1332 	}
1333 }
1334 
1335 /* decrement the hca's ref count, and wake up any waiting threads */
1336 void
1337 ibcm_dec_hca_acc_cnt(ibcm_hca_info_t *hcap)
1338 {
1339 	mutex_enter(&ibcm_global_hca_lock);
1340 	ASSERT(hcap->hca_acc_cnt > 0);
1341 	--(hcap->hca_acc_cnt);
1342 	IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_acc_cnt: hcap = 0x%p "
1343 	    "acc_cnt = %d", hcap, hcap->hca_acc_cnt);
1344 	if ((hcap->hca_state == IBCM_HCA_NOT_ACTIVE) &&
1345 	    (hcap->hca_acc_cnt == 0)) {
1346 		IBTF_DPRINTF_L3(cmlog, "ibcm_dec_hca_acc_cnt: "
1347 		    "cv_broadcast for hcap = 0x%p", hcap);
1348 		cv_broadcast(&ibcm_global_hca_cv);
1349 	}
1350 	mutex_exit(&ibcm_global_hca_lock);
1351 }
1352 
1353 /* increment the hca's resource count */
1354 void
1355 ibcm_inc_hca_res_cnt(ibcm_hca_info_t *hcap)
1356 
1357 {
1358 	mutex_enter(&ibcm_global_hca_lock);
1359 	++(hcap->hca_res_cnt);
1360 	IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_res_cnt: hcap = 0x%p "
1361 	    "ref_cnt = %d", hcap, hcap->hca_res_cnt);
1362 	mutex_exit(&ibcm_global_hca_lock);
1363 }
1364 
1365 /* decrement the hca's resource count, and wake up any waiting threads */
1366 void
1367 ibcm_dec_hca_res_cnt(ibcm_hca_info_t *hcap)
1368 {
1369 	mutex_enter(&ibcm_global_hca_lock);
1370 	ASSERT(hcap->hca_res_cnt > 0);
1371 	--(hcap->hca_res_cnt);
1372 	IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_res_cnt: hcap = 0x%p "
1373 	    "ref_cnt = %d", hcap, hcap->hca_res_cnt);
1374 	if ((hcap->hca_state == IBCM_HCA_NOT_ACTIVE) &&
1375 	    (hcap->hca_res_cnt == 0)) {
1376 		IBTF_DPRINTF_L3(cmlog, "ibcm_dec_hca_res_cnt: "
1377 		    "cv_broadcast for hcap = 0x%p", hcap);
1378 		cv_broadcast(&ibcm_global_hca_cv);
1379 	}
1380 	mutex_exit(&ibcm_global_hca_lock);
1381 }
1382 
1383 /* increment the hca's service count */
1384 void
1385 ibcm_inc_hca_svc_cnt(ibcm_hca_info_t *hcap)
1386 
1387 {
1388 	mutex_enter(&ibcm_global_hca_lock);
1389 	++(hcap->hca_svc_cnt);
1390 	IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_svc_cnt: hcap = 0x%p "
1391 	    "svc_cnt = %d", hcap, hcap->hca_svc_cnt);
1392 	mutex_exit(&ibcm_global_hca_lock);
1393 }
1394 
1395 /* decrement the hca's service count */
1396 void
1397 ibcm_dec_hca_svc_cnt(ibcm_hca_info_t *hcap)
1398 {
1399 	mutex_enter(&ibcm_global_hca_lock);
1400 	ASSERT(hcap->hca_svc_cnt > 0);
1401 	--(hcap->hca_svc_cnt);
1402 	IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_svc_cnt: hcap = 0x%p "
1403 	    "svc_cnt = %d", hcap, hcap->hca_svc_cnt);
1404 	mutex_exit(&ibcm_global_hca_lock);
1405 }
1406 
1407 /*
1408  * The following code manages three classes of requests that CM makes to
1409  * the fabric.  Those three classes are SA_ACCESS, REQ/REP/RTU, and DREQ/DREP.
1410  * The main issue is that the fabric can become very busy, and the CM
1411  * protocols rely on responses being made based on a predefined timeout
1412  * value.  By managing how many simultaneous sessions are allowed, there
1413  * is observed extremely high reliability of CM protocol succeeding when
1414  * it should.
1415  *
1416  * SA_ACCESS and DREQ/DREP are managed at the thread level, whereby the
1417  * thread blocks until there are less than some number of threads doing
1418  * similar requests.
1419  *
1420  * REQ/REP/RTU requests beyond a given limit are added to a list,
1421  * allowing the thread to return immediately to its caller in the
1422  * case where the "mode" is IBT_NONBLOCKING.  This is the mode used
1423  * by uDAPL and seems to be an important feature/behavior.
1424  */
1425 
1426 static int
1427 ibcm_ok_to_start(struct ibcm_open_s *openp)
1428 {
1429 	return (openp->sends < openp->sends_hiwat &&
1430 	    openp->in_progress < openp->in_progress_max);
1431 }
1432 
1433 void
1434 ibcm_open_done(ibcm_state_data_t *statep)
1435 {
1436 	int run;
1437 
1438 	ASSERT(MUTEX_HELD(&statep->state_mutex));
1439 	if (statep->open_flow == 1) {
1440 		statep->open_flow = 0;
1441 		mutex_enter(&ibcm_open.mutex);
1442 		ibcm_open.in_progress--;
1443 		run = ibcm_ok_to_start(&ibcm_open);
1444 		mutex_exit(&ibcm_open.mutex);
1445 		if (run)
1446 			ibcm_run_tlist_thread();
1447 	}
1448 }
1449 
1450 /* dtrace */
1451 void
1452 ibcm_open_wait(hrtime_t delta)
1453 {
1454 	if (delta > 1000000)
1455 		IBTF_DPRINTF_L2(cmlog, "ibcm_open_wait: flow more %lld", delta);
1456 }
1457 
/*
 * Posts this statep's stored REQ MAD and drops one reference on the
 * statep.  NOTE(review): the ref being dropped appears to be the one
 * taken before the statep entered the open flow — confirm at callers.
 */
void
ibcm_open_start(ibcm_state_data_t *statep)
{
	ibcm_insert_trace(statep, IBCM_TRACE_OUTGOING_REQ);

	/* report (dtrace-style) how long this open waited to start */
	mutex_enter(&statep->state_mutex);
	ibcm_open_wait(gethrtime() - statep->post_time);
	mutex_exit(&statep->state_mutex);

	ibcm_post_rc_mad(statep, statep->stored_msg, ibcm_post_req_complete,
	    statep);

	mutex_enter(&statep->state_mutex);
	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}
1474 
/*
 * Starts a flow-controlled REQ send.  If nothing is queued ahead of us
 * and the flow limits allow it, the REQ is posted from this thread;
 * otherwise the statep is appended to the open queue for the
 * timeout-list thread to drain later (see ibcm_check_for_opens).
 */
void
ibcm_open_enqueue(ibcm_state_data_t *statep)
{
	int run;

	/* stamp enqueue time for queue-delay reporting in ibcm_open_start */
	mutex_enter(&statep->state_mutex);
	statep->post_time = gethrtime();
	mutex_exit(&statep->state_mutex);
	mutex_enter(&ibcm_open.mutex);
	if (ibcm_open.queued == 0 && ibcm_ok_to_start(&ibcm_open)) {
		/* fast path: nobody waiting and capacity available */
		ibcm_open.in_progress++;
		mutex_exit(&ibcm_open.mutex);
		ibcm_open_start(statep);
	} else {
		/* append at the tail; ibcm_open.head is a dummy anchor */
		ibcm_open.queued++;
		statep->open_link = NULL;
		ibcm_open.tail->open_link = statep;
		ibcm_open.tail = statep;
		run = ibcm_ok_to_start(&ibcm_open);
		mutex_exit(&ibcm_open.mutex);
		if (run)
			ibcm_run_tlist_thread();
	}
}
1499 
1500 ibcm_state_data_t *
1501 ibcm_open_dequeue(void)
1502 {
1503 	ibcm_state_data_t *statep;
1504 
1505 	ASSERT(MUTEX_HELD(&ibcm_open.mutex));
1506 	ibcm_open.queued--;
1507 	ibcm_open.in_progress++;
1508 	statep = ibcm_open.head.open_link;
1509 	ibcm_open.head.open_link = statep->open_link;
1510 	statep->open_link = NULL;
1511 	if (ibcm_open.tail == statep)
1512 		ibcm_open.tail = &ibcm_open.head;
1513 	return (statep);
1514 }
1515 
1516 void
1517 ibcm_check_for_opens(void)
1518 {
1519 	ibcm_state_data_t 	*statep;
1520 
1521 	mutex_enter(&ibcm_open.mutex);
1522 
1523 	while (ibcm_open.queued > 0) {
1524 		if (ibcm_ok_to_start(&ibcm_open)) {
1525 			statep = ibcm_open_dequeue();
1526 			mutex_exit(&ibcm_open.mutex);
1527 
1528 			ibcm_open_start(statep);
1529 
1530 			mutex_enter(&ibcm_open.mutex);
1531 		} else {
1532 			break;
1533 		}
1534 	}
1535 	mutex_exit(&ibcm_open.mutex);
1536 }
1537 
1538 
1539 static void
1540 ibcm_flow_init(ibcm_flow_t *flow, uint_t simul_max)
1541 {
1542 	flow->list			= NULL;
1543 	flow->simul			= 0;
1544 	flow->waiters_per_chunk		= 4;
1545 	flow->simul_max			= simul_max;
1546 	flow->lowat			= simul_max - flow->waiters_per_chunk;
1547 	flow->lowat_default		= flow->lowat;
1548 	/* stats */
1549 	flow->total			= 0;
1550 }
1551 
1552 static void
1553 ibcm_rc_flow_control_init(void)
1554 {
1555 	mutex_init(&ibcm_open.mutex, NULL, MUTEX_DEFAULT, NULL);
1556 	mutex_enter(&ibcm_open.mutex);
1557 	ibcm_flow_init(&ibcm_close_flow, ibcm_close_simul_max);
1558 	ibcm_flow_init(&ibcm_lapr_flow, ibcm_lapr_simul_max);
1559 	ibcm_flow_init(&ibcm_saa_flow, ibcm_saa_simul_max);
1560 
1561 	ibcm_open.queued 		= 0;
1562 	ibcm_open.exit_deferred 	= 0;
1563 	ibcm_open.in_progress 		= 0;
1564 	ibcm_open.in_progress_max 	= 16;
1565 	ibcm_open.sends 		= 0;
1566 	ibcm_open.sends_max 		= 0;
1567 	ibcm_open.sends_lowat 		= 8;
1568 	ibcm_open.sends_hiwat 		= 16;
1569 	ibcm_open.tail 			= &ibcm_open.head;
1570 	ibcm_open.head.open_link 	= NULL;
1571 	mutex_exit(&ibcm_open.mutex);
1572 }
1573 
/* Tears down the mutex set up by ibcm_rc_flow_control_init(). */
static void
ibcm_rc_flow_control_fini(void)
{
	mutex_destroy(&ibcm_open.mutex);
}
1579 
/*
 * Finds (or allocates) a flow1 waiter chunk with room for one more
 * waiter.  Called with ibcm_open.mutex held; the mutex is dropped and
 * re-acquired around the KM_SLEEP allocation, so the list is re-checked
 * afterwards in case another thread made room in the meantime.
 */
static ibcm_flow1_t *
ibcm_flow_find(ibcm_flow_t *flow)
{
	ibcm_flow1_t *flow1;
	ibcm_flow1_t *f;

	f = flow->list;
	if (f) {	/* most likely code path */
		while (f->link != NULL)
			f = f->link;
		if (f->waiters < flow->waiters_per_chunk)
			return (f);
	}

	/* There was no flow1 list element ready for another waiter */
	mutex_exit(&ibcm_open.mutex);
	flow1 = kmem_alloc(sizeof (*flow1), KM_SLEEP);
	mutex_enter(&ibcm_open.mutex);

	/* re-scan: the list may have changed while the mutex was dropped */
	f = flow->list;
	if (f) {
		while (f->link != NULL)
			f = f->link;
		if (f->waiters < flow->waiters_per_chunk) {
			/* another thread made room; discard our allocation */
			kmem_free(flow1, sizeof (*flow1));
			return (f);
		}
		f->link = flow1;
	} else {
		flow->list = flow1;
	}
	cv_init(&flow1->cv, NULL, CV_DRIVER, NULL);
	flow1->waiters = 0;
	flow1->link = NULL;
	return (flow1);
}
1616 
/*
 * Enters a flow-controlled path.  If nobody is queued and the
 * simultaneous-use limit has headroom, accounts for this thread and
 * returns at once; otherwise blocks on a waiter chunk's CV until
 * ibcm_flow_exit() releases the whole chunk.  The last waiter leaving a
 * chunk destroys its CV and frees it.
 */
static void
ibcm_flow_enter(ibcm_flow_t *flow)
{
	mutex_enter(&ibcm_open.mutex);
	if (flow->list == NULL && flow->simul < flow->simul_max) {
		flow->simul++;
		flow->total++;
		mutex_exit(&ibcm_open.mutex);
	} else {
		ibcm_flow1_t *flow1;

		flow1 = ibcm_flow_find(flow);
		flow1->waiters++;
		cv_wait(&flow1->cv, &ibcm_open.mutex);
		/* woken by ibcm_flow_exit; last one out frees the chunk */
		if (--flow1->waiters == 0) {
			cv_destroy(&flow1->cv);
			mutex_exit(&ibcm_open.mutex);
			kmem_free(flow1, sizeof (*flow1));
		} else
			mutex_exit(&ibcm_open.mutex);
	}
}
1639 
/*
 * Leaves a flow-controlled path.  When the simultaneous-use count drops
 * below the (adaptive) low watermark, wakes the oldest chunk of waiters
 * in one batch, crediting all of them to the flow at once.
 */
static void
ibcm_flow_exit(ibcm_flow_t *flow)
{
	mutex_enter(&ibcm_open.mutex);
	if (--flow->simul < flow->lowat) {
		/* let lowat creep back up toward its default */
		if (flow->lowat < flow->lowat_default)
			flow->lowat++;
		if (flow->list) {
			ibcm_flow1_t *flow1;

			flow1 = flow->list;
			flow->list = flow1->link;	/* unlink */
			flow1->link = NULL;		/* be clean */
			flow->total += flow1->waiters;
			flow->simul += flow1->waiters;
			cv_broadcast(&flow1->cv);
		}
	}
	mutex_exit(&ibcm_open.mutex);
}
1660 
1661 void
1662 ibcm_flow_inc(void)
1663 {
1664 	mutex_enter(&ibcm_open.mutex);
1665 	if (++ibcm_open.sends > ibcm_open.sends_max) {
1666 		ibcm_open.sends_max = ibcm_open.sends;
1667 		IBTF_DPRINTF_L2(cmlog, "ibcm_flow_inc: sends max = %d",
1668 		    ibcm_open.sends_max);
1669 	}
1670 	mutex_exit(&ibcm_open.mutex);
1671 }
1672 
1673 static void
1674 ibcm_check_send_cmpltn_time(hrtime_t delta, char *event_msg)
1675 {
1676 	if (delta > 4000000LL) {
1677 		IBTF_DPRINTF_L2(cmlog, "ibcm_check_send_cmpltn_time: "
1678 		    "%s: %lldns", event_msg, delta);
1679 	}
1680 }
1681 
/*
 * Accounts for one completed MAD send.  When outstanding sends fall
 * below the low watermark this may kick the timeout-list thread to
 * start queued opens, and may complete one close-flow exit that
 * ibcm_close_exit()/ibcm_close_done() deferred while the fabric was
 * busy.
 */
void
ibcm_flow_dec(hrtime_t time, char *mad_type)
{
	int flow_exit = 0;
	int run = 0;

	if (ibcm_dtrace)
		ibcm_check_send_cmpltn_time(gethrtime() - time, mad_type);
	mutex_enter(&ibcm_open.mutex);
	ibcm_open.sends--;
	if (ibcm_open.sends < ibcm_open.sends_lowat) {
		run = ibcm_ok_to_start(&ibcm_open);
		if (ibcm_open.exit_deferred) {
			/* pay off one deferred ibcm_close_exit() */
			ibcm_open.exit_deferred--;
			flow_exit = 1;
		}
	}
	mutex_exit(&ibcm_open.mutex);
	if (flow_exit)
		ibcm_flow_exit(&ibcm_close_flow);
	if (run)
		ibcm_run_tlist_thread();
}
1705 
/* Acquires a slot in the DREQ/DREP flow; may block (see ibcm_flow_enter). */
void
ibcm_close_enter(void)
{
	ibcm_flow_enter(&ibcm_close_flow);
}
1711 
/*
 * Releases a slot in the DREQ/DREP flow.  If the fabric is busy (sends
 * at or above the low watermark) the release is deferred (at most 4
 * outstanding) and completed later by ibcm_flow_dec().
 */
void
ibcm_close_exit(void)
{
	int flow_exit;

	mutex_enter(&ibcm_open.mutex);
	if (ibcm_open.sends < ibcm_open.sends_lowat ||
	    ibcm_open.exit_deferred >= 4)
		flow_exit = 1;
	else {
		flow_exit = 0;
		ibcm_open.exit_deferred++;
	}
	mutex_exit(&ibcm_open.mutex);
	if (flow_exit)
		ibcm_flow_exit(&ibcm_close_flow);
}
1729 
1730 /*
1731  * This function needs to be called twice to finish our flow
1732  * control accounting when closing down a connection.  One
1733  * call has send_done set to 1, while the other has it set to 0.
1734  * Because of retries, this could get called more than once
1735  * with either 0 or 1, but additional calls have no effect.
1736  */
1737 void
1738 ibcm_close_done(ibcm_state_data_t *statep, int send_done)
1739 {
1740 	int flow_exit;
1741 
1742 	ASSERT(MUTEX_HELD(&statep->state_mutex));
1743 	if (statep->close_flow == 1) {
1744 		if (send_done)
1745 			statep->close_flow = 3;
1746 		else
1747 			statep->close_flow = 2;
1748 	} else if ((send_done && statep->close_flow == 2) ||
1749 	    (!send_done && statep->close_flow == 3)) {
1750 		statep->close_flow = 0;
1751 		mutex_enter(&ibcm_open.mutex);
1752 		if (ibcm_open.sends < ibcm_open.sends_lowat ||
1753 		    ibcm_open.exit_deferred >= 4)
1754 			flow_exit = 1;
1755 		else {
1756 			flow_exit = 0;
1757 			ibcm_open.exit_deferred++;
1758 		}
1759 		mutex_exit(&ibcm_open.mutex);
1760 		if (flow_exit)
1761 			ibcm_flow_exit(&ibcm_close_flow);
1762 	}
1763 }
1764 
/* Acquires a slot in the LAP/APR flow; may block (see ibcm_flow_enter). */
void
ibcm_lapr_enter(void)
{
	ibcm_flow_enter(&ibcm_lapr_flow);
}
1770 
/* Releases a slot in the LAP/APR flow, possibly waking waiters. */
void
ibcm_lapr_exit(void)
{
	ibcm_flow_exit(&ibcm_lapr_flow);
}
1776 
/* Acquires a slot in the SA-access flow; may block (see ibcm_flow_enter). */
void
ibcm_sa_access_enter()
{
	ibcm_flow_enter(&ibcm_saa_flow);
}
1782 
/* Releases a slot in the SA-access flow, possibly waking waiters. */
void
ibcm_sa_access_exit()
{
	ibcm_flow_exit(&ibcm_saa_flow);
}
1788 
/*
 * ibmf_saa subnet event callback.  Translates the SA event code into an
 * ibt_subnet_event_code_t and forwards it to IBTL's SM notice handler.
 * Runs under ibcm_sm_notice_serialize_lock; an HCA access reference is
 * held across the upcall so HCA detach waits for us.
 */
static void
ibcm_sm_notice_handler(ibmf_saa_handle_t saa_handle,
    ibmf_saa_subnet_event_t saa_event_code,
    ibmf_saa_event_details_t *saa_event_details,
    void *callback_arg)
{
	ibcm_port_info_t	*portp = (ibcm_port_info_t *)callback_arg;
	ibt_subnet_event_code_t code;
	ibt_subnet_event_t	event;
	uint8_t			event_status;

	IBTF_DPRINTF_L3(cmlog, "ibcm_sm_notice_handler: saa_hdl %p, code = %d",
	    saa_handle, saa_event_code);

	mutex_enter(&ibcm_sm_notice_serialize_lock);

	switch (saa_event_code) {
	case IBMF_SAA_EVENT_MCG_CREATED:
		code = IBT_SM_EVENT_MCG_CREATED;
		break;
	case IBMF_SAA_EVENT_MCG_DELETED:
		code = IBT_SM_EVENT_MCG_DELETED;
		break;
	case IBMF_SAA_EVENT_GID_AVAILABLE:
		code = IBT_SM_EVENT_GID_AVAIL;
		ibcm_path_cache_purge();
		break;
	case IBMF_SAA_EVENT_GID_UNAVAILABLE:
		code = IBT_SM_EVENT_GID_UNAVAIL;
		ibcm_path_cache_purge();
		break;
	case IBMF_SAA_EVENT_SUBSCRIBER_STATUS_CHG:
		/* only the producer-SM bit of the status mask matters here */
		event_status =
		    saa_event_details->ie_producer_event_status_mask &
		    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM;
		if (event_status == (portp->port_event_status &
		    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM)) {
			mutex_exit(&ibcm_sm_notice_serialize_lock);
			return;	/* no change */
		}
		portp->port_event_status = event_status;
		if (event_status == IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM)
			code = IBT_SM_EVENT_AVAILABLE;
		else
			code = IBT_SM_EVENT_UNAVAILABLE;
		break;
	default:
		mutex_exit(&ibcm_sm_notice_serialize_lock);
		return;
	}

	mutex_enter(&ibcm_global_hca_lock);

	/* don't send the event if we're tearing down */
	if (!IBCM_ACCESS_HCA_OK(portp->port_hcap)) {
		mutex_exit(&ibcm_global_hca_lock);
		mutex_exit(&ibcm_sm_notice_serialize_lock);
		return;
	}

	/* hold an access reference across the IBTL upcall */
	++(portp->port_hcap->hca_acc_cnt);
	mutex_exit(&ibcm_global_hca_lock);

	event.sm_notice_gid = saa_event_details->ie_gid;
	ibtl_cm_sm_notice_handler(portp->port_sgid0, code, &event);

	mutex_exit(&ibcm_sm_notice_serialize_lock);

	ibcm_dec_hca_acc_cnt(portp->port_hcap);
}
1859 
/*
 * Registers (or, with a NULL handler, deregisters) a client's subnet
 * notice handler with IBTL.  For every port whose SA event subscription
 * is not currently working, the client is immediately notified of the
 * init failure via ibtl_cm_sm_notice_init_failure().
 */
void
ibt_register_subnet_notices(ibt_clnt_hdl_t ibt_hdl,
    ibt_sm_notice_handler_t sm_notice_handler, void *private)
{
	ibcm_port_info_t	*portp;
	ibcm_hca_info_t		*hcap;
	uint8_t			port;
	int			num_failed_sgids;
	ibtl_cm_sm_init_fail_t	*ifail;
	ib_gid_t		*sgidp;

	IBTF_DPRINTF_L3(cmlog, "ibt_register_subnet_notices: ibt_hdl = %p",
	    ibt_hdl);

	mutex_enter(&ibcm_sm_notice_serialize_lock);

	ibtl_cm_set_sm_notice_handler(ibt_hdl, sm_notice_handler, private);
	if (sm_notice_handler == NULL) {
		mutex_exit(&ibcm_sm_notice_serialize_lock);
		return;
	}

	/* for each port, if service is not available, make a call */
	mutex_enter(&ibcm_global_hca_lock);
	num_failed_sgids = 0;
	hcap = ibcm_hca_listp;
	/* first pass: count the ports without a working subscription */
	while (hcap != NULL) {
		portp = hcap->hca_port_info;
		for (port = 0; port < hcap->hca_num_ports; port++) {
			if (!(portp->port_event_status &
			    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM))
				num_failed_sgids++;
			portp++;
		}
		hcap = hcap->hca_next;
	}
	/* second pass: size the report and fill in the failed SGIDs */
	if (num_failed_sgids != 0) {
		ifail = kmem_alloc(sizeof (*ifail) +
		    (num_failed_sgids - 1) * sizeof (ib_gid_t), KM_SLEEP);
		ifail->smf_num_sgids = num_failed_sgids;
		ifail->smf_ibt_hdl = ibt_hdl;
		sgidp = &ifail->smf_sgid[0];
		hcap = ibcm_hca_listp;
		while (hcap != NULL) {
			portp = hcap->hca_port_info;
			for (port = 0; port < hcap->hca_num_ports; port++) {
				if (!(portp->port_event_status &
				    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM))
					*sgidp++ = portp->port_sgid0;
				portp++;
			}
			hcap = hcap->hca_next;
		}
	}
	mutex_exit(&ibcm_global_hca_lock);

	if (num_failed_sgids != 0) {
		ibtl_cm_sm_notice_init_failure(ifail);
		kmem_free(ifail, sizeof (*ifail) +
		    (num_failed_sgids - 1) * sizeof (ib_gid_t));
	}
	mutex_exit(&ibcm_sm_notice_serialize_lock);
}
1923 
/* The following is run from a taskq because we've seen the stack overflow. */
/*
 * Taskq worker: opens an ibmf_saa session for the port and then clears
 * the port's open-in-progress flag, waking anyone blocked in
 * ibcm_get_saa_handle().  On failure the port's saa handle stays NULL.
 */
static void
ibcm_init_saa(void *arg)
{
	ibcm_port_info_t		*portp = (ibcm_port_info_t *)arg;
	int				status;
	ib_guid_t			port_guid;
	ibmf_saa_subnet_event_args_t	event_args;

	port_guid = portp->port_sgid0.gid_guid;

	IBTF_DPRINTF_L3(cmlog, "ibcm_init_saa: port guid %llX", port_guid);

	/* subscribe for subnet events via ibcm_sm_notice_handler */
	event_args.is_event_callback_arg = portp;
	event_args.is_event_callback = ibcm_sm_notice_handler;

	if ((status = ibmf_sa_session_open(port_guid, 0, &event_args,
	    IBMF_VERSION, 0, &portp->port_ibmf_saa_hdl)) != IBMF_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa: "
		    "ibmf_sa_session_open failed for port guid %llX "
		    "status = %d", port_guid, status);
	} else {
		IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa: "
		    "registered sa_hdl 0x%p for port guid %llX",
		    portp->port_ibmf_saa_hdl, port_guid);
	}

	/* open attempt finished (either way): release any waiters */
	mutex_enter(&ibcm_sa_open_lock);
	portp->port_saa_open_in_progress = 0;
	cv_broadcast(&ibcm_sa_open_cv);
	mutex_exit(&ibcm_sa_open_lock);
}
1956 
/*
 * Kicks off, via taskq, opening of an ibmf_saa session for the given
 * port (1-based), unless a session already exists or an open is
 * already in progress.  Returns without waiting for the open to finish;
 * use ibcm_get_saa_handle() to wait for and fetch the handle.
 */
void
ibcm_init_saa_handle(ibcm_hca_info_t *hcap, uint8_t port)
{
	ibmf_saa_handle_t	saa_handle;
	uint8_t			port_index = port - 1;
	ibcm_port_info_t	*portp = &hcap->hca_port_info[port_index];
	ibt_status_t		ibt_status;

	if (port_index >= hcap->hca_num_ports)
		return;

	mutex_enter(&ibcm_sa_open_lock);
	if (portp->port_saa_open_in_progress) {
		/* another thread is already opening this port's session */
		mutex_exit(&ibcm_sa_open_lock);
		return;
	}

	saa_handle = portp->port_ibmf_saa_hdl;
	if (saa_handle != NULL) {
		/* session already open; nothing to do */
		mutex_exit(&ibcm_sa_open_lock);
		return;
	}

	portp->port_saa_open_in_progress = 1;
	mutex_exit(&ibcm_sa_open_lock);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(portp->port_event_status))

	/* The assumption is that we're getting event notifications */
	portp->port_event_status = IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM;

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(portp->port_event_status))

	ibt_status = ibt_get_port_state_byguid(portp->port_hcap->hca_guid,
	    portp->port_num, &portp->port_sgid0, NULL);
	if (ibt_status != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa_handle: "
		    "ibt_get_port_state_byguid failed for guid %llX "
		    "with status %d", portp->port_hcap->hca_guid, ibt_status);
		/* abandon the open and release any waiters */
		mutex_enter(&ibcm_sa_open_lock);
		portp->port_saa_open_in_progress = 0;
		cv_broadcast(&ibcm_sa_open_cv);
		mutex_exit(&ibcm_sa_open_lock);
		return;
	}
	/* if the port is UP, try sa_session_open */
	(void) taskq_dispatch(ibcm_taskq, ibcm_init_saa, portp, TQ_SLEEP);
}
2005 
2006 
/*
 * Returns the port's ibmf_saa handle, opening a session first if
 * needed.  Unlike ibcm_init_saa_handle(), this waits (on
 * ibcm_sa_open_cv) for any in-progress open to finish.  Returns NULL
 * on bad port number or if the session could not be opened.
 */
ibmf_saa_handle_t
ibcm_get_saa_handle(ibcm_hca_info_t *hcap, uint8_t port)
{
	ibmf_saa_handle_t	saa_handle;
	uint8_t			port_index = port - 1;
	ibcm_port_info_t	*portp = &hcap->hca_port_info[port_index];
	ibt_status_t		ibt_status;

	if (port_index >= hcap->hca_num_ports)
		return (NULL);

	/* wait for any open already started by another thread */
	mutex_enter(&ibcm_sa_open_lock);
	while (portp->port_saa_open_in_progress) {
		cv_wait(&ibcm_sa_open_cv, &ibcm_sa_open_lock);
	}

	saa_handle = portp->port_ibmf_saa_hdl;
	if (saa_handle != NULL) {
		mutex_exit(&ibcm_sa_open_lock);
		return (saa_handle);
	}

	/* no session yet: claim the open and do it ourselves */
	portp->port_saa_open_in_progress = 1;
	mutex_exit(&ibcm_sa_open_lock);

	ibt_status = ibt_get_port_state_byguid(portp->port_hcap->hca_guid,
	    portp->port_num, &portp->port_sgid0, NULL);
	if (ibt_status != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_get_saa_handle: "
		    "ibt_get_port_state_byguid failed for guid %llX "
		    "with status %d", portp->port_hcap->hca_guid, ibt_status);
		/* abandon the open and release any waiters */
		mutex_enter(&ibcm_sa_open_lock);
		portp->port_saa_open_in_progress = 0;
		cv_broadcast(&ibcm_sa_open_cv);
		mutex_exit(&ibcm_sa_open_lock);
		return (NULL);
	}
	/* if the port is UP, try sa_session_open */
	(void) taskq_dispatch(ibcm_taskq, ibcm_init_saa, portp, TQ_SLEEP);

	/* wait for the taskq open attempt to complete */
	mutex_enter(&ibcm_sa_open_lock);
	while (portp->port_saa_open_in_progress) {
		cv_wait(&ibcm_sa_open_cv, &ibcm_sa_open_lock);
	}
	saa_handle = portp->port_ibmf_saa_hdl;
	mutex_exit(&ibcm_sa_open_lock);
	return (saa_handle);
}
2055 
2056 
2057 /*
2058  * ibcm_hca_init_port():
2059  * 	- Register port with IBMA
2060  *
2061  * Arguments:
2062  *	hcap		- HCA's guid
2063  *	port_index	- port number minus 1
2064  *
2065  * Return values:
2066  *	IBCM_SUCCESS - success
2067  */
2068 ibt_status_t
2069 ibcm_hca_init_port(ibcm_hca_info_t *hcap, uint8_t port_index)
2070 {
2071 	int			status;
2072 	ibmf_register_info_t	*ibmf_reg;
2073 
2074 	IBTF_DPRINTF_L4(cmlog, "ibcm_hca_init_port: hcap = 0x%p port_num %d",
2075 	    hcap, port_index + 1);
2076 
2077 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
2078 
2079 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(hcap->hca_port_info))
2080 
2081 	if (hcap->hca_port_info[port_index].port_ibmf_hdl == NULL) {
2082 		/* Register with IBMF */
2083 		ibmf_reg = &hcap->hca_port_info[port_index].port_ibmf_reg;
2084 		ibmf_reg->ir_ci_guid = hcap->hca_guid;
2085 		ibmf_reg->ir_port_num = port_index + 1;
2086 		ibmf_reg->ir_client_class = COMM_MGT_MANAGER_AGENT;
2087 
2088 		/*
2089 		 * register with management framework
2090 		 */
2091 		status = ibmf_register(ibmf_reg, IBMF_VERSION,
2092 		    IBMF_REG_FLAG_NO_OFFLOAD, NULL, NULL,
2093 		    &(hcap->hca_port_info[port_index].port_ibmf_hdl),
2094 		    &(hcap->hca_port_info[port_index].port_ibmf_caps));
2095 
2096 		if (status != IBMF_SUCCESS) {
2097 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_init_port: "
2098 			    "ibmf_register failed for port_num %x, "
2099 			    "status = %x", port_index + 1, status);
2100 			return (ibcm_ibmf_analyze_error(status));
2101 		}
2102 
2103 		hcap->hca_port_info[port_index].port_qp1.qp_cm =
2104 		    IBMF_QP_HANDLE_DEFAULT;
2105 		hcap->hca_port_info[port_index].port_qp1.qp_port =
2106 		    &(hcap->hca_port_info[port_index]);
2107 
2108 		/*
2109 		 * Register the read callback with IBMF.
2110 		 * Since we just did an ibmf_register, handle is
2111 		 * valid and ibcm_recv_cb() is valid so we can
2112 		 * safely assert for success of ibmf_setup_recv_cb()
2113 		 *
2114 		 * Depending on the "state" of the HCA,
2115 		 * CM may drop incoming packets
2116 		 */
2117 		status = ibmf_setup_async_cb(
2118 		    hcap->hca_port_info[port_index].port_ibmf_hdl,
2119 		    IBMF_QP_HANDLE_DEFAULT, ibcm_recv_cb,
2120 		    &(hcap->hca_port_info[port_index].port_qp1), 0);
2121 		ASSERT(status == IBMF_SUCCESS);
2122 
2123 		IBTF_DPRINTF_L5(cmlog, "ibcm_hca_init_port: "
2124 		    "IBMF hdl[%x] = 0x%p", port_index,
2125 		    hcap->hca_port_info[port_index].port_ibmf_hdl);
2126 
2127 		/* Attempt to get the saa_handle for this port */
2128 		ibcm_init_saa_handle(hcap, port_index + 1);
2129 	}
2130 
2131 	return (IBT_SUCCESS);
2132 }
2133 
2134 /*
2135  * useful, to re attempt to initialize port ibma handles from elsewhere in
2136  * cm code
2137  */
2138 ibt_status_t
2139 ibcm_hca_reinit_port(ibcm_hca_info_t *hcap, uint8_t port_index)
2140 {
2141 	ibt_status_t	status;
2142 
2143 	IBTF_DPRINTF_L5(cmlog, "ibcm_hca_reinit_port: hcap 0x%p port_num %d",
2144 	    hcap, port_index + 1);
2145 
2146 	mutex_enter(&ibcm_global_hca_lock);
2147 	status = ibcm_hca_init_port(hcap, port_index);
2148 	mutex_exit(&ibcm_global_hca_lock);
2149 	return (status);
2150 }
2151 
2152 
2153 /*
2154  * ibcm_hca_fini_port():
2155  * 	- Deregister port with IBMA
2156  *
2157  * Arguments:
2158  *	hcap		- HCA's guid
2159  *	port_index	- port number minus 1
2160  *
2161  * Return values:
2162  *	IBCM_SUCCESS - success
2163  */
2164 static ibcm_status_t
2165 ibcm_hca_fini_port(ibcm_hca_info_t *hcap, uint8_t port_index)
2166 {
2167 	int			ibmf_status;
2168 	ibcm_status_t		ibcm_status;
2169 
2170 	IBTF_DPRINTF_L4(cmlog, "ibcm_hca_fini_port: hcap = 0x%p port_num %d ",
2171 	    hcap, port_index + 1);
2172 
2173 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
2174 
2175 	if (hcap->hca_port_info[port_index].port_ibmf_saa_hdl != NULL) {
2176 		IBTF_DPRINTF_L5(cmlog, "ibcm_hca_fini_port: "
2177 		    "ibmf_sa_session_close IBMF SAA hdl %p",
2178 		    hcap->hca_port_info[port_index].port_ibmf_saa_hdl);
2179 
2180 		ibmf_status = ibmf_sa_session_close(
2181 		    &hcap->hca_port_info[port_index].port_ibmf_saa_hdl, 0);
2182 		if (ibmf_status != IBMF_SUCCESS) {
2183 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2184 			    "ibmf_sa_session_close of port %d returned %x",
2185 			    port_index + 1, ibmf_status);
2186 			return (IBCM_FAILURE);
2187 		}
2188 	}
2189 
2190 	if (hcap->hca_port_info[port_index].port_ibmf_hdl != NULL) {
2191 		IBTF_DPRINTF_L5(cmlog, "ibcm_hca_fini_port: "
2192 		    "ibmf_unregister IBMF Hdl %p",
2193 		    hcap->hca_port_info[port_index].port_ibmf_hdl);
2194 
2195 		/* clean-up all the ibmf qp's allocated on this port */
2196 		ibcm_status = ibcm_free_allqps(hcap, port_index + 1);
2197 
2198 		if (ibcm_status != IBCM_SUCCESS) {
2199 
2200 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2201 			    "ibcm_free_allqps failed for port_num %d",
2202 			    port_index + 1);
2203 			return (IBCM_FAILURE);
2204 		}
2205 
2206 		/* Tear down the receive callback */
2207 		ibmf_status = ibmf_tear_down_async_cb(
2208 		    hcap->hca_port_info[port_index].port_ibmf_hdl,
2209 		    IBMF_QP_HANDLE_DEFAULT, 0);
2210 
2211 		if (ibmf_status != IBMF_SUCCESS) {
2212 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2213 			    "ibmf_tear_down_async_cb failed %d port_num %d",
2214 			    ibmf_status, port_index + 1);
2215 			return (IBCM_FAILURE);
2216 		}
2217 
2218 		/* Now, unregister with IBMF */
2219 		ibmf_status = ibmf_unregister(
2220 		    &hcap->hca_port_info[port_index].port_ibmf_hdl, 0);
2221 		IBTF_DPRINTF_L4(cmlog, "ibcm_hca_fini_port: "
2222 		    "ibmf_unregister of port_num %x returned %x",
2223 		    port_index + 1, ibmf_status);
2224 
2225 		if (ibmf_status == IBMF_SUCCESS)
2226 			hcap->hca_port_info[port_index].port_ibmf_hdl =
2227 								NULL;
2228 		else {
2229 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2230 			    "ibmf_unregister failed %d port_num %d",
2231 			    ibmf_status, port_index + 1);
2232 			return (IBCM_FAILURE);
2233 		}
2234 	}
2235 	return (IBCM_SUCCESS);
2236 }
2237 
2238 /*
2239  * ibcm_comm_est_handler():
2240  *	Check if the given channel is in ESTABLISHED state or not
2241  *
2242  * Arguments:
2243  *	eventp	- A pointer to an ibt_async_event_t struct
2244  *
2245  * Return values: NONE
2246  */
2247 static void
2248 ibcm_comm_est_handler(ibt_async_event_t *eventp)
2249 {
2250 	ibcm_state_data_t	*statep;
2251 
2252 	IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler:");
2253 
2254 	/* Both QP and EEC handles can't be NULL */
2255 	if (eventp->ev_chan_hdl == NULL) {
2256 		IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: "
2257 		    "both QP and EEC handles are NULL");
2258 		return;
2259 	}
2260 
2261 	/* get the "statep" from qp/eec handles */
2262 	IBCM_GET_CHAN_PRIVATE(eventp->ev_chan_hdl, statep);
2263 	if (statep == NULL) {
2264 		IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: statep is NULL");
2265 		return;
2266 	}
2267 
2268 	mutex_enter(&statep->state_mutex);
2269 
2270 	IBCM_RELEASE_CHAN_PRIVATE(eventp->ev_chan_hdl);
2271 
2272 	IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler: statep = %p", statep);
2273 
2274 	IBCM_REF_CNT_INCR(statep);
2275 
2276 	if ((statep->state == IBCM_STATE_REP_SENT) ||
2277 	    (statep->state == IBCM_STATE_MRA_REP_RCVD)) {
2278 		timeout_id_t	timer_val = statep->timerid;
2279 
2280 		statep->state = IBCM_STATE_TRANSIENT_ESTABLISHED;
2281 
2282 		if (timer_val) {
2283 			statep->timerid = 0;
2284 			mutex_exit(&statep->state_mutex);
2285 			(void) untimeout(timer_val);
2286 		} else
2287 			mutex_exit(&statep->state_mutex);
2288 
2289 		/* CM doesn't have RTU message here */
2290 		ibcm_cep_state_rtu(statep, NULL);
2291 
2292 	} else {
2293 		if (statep->state == IBCM_STATE_ESTABLISHED ||
2294 		    statep->state == IBCM_STATE_TRANSIENT_ESTABLISHED) {
2295 			IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler: "
2296 			    "Channel already in ESTABLISHED state");
2297 		} else {
2298 			/* An unexpected behavior from remote */
2299 			IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: "
2300 			    "Unexpected in state = %d", statep->state);
2301 		}
2302 		mutex_exit(&statep->state_mutex);
2303 
2304 		ibcm_insert_trace(statep, IBCM_TRACE_INCOMING_COMEST);
2305 	}
2306 
2307 	mutex_enter(&statep->state_mutex);
2308 	IBCM_REF_CNT_DECR(statep);
2309 	mutex_exit(&statep->state_mutex);
2310 }
2311 
2312 
2313 /*
2314  * ibcm_async_handler():
2315  *	CM's Async Handler
2316  *	(Handles ATTACH, DETACH, COM_EST events)
2317  *
2318  * Arguments:
2319  *	eventp	- A pointer to an ibt_async_event_t struct
2320  *
2321  * Return values: None
2322  *
2323  * NOTE : CM assumes that all HCA DR events are delivered sequentially
2324  * i.e., until ibcm_async_handler  completes for a given HCA DR, framework
2325  * shall not invoke ibcm_async_handler with another DR event for the same
2326  * HCA
2327  */
2328 /* ARGSUSED */
2329 void
2330 ibcm_async_handler(void *clnt_hdl, ibt_hca_hdl_t hca_hdl,
2331     ibt_async_code_t code, ibt_async_event_t *eventp)
2332 {
2333 	ibcm_hca_info_t		*hcap;
2334 	ibcm_port_up_t		*pup;
2335 
2336 	IBTF_DPRINTF_L3(cmlog, "ibcm_async_handler: "
2337 	    "clnt_hdl = %p, code = 0x%x, eventp = 0x%p",
2338 	    clnt_hdl, code, eventp);
2339 
2340 	mutex_enter(&ibcm_global_hca_lock);
2341 
2342 	/* If fini is going to complete successfully, then return */
2343 	if (ibcm_finit_state != IBCM_FINIT_IDLE) {
2344 
2345 		/*
2346 		 * This finit state implies one of the following:
2347 		 * Init either didn't start or didn't complete OR
2348 		 * Fini is about to return SUCCESS and release the global lock.
2349 		 * In all these cases, it is safe to ignore the async.
2350 		 */
2351 
2352 		IBTF_DPRINTF_L2(cmlog, "ibcm_async_handler: ignoring event %x, "
2353 		    "as either init didn't complete or fini about to succeed",
2354 		    code);
2355 		mutex_exit(&ibcm_global_hca_lock);
2356 		return;
2357 	}
2358 
2359 	switch (code) {
2360 	case IBT_EVENT_PORT_UP:
2361 		mutex_exit(&ibcm_global_hca_lock);
2362 		pup = kmem_alloc(sizeof (ibcm_port_up_t), KM_SLEEP);
2363 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pup))
2364 		pup->pup_hca_guid = eventp->ev_hca_guid;
2365 		pup->pup_port = eventp->ev_port;
2366 		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*pup))
2367 		(void) taskq_dispatch(ibcm_taskq,
2368 		    ibcm_service_record_rewrite_task, pup, TQ_SLEEP);
2369 		ibcm_path_cache_purge();
2370 		return;
2371 
2372 	case IBT_HCA_ATTACH_EVENT:
2373 
2374 		/* eventp->ev_hcaguid is the HCA GUID of interest */
2375 		ibcm_hca_attach(eventp->ev_hca_guid);
2376 		break;
2377 
2378 	case IBT_HCA_DETACH_EVENT:
2379 
2380 		/* eventp->ev_hca_guid is the HCA GUID of interest */
2381 		if ((hcap = ibcm_find_hcap_entry(eventp->ev_hca_guid)) ==
2382 		    NULL) {
2383 			IBTF_DPRINTF_L2(cmlog, "ibcm_async_handler:"
2384 			    " hca %llX doesn't exist", eventp->ev_hca_guid);
2385 			break;
2386 		}
2387 
2388 		(void) ibcm_hca_detach(hcap);
2389 		break;
2390 
2391 	case IBT_EVENT_COM_EST_QP:
2392 		/* eventp->ev_qp_hdl is the ibt_qp_hdl_t of interest */
2393 	case IBT_EVENT_COM_EST_EEC:
2394 		/* eventp->ev_eec_hdl is the ibt_eec_hdl_t of interest */
2395 		ibcm_comm_est_handler(eventp);
2396 		break;
2397 	default:
2398 		break;
2399 	}
2400 
2401 	/* Unblock, any blocked fini/init operations */
2402 	mutex_exit(&ibcm_global_hca_lock);
2403 }
2404